From 9bf850280c6291a6d81ac1bdc113b6deb8f95449 Mon Sep 17 00:00:00 2001
From: Trevor Smigiel
Date: Tue, 27 May 2008 08:19:45 +0000
Subject: [PATCH] Add TImode libgcc functions for mul and div.

From-SVN: r135973
---
 gcc/config/spu/divmodti4.c               | 181 ++++++++++++++++++++++++
 gcc/config/spu/multi3.c                  | 101 +++++++++++++
 gcc/config/spu/spu.c                     |  12 +-
 gcc/testsuite/gcc.target/spu/muldivti3.c |  55 +++++++
 4 files changed, 347 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/spu/divmodti4.c
 create mode 100644 gcc/config/spu/multi3.c
 create mode 100644 gcc/testsuite/gcc.target/spu/muldivti3.c

diff --git a/gcc/config/spu/divmodti4.c b/gcc/config/spu/divmodti4.c
new file mode 100644
index 00000000000..ca643cc33bf
--- /dev/null
+++ b/gcc/config/spu/divmodti4.c
@@ -0,0 +1,181 @@
+/* Copyright (C) 2008 Free Software Foundation, Inc.
+
+   This file is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2 of the License, or (at your option)
+   any later version.
+
+   This file is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this file; see the file COPYING.  If not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+/* As a special exception, if you link this library with files compiled with
+   GCC to produce an executable, this does not cause the resulting executable
+   to be covered by the GNU General Public License.  The exception does not
+   however invalidate any other reasons why the executable file might be covered
+   by the GNU General Public License.  */
+
+#include <spu_intrinsics.h>
+
+typedef unsigned int UTItype __attribute__ ((mode (TI)));
+typedef int TItype __attribute__ ((mode (TI)));
+TItype __divti3 (TItype u, TItype v);
+TItype __modti3 (TItype u, TItype v);
+UTItype __udivti3 (UTItype u, UTItype v);
+UTItype __umodti3 (UTItype u, UTItype v);
+UTItype __udivmodti4 (UTItype u, UTItype v, UTItype *w);
+
+/* Return the number of leading zero bits in the 128-bit value X (128 when
+   X is zero).  The per-word counts from si_clz are summed starting at the
+   most significant word; a word's count is included only while every more
+   significant word is entirely zero (count greater than 31).  */
+inline static unsigned int
+count_leading_zeros (UTItype x)
+{
+  qword c = si_clz (*(qword *) & x);
+  qword cmp0 = si_cgti (c, 31);
+  qword cmp1 = si_and (cmp0, si_shlqbyi (cmp0, 4));
+  qword cmp2 = si_and (cmp1, si_shlqbyi (cmp0, 8));
+  qword s = si_a (c, si_and (cmp0, si_shlqbyi (c, 4)));
+  s = si_a (s, si_and (cmp1, si_shlqbyi (c, 8)));
+  s = si_a (s, si_and (cmp2, si_shlqbyi (c, 12)));
+  return si_to_uint (s);
+}
+
+/* Based on implementation of udivmodsi4, which is essentially
+ * an optimized version of gcc/config/udivmodsi4.c
+        clz      %7,%2
+        clz      %4,%1
+        il       %5,1
+        fsmbi    %0,0
+        sf       %7,%4,%7
+        ori      %3,%1,0
+        shl      %5,%5,%7
+        shl      %4,%2,%7
+1:      or       %8,%0,%5
+        rotmi    %5,%5,-1
+        clgt     %6,%4,%3
+        sf       %7,%4,%3
+        rotmi    %4,%4,-1
+        selb     %0,%8,%0,%6
+        selb     %3,%7,%3,%6
+3:      brnz     %5,1b
+ */
+
+/* Divide NUM by DEN as unsigned 128-bit integers using shift-and-subtract
+   long division.  Return the quotient and, when RP is non-null, store the
+   remainder through RP.  No check is made for a zero DEN.  */
+UTItype
+__udivmodti4 (UTItype num, UTItype den, UTItype * rp)
+{
+  qword shift =
+    si_from_uint (count_leading_zeros (den) - count_leading_zeros (num));
+  qword n0 = *(qword *) & num;
+  qword d0 = *(qword *) & den;
+  qword bit = si_andi (si_fsmbi (1), 1);
+  qword r0 = si_il (0);
+  qword m1 = si_fsmbi (0x000f);
+  qword mask, r1, n1;
+
+  d0 = si_shlqbybi (si_shlqbi (d0, shift), shift);
+  bit = si_shlqbybi (si_shlqbi (bit, shift), shift);
+
+  do
+    {
+      r1 = si_or (r0, bit);
+
+      // n1 = n0 - d0 in TImode
+      n1 = si_bg (d0, n0);
+      n1 = si_shlqbyi (n1, 4);
+      n1 = si_sf (m1, n1);
+      n1 = si_bgx (d0, n0, n1);
+      n1 = si_shlqbyi (n1, 4);
+      n1 = si_sf (m1, n1);
+      n1 = si_bgx (d0, n0, n1);
+      n1 = si_shlqbyi (n1, 4);
+      n1 = si_sf (m1, n1);
+      n1 = si_sfx (d0, n0, n1);
+
+      mask = si_fsm (si_cgti (n1, -1));
+      r0 = si_selb (r0, r1, mask);
+      n0 = si_selb (n0, n1, mask);
+      bit = si_rotqmbii (bit, -1);
+      d0 = si_rotqmbii (d0, -1);
+    }
+  while (si_to_uint (si_orx (bit)));
+  if (rp)
+    *rp = *(UTItype *) & n0;
+  return *(UTItype *) & r0;
+}
+
+/* Return the unsigned 128-bit quotient N / D.  */
+UTItype
+__udivti3 (UTItype n, UTItype d)
+{
+  return __udivmodti4 (n, d, (UTItype *)0);
+}
+
+/* Return the unsigned 128-bit remainder N % D.  */
+UTItype
+__umodti3 (UTItype n, UTItype d)
+{
+  UTItype w;
+  __udivmodti4 (n, d, &w);
+  return w;
+}
+
+/* Return the signed 128-bit quotient N / D, truncated toward zero.  The
+   division is done on the magnitudes; C records whether exactly one
+   operand was negative, in which case the quotient is negated.  */
+TItype
+__divti3 (TItype n, TItype d)
+{
+  int c = 0;
+  TItype w;
+
+  if (n < 0)
+    {
+      c = ~c;
+      n = -n;
+    }
+  if (d < 0)
+    {
+      c = ~c;
+      d = -d;
+    }
+
+  w = __udivmodti4 (n, d, (UTItype *)0);
+  if (c)
+    w = -w;
+  return w;
+}
+
+/* Return the signed 128-bit remainder N % D.  As C requires, the result
+   has the sign of the dividend N, so that (N / D) * D + N % D == N; the
+   sign of the divisor D does not affect it.  */
+TItype
+__modti3 (TItype n, TItype d)
+{
+  int c = 0;
+  TItype w;
+
+  if (n < 0)
+    {
+      c = ~c;
+      n = -n;
+    }
+  /* Only the magnitude of the divisor matters; it must not toggle C.  */
+  if (d < 0)
+    d = -d;
+
+  __udivmodti4 (n, d, (UTItype *) &w);
+  if (c)
+    w = -w;
+  return w;
+}
diff --git a/gcc/config/spu/multi3.c b/gcc/config/spu/multi3.c
new file mode 100644
index 00000000000..6998ed026f8
--- /dev/null
+++ b/gcc/config/spu/multi3.c
@@ -0,0 +1,101 @@
+/* Copyright (C) 2008 Free Software Foundation, Inc.
+
+   This file is free software; you can redistribute it and/or modify it under
+   the terms of the GNU General Public License as published by the Free
+   Software Foundation; either version 2 of the License, or (at your option)
+   any later version.
+
+   This file is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+   for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this file; see the file COPYING.  If not, write to the Free
+   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.  */
+
+/* As a special exception, if you link this library with files compiled with
+   GCC to produce an executable, this does not cause the resulting executable
+   to be covered by the GNU General Public License.  The exception does not
+   however invalidate any other reasons why the executable file might be covered
+   by the GNU General Public License.  */
+
+#include <spu_intrinsics.h>
+
+typedef int TItype __attribute__ ((mode (TI)));
+
+/* A straightforward vectorization and unrolling of
+ *   short l[8], r[8];
+ *   TItype total = 0;
+ *   for (i = 0; i < 8; i++)
+ *     for (j = 0; j < 8; j++)
+ *       total += (TItype)((l[7-i] * r[7-j]) << (16 * (i + j)));
+ */
+TItype
+__multi3 (TItype l, TItype r)
+{
+  qword u = *(qword *) & l;
+  qword v = *(qword *) & r;
+  qword splat0 = si_shufb (v, v, si_ilh (0x0001));
+  qword splat1 = si_shufb (v, v, si_ilh (0x0203));
+  qword splat2 = si_shufb (v, v, si_ilh (0x0405));
+  qword splat3 = si_shufb (v, v, si_ilh (0x0607));
+  qword splat4 = si_shufb (v, v, si_ilh (0x0809));
+  qword splat5 = si_shufb (v, v, si_ilh (0x0a0b));
+  qword splat6 = si_shufb (v, v, si_ilh (0x0c0d));
+  qword splat7 = si_shufb (v, v, si_ilh (0x0e0f));
+
+  qword part0l = si_shlqbyi (si_mpyu   (u, splat0), 14);
+  qword part1h = si_shlqbyi (si_mpyhhu (u, splat1), 14);
+  qword part1l = si_shlqbyi (si_mpyu   (u, splat1), 12);
+  qword part2h = si_shlqbyi (si_mpyhhu (u, splat2), 12);
+  qword part2l = si_shlqbyi (si_mpyu   (u, splat2), 10);
+  qword part3h = si_shlqbyi (si_mpyhhu (u, splat3), 10);
+  qword part3l = si_shlqbyi (si_mpyu   (u, splat3), 8);
+  qword part4h = si_shlqbyi (si_mpyhhu (u, splat4), 8);
+  qword part4l = si_shlqbyi (si_mpyu   (u, splat4), 6);
+  qword part5h = si_shlqbyi (si_mpyhhu (u, splat5), 6);
+  qword part5l = si_shlqbyi (si_mpyu   (u, splat5), 4);
+  qword part6h = si_shlqbyi (si_mpyhhu (u, splat6), 4);
+  qword part6l = si_shlqbyi (si_mpyu   (u, splat6), 2);
+  qword part7h = si_shlqbyi (si_mpyhhu (u, splat7), 2);
+  qword part7l = si_mpyu (u, splat7);
+
+  qword carry, total0, total1, total2, total3, total4;
+  qword total5, total6, total7, total8, total9, total10;
+  qword total;
+
+  total0 = si_a (si_a (si_a (part0l, part1h), si_a (part1l, part2h)), part7l);
+  total1 = si_a (part2l, part3h);
+  total2 = si_a (part3l, part4h);
+  total3 = si_a (part4l, part5h);
+  total4 = si_a (part5l, part6h);
+  total5 = si_a (part6l, part7h);
+  total6 = si_a (total0, total1);
+  total7 = si_a (total2, total3);
+  total8 = si_a (total4, total5);
+  total9 = si_a (total6, total7);
+  total10 = si_a (total8, total9);
+
+  /* The word-wise adds above drop carries between words; count them.  */
+  carry = si_cg (part2l, part3h);
+  carry = si_a (carry, si_cg (part3l, part4h));
+  carry = si_a (carry, si_cg (part4l, part5h));
+  carry = si_a (carry, si_cg (part5l, part6h));
+  carry = si_a (carry, si_cg (part6l, part7h));
+  carry = si_a (carry, si_cg (total0, total1));
+  carry = si_a (carry, si_cg (total2, total3));
+  carry = si_a (carry, si_cg (total4, total5));
+  carry = si_a (carry, si_cg (total6, total7));
+  carry = si_a (carry, si_cg (total8, total9));
+  carry = si_shlqbyi (carry, 4);
+
+  /* total = total10 + carry, propagating carries across all four words.  */
+  total = si_cg (total10, carry);
+  total = si_shlqbyi (total, 4);
+  total = si_cgx (total10, carry, total);
+  total = si_shlqbyi (total, 4);
+  total = si_addx (total10, carry, total);
+  return *(TItype *) & total;
+}
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index 692a8dae34f..de307ab32ed 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -4422,6 +4422,13 @@ spu_init_libfuncs (void)
 
   set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
   set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
+
+  set_optab_libfunc (smul_optab, TImode, "__multi3");
+  set_optab_libfunc (sdiv_optab, TImode, "__divti3");
+  set_optab_libfunc (smod_optab, TImode, "__modti3");
+  set_optab_libfunc (udiv_optab, TImode, "__udivti3");
+  set_optab_libfunc (umod_optab, TImode, "__umodti3");
+  set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
 }
 
 /* Make a subreg, stripping any existing subreg.  We could possibly just
@@ -4473,7 +4480,7 @@ spu_init_builtins (void)
   unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
   unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
 
-  spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
+  spu_builtin_types[SPU_BTI_QUADWORD] = intTI_type_node;
 
   spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
   spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
@@ -5368,7 +5375,8 @@ spu_expand_builtin_1 (struct spu_builtin_description *d,
       if (VECTOR_MODE_P (mode)
 	  && (GET_CODE (ops[i]) == CONST_INT
 	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
-	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
+	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT)
+	  && d->parm[i] != SPU_BTI_QUADWORD)
 	{
 	  if (GET_CODE (ops[i]) == CONST_INT)
 	    ops[i] = spu_const (mode, INTVAL (ops[i]));
diff --git a/gcc/testsuite/gcc.target/spu/muldivti3.c b/gcc/testsuite/gcc.target/spu/muldivti3.c
new file mode 100644
index 00000000000..0363e342075
--- /dev/null
+++ b/gcc/testsuite/gcc.target/spu/muldivti3.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-std=c99" } */
+#include <stdlib.h>
+typedef unsigned int uqword __attribute__((mode(TI)));
+typedef int qword __attribute__((mode(TI)));
+
+typedef union
+{
+  uqword uq;
+  qword q;
+  unsigned long long ull[2];
+} u;
+
+int main(void)
+{
+  uqword e, f;
+  qword g, h;
+
+  e = 0x1111111111111111ULL;
+  f = 0xFULL;
+  g = 0x0000000000111100ULL;
+  h = 0x0000000000000000ULL;
+
+  u m, n, o, p, q;
+
+  m.ull[0] = f;
+  m.ull[1] = e;
+  n.ull[0] = h;
+  n.ull[1] = g;
+
+  /* __multi3 */
+  o.q = m.q * n.q;
+
+  o.q = o.q + n.q + 0x1110FF;
+  /* __udivti3, __umodti3 */
+  p.uq = o.uq / n.uq;
+  q.uq = o.uq % n.uq;
+  if (p.uq != (m.uq+1)) abort();
+  if (q.uq != 0x1110FF) abort();
+  /* __divti3, __modti3 */
+  p.q = -o.q / n.q;
+  q.q = -o.q % n.q;
+  if ((-p.q * n.q - q.q) != o.q) abort();
+  /* Negative divisor: the remainder keeps the sign of the dividend.  */
+  p.q = o.q / -n.q;
+  q.q = o.q % -n.q;
+  if (p.q != -(m.q+1)) abort();
+  if (q.q != 0x1110FF) abort();
+  p.q = -o.q / -n.q;
+  q.q = -o.q % -n.q;
+  if (p.q != (m.q+1)) abort();
+  if (q.q != -0x1110FF) abort();
+
+  return 0;
+}
-- 
2.30.2