media/audio/mp3/imdct36_standalone.c

   1 /*
   2  * Copyright (c) 2001, 2002 Fabrice Bellard
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include <stdint.h>
  22 #include <stddef.h>
  23
  24 #define USE_FLOATS 1
  25 #define SBLIMIT 32
  26 #define MDCT_BUF_SIZE 40
  27
  28 #define RENAME(n) n##_float
  29
  30 static inline float round_sample(float *sum)
  31 {
  32     float sum1=*sum;
  33     *sum = 0;
  34     return sum1;
  35 }
  36
  37 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
  38 #define MULS(ra, rb) ((ra)*(rb))
  39 #define MULH3(x, y, s) ((s)*(y)*(x))
  40 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
  41 #define MULLx(x, y, s) ((y)*(x))
  42 #define FIXHR(x)        ((float)(x))
  43 #define FIXR(x)        ((float)(x))
  44 #define SHR(a,b)       ((a)*(1.0f/(1<<(b))))
  45
  46 /* cos(pi*i/18) */
  47 #define C1 FIXHR(0.98480775301220805936/2)
  48 #define C2 FIXHR(0.93969262078590838405/2)
  49 #define C3 FIXHR(0.86602540378443864676/2)
  50 #define C4 FIXHR(0.76604444311897803520/2)
  51 #define C5 FIXHR(0.64278760968653932632/2)
  52 #define C6 FIXHR(0.5/2)
  53 #define C7 FIXHR(0.34202014332566873304/2)
  54 #define C8 FIXHR(0.17364817766693034885/2)
  55
  56 /* 0.5 / cos(pi*(2*i+1)/36) */
  57 static const float icos36[9] = {
  58     FIXR(0.50190991877167369479),
  59     FIXR(0.51763809020504152469), //0
  60     FIXR(0.55168895948124587824),
  61     FIXR(0.61038729438072803416),
  62     FIXR(0.70710678118654752439), //1
  63     FIXR(0.87172339781054900991),
  64     FIXR(1.18310079157624925896),
  65     FIXR(1.93185165257813657349), //2
  66     FIXR(5.73685662283492756461),
  67 };
  68
  69 /* 0.5 / cos(pi*(2*i+1)/36) */
  70 static const float icos36h[9] = {
  71     FIXHR(0.50190991877167369479/2),
  72     FIXHR(0.51763809020504152469/2), //0
  73     FIXHR(0.55168895948124587824/2),
  74     FIXHR(0.61038729438072803416/2),
  75     FIXHR(0.70710678118654752439/2), //1
  76     FIXHR(0.87172339781054900991/2),
  77     FIXHR(1.18310079157624925896/4),
  78     FIXHR(1.93185165257813657349/4), //2
  79 //    FIXHR(5.73685662283492756461),
  80 };
  81
  82 /* using Lee like decomposition followed by hand coded 9 points DCT */
  83 void imdct36(float *out, float *buf, float *in, const float *win)
  84 {
  85     int i, j, predicate;
  86     float t0, t1, t2, t3, s0, s1, s2, s3;
  87     float tmp[18], *tmp1, *in1;
  88
  89     /* straight sv.fadds/mr for this one, should do write-hazard-free
  90        overlapping adds:
  91         sv.fadds/mrr 11.v, 10.v, 10.v
  92      */
  93     for (i = 17; i >= 1; i--)
  94         in[i] += in[i-1];
  95
  96     /* this can use mrr mode with predicate 0b010101010101,
  97        it will do the job, something like:
  98         li r30, 0b010101010101
  99         setvl 16
 100         sv.fadds/mrr/m=r30 12.v, 10.v, 10.v
 101         which will issue adds *in reverse* order
 102               fadds 24, 26, 24  - not predicated (active)
 103               fadds 23, 25, 23  - predicated (masked out)
 104               fadds 22, 24, 22  - not predicated (active)
 105               fadds 21, 23, 21  - predicated (masked out)
 106               ...
 107         should result in write-hazard-free overlapping adds,
 108         skipping every other element because of the predication
 109     */
 110     for (i = 17; i >= 3; i -= 2)
 111         in[i] += in[i-2];
 112
 113     /* all of these are independent, should be possible to do with
 114         setvl 2.  probably possible to break these down into some
 115         sort of much smaller loop but the hand-coded nature of the
 116         algorithm makes it difficult to spot the pattern.
 117     */
 118     for (j = 0; j < 2; j++) {
 119         tmp1 = tmp + j;
 120         in1 = in + j;
 121
 122         t2 = in1[2*4] + in1[2*8] - in1[2*2];
 123
 124         t3 = in1[2*0] + SHR(in1[2*6],1);
 125         t1 = in1[2*0] - in1[2*6];
 126         tmp1[ 6] = t1 - SHR(t2,1);
 127         tmp1[16] = t1 + t2;
 128
 129         t0 = MULH3(in1[2*2] + in1[2*4] ,    C2, 2);
 130         t1 = MULH3(in1[2*4] - in1[2*8] , -2*C8, 1);
 131         t2 = MULH3(in1[2*2] + in1[2*8] ,   -C4, 2);
 132
 133         tmp1[10] = t3 - t0 - t2;
 134         tmp1[ 2] = t3 + t0 + t1;
 135         tmp1[14] = t3 + t2 - t1;
 136
 137         tmp1[ 4] = MULH3(in1[2*5] + in1[2*7] - in1[2*1], -C3, 2);
 138         t2 = MULH3(in1[2*1] + in1[2*5],    C1, 2);
 139         t3 = MULH3(in1[2*5] - in1[2*7], -2*C7, 1);
 140         t0 = MULH3(in1[2*3], C3, 2);
 141
 142         t1 = MULH3(in1[2*1] + in1[2*7],   -C5, 2);
 143
 144         tmp1[ 0] = t2 + t3 + t0;
 145         tmp1[12] = t2 + t1 - t0;
 146         tmp1[ 8] = t3 - t1 - t0;
 147     }
 148
 149     /* this can be done setvl 5 and with a predicate 0b01111 for the
 150        items marked "if predicate"
 151      */
 152     i = 0;
 153     for (j = 0; j < 5; j++) {
 154         predicate = (j != 4);
 155         t0 = tmp[i];
 156         if (predicate) t1 = tmp[i + 2]; else t1 = 0.0;
 157         s0 = t1 + t0;
 158         if (predicate) s2 = t1 - t0; else s2 = 0.0;
 159
 160         t2 = tmp[i + 1];
 161         if (predicate) t3 = tmp[i + 3]; else t3 = 0.0;
 162         s1 = MULH3(t3 + t2, icos36h[    j], 2);
 163         if (predicate) s3 = MULLx(t3 - t2, icos36 [8 - j], FRAC_BITS);
 164
 165         t0 = s0 + s1;
 166         t1 = s0 - s1;
 167         out[(9 + j) * SBLIMIT] = MULH3(t1, win[     9 + j], 1) + buf[4*(9 + j)];
 168         out[(8 - j) * SBLIMIT] = MULH3(t1, win[     8 - j], 1) + buf[4*(8 - j)];
 169         buf[4 * ( 9 + j     )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + j], 1);
 170         buf[4 * ( 8 - j     )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 8 - j], 1);
 171
 172         if (predicate) t0 = s2 + s3;
 173         if (predicate) t1 = s2 - s3;
 174         if (predicate) out[(9 + 8 - j) * SBLIMIT] = MULH3(t1, win[     9 + 8 - j], 1) + buf[4*(9 + 8 - j)];
 175         if (predicate) out[         j  * SBLIMIT] = MULH3(t1, win[             j], 1) + buf[4*(        j)];
 176         if (predicate) buf[4 * ( 9 + 8 - j     )] = MULH3(t0, win[MDCT_BUF_SIZE/2 + 9 + 8 - j], 1);
 177         if (predicate) buf[4 * (         j     )] = MULH3(t0, win[MDCT_BUF_SIZE/2         + j], 1);
 178         i += 4;
 179     }
 180 }