-# # ffmpeg lgpl 2.1 or later
-#
-# some instructions could be saved by using fmac (sv.fmadds, sv.fnmsubs)
-# but the accuracy is so high it produces different results. this
-# demo therefore uses fmuls followed by fmsub/fmadd in map-reduce mode
-# also note, the FP registers are overwritten, not saved on stack yet.
-# at some point 128 registers will be available, meaning that an EABI
-# will be defined where there will be plenty of temporaries and no need
-# to store 24 FP regs on the stack.
+# ffmpeg lgpl 2.1 or later
-# ints
-.set out, 3
-.set buf, 4
-.set in, 5
-.set win, 6
-
-.set i, 7
-.set vin, 8
-.set vin1, 9
-.set vin2, 11
-.set pred, 30
-
-# floats
-
- .machine libresoc
- .text
+ .file "mpegaudiodsp_float.c"
+ .machine power9
.abiversion 2
- .file "imdct36_standalone.c"
- .section .rodata.cst4,"aM",@progbits,4
- .p2align 2 # -- Begin function imdct36
-.LC_zero:
- .long 0 # float 0
-.LC_2_0:
- .long 0x40000000 # float 2
-.LC_0_5:
- .long 1056964608 # float 0.5
-.LCPI0_2:
- .long 1064341426 # float 0.939692616
-.LCPI0_3:
- .long 3190935764 # float -0.173648179
-.LCPI0_4:
- .long 3208911741 # float -0.766044437
-.LCPI0_5:
- .long 3210589143 # float -0.866025388
-.LCPI0_6:
- .long 1065098332 # float 0.984807729
-.LCPI0_7:
- .long 3199147332 # float -0.342020154
-.LCPI0_8:
- .long 1063105495 # float 0.866025388
-.LCPI0_9:
- .long 3206843835 # float -0.642787635
- .text
- .globl imdct36
- .p2align 4
- .type imdct36,@function
-imdct36: # @imdct36
-.Lfunc_begin0:
-.Lfunc_gep0:
- addis 2, 12, .TOC.-.Lfunc_gep0@ha
- addi 2, 2, .TOC.-.Lfunc_gep0@l
-.Lfunc_lep0:
- .localentry imdct36, .Lfunc_lep0-.Lfunc_gep0
-# %bb.0:
- std 30, -16(1) # 8-byte Folded Spill
- std 3, -24(1)
- std 4, -32(1)
- std 5, -40(1)
- std 6, -48(1)
-
-.loop1:
- setvl 0,0,18,0,1,1 # Set VL to 18 elements
- # Load 18 floats from (in)
- sv.lfs *vin, 0(in)
- # equivalent to: for (i = 17; i >= 1; i--) in[i] += in[i-1];
- sv.fadds/mrr *vin1, *vin1, *vin
- # SETVL to 16 as the next loop is from 1-17 floats to (out)
- setvl 0,0,16,0,1,1
- li 30, 0
- ori 30, 30, 0xaaaa # Predicate mask 0b1010101010101010
- # equivalent to: for (i = 17; i >= 3; i -= 2) in[i] += in[i-2];
- sv.fadds/mrr/m=pred *vin2, *vin2, *vin1
- # Use SETVL again as we want to store 18 floats to (out)
- setvl 0,0,18,0,1,1
- sv.stfs *vin, 0(out)
-
- # Load 2.0f constant in register 29, will be needed for SHR macro
- # fmvis 29, 0x4000
-
- # Use SETVL 2 for the next loop and calculate first the temporary variables, t1,t2,t3
- # equivalent to:
- # for (j = 0; j < 2; j++) {
- # in1 = in + j;
- # t1 = in1[2*0] - in1[2*6];
- # t2 = in1[2*4] + in1[2*8] - in1[2*2];
- # t3 = in1[2*8] + SHR(in1[2*6],1);
- # t4 = t1 - SHR(t2, 1);
- # t5 = t1 + t2;
- # }
- # t1 -> r32-r34
- # t2 -> r35-r37
- # t3 -> r38-r40
- # t4 -> r41-r43
- # t5 -> r44-r46
- # Similarly, the values of 'in' array are already in registers 8-26
- setvl 0,0,2,0,1,1
- # t1
- sv.fsubs 32.v, 8.v, 20.v
- # t2
- sv.fadds 35.v, 16.v, 24.v
- sv.fsubs 35.v, 35.v, 12.v
- # t3, SHR(a,b) = a * 1.0f/(1 << (1)) = a / 2 essentially fdiv a, a, 2.0
- sv.fdivs 38.v, 20.v, 29
- sv.fadds 38.v, 38.v, 8.v
- # t4, essentially fdiv 41.v, 35.v, 29
- sv.fdivs 41.v, 35.v, 29
- sv.fsubs 41.v, 32.v, 41.v
- # t5
- sv.fadds 44.v, 32.v, 35.v
-
- # Use SETVL again as we want to store 18 floats to (out)
- setvl 0,0,18,0,1,1
- sv.stfs 32.v, 0(3)
+ .section ".text"
+ .section ".toc","aw"
+ .align 3
+.LCTOC0:
+ .tc .LCTOC1[TC],.LCTOC1 # TOC entry holding the address of .LCTOC1 (read via ld 30,.LCTOC0@toc(2))
+ .section ".toc1","aw"
+ .align 3
+.LCTOC1 = .+32768 # pool base, placed 32768 bytes past this point; entries are addressed as .LCn-.LCTOC1(30)
+.LC0:
+ .quad 0x3f000000 # float 0.5 (single-precision bit pattern; every entry here is read with lfs)
+.LC1:
+ .quad 0x3f708fb2 # float 0.939692616
+.LC2:
+ .quad 0xbe31d0d4 # float -0.173648179
+.LC3:
+ .quad 0xbf441b7d # float -0.766044437
+.LC4:
+ .quad 0xbf5db3d7 # float -0.866025388
+.LC5:
+ .quad 0x3f7c1c5c # float 0.984807729
+.LC6:
+ .quad 0xbeaf1d44 # float -0.342020154
+.LC7:
+ .quad 0x3f5db3d7 # float 0.866025388
+.LC8:
+ .quad 0xbf248dbb # float -0.642787635
+.LC9:
+ .quad 0x3f007d2b # float 0.501909912
+.LC10:
+ .quad 0x40b79454 # float 5.73685646
+.LC11:
+ .quad 0x3f0483ee # float 0.517638087
+.LC12:
+ .quad 0x3ff746ea # float 1.93185163
+.LC13:
+ .quad 0x3f0d3b7d # float 0.551688969
+.LC14:
+ .quad 0x3f976fd9 # float 1.18310082
+.LC15:
+ .quad 0x3f1c4257 # float 0.610387265
+.LC16:
+ .quad 0x3f5f2944 # float 0.871723413
+.LC17:
+ .quad 0x3f3504f3 # float 0.707106769
+ .section ".text"
+ .align 2
+ .p2align 4,,15
+ .globl imdct36
+ .type imdct36, @function
+imdct36: # scalar build of imdct36; r3/r4/r5/r6 are used as four pointers (presumably out, buf, in, win -- confirm against the C prototype)
+.LCF0:
+0: addis 2,12,.TOC.-.LCF0@ha # global entry: compute the TOC pointer r2 from r12
+ addi 2,2,.TOC.-.LCF0@l
+ .localentry imdct36,.-imdct36 # local entry point: callers arriving here already have r2 set
+ stfd 15,-136(1) # save f15..f31 below r1; r1 itself is never adjusted in this function
+ stfd 16,-128(1)
+ stfd 17,-120(1)
+ stfd 18,-112(1)
+ stfd 19,-104(1)
+ stfd 20,-96(1)
+ stfd 21,-88(1)
+ stfd 22,-80(1)
+ stfd 23,-72(1)
+ stfd 24,-64(1)
+ stfd 25,-56(1)
+ stfd 26,-48(1)
+ stfd 27,-40(1)
+ stfd 28,-32(1)
+ stfd 29,-24(1)
+ stfd 30,-16(1)
+ stfd 31,-8(1)
+ std 30,-152(1) # save r30; it becomes the constant-pool base below
+ lfs 11,60(5) # load the 18 floats at 0..68(r5) into FPRs (element index = offset / 4)
+ lfs 12,52(5)
+ lfs 27,48(5)
+ lfs 1,40(5)
+ lfs 13,32(5)
+ lfs 2,24(5)
+ lfs 19,64(5)
+ lfs 30,56(5)
+ lfs 29,44(5)
+ lfs 3,36(5)
+ lfs 6,28(5)
+ lfs 8,20(5)
+ lfs 31,16(5)
+ lfs 10,68(5)
+ lfs 5,12(5)
+ lfs 9,4(5)
+ lfs 4,0(5)
+ lfs 18,8(5)
+ fadds 28,12,27 # element 13 + element 12: first of the adjacent-element sums
+ ld 30,.LCTOC0@toc(2) # r30 = .LCTOC1, base for every .LCn-.LCTOC1(30) constant load
+ fadds 7,11,30
+ fadds 0,29,1
+ fadds 30,30,12
+ fadds 29,27,29
+ fadds 12,6,2
+ fadds 6,13,6
+ fadds 16,8,31
+ fadds 8,2,8
+ fadds 10,19,10
+ fadds 19,19,11
+ fadds 11,3,13
+ fadds 31,31,5
+ fadds 24,4,9
+ fadds 3,1,3
+ lfs 1,.LC0-.LCTOC1(30) # f1 = .LC0 (0x3f000000, i.e. 0.5f)
+ fadds 17,5,18
+ fadds 18,18,9
+ lfs 27,.LC5-.LCTOC1(30)
+ stfs 6,32(5) # summed values are written back in place; offsets 4..68(r5) get stored, 0(r5) is only read
+ lfs 5,.LC6-.LCTOC1(30)
+ stfs 29,48(5)
+ lfs 13,.LC1-.LCTOC1(30)
+ stfs 30,56(5)
+ stfs 8,24(5)
+ fadds 10,10,7 # second pass: sum for elements 17+16 gains the sum for elements 15+14 (stored to 68(r5) below)
+ fadds 7,7,28
+ fadds 28,28,0
+ fadds 0,0,11
+ fadds 11,11,12
+ fadds 12,12,16
+ fadds 2,6,31
+ fsubs 23,6,19
+ fadds 16,16,17
+ fadds 17,24,17
+ fadds 6,19,6
+ fsubs 26,3,30
+ fadds 21,3,18
+ stfs 19,64(5)
+ stfs 31,16(5)
+ stfs 24,4(5)
+ stfs 3,40(5)
+ fadds 3,30,3
+ stfs 18,8(5)
+ fadds 30,30,18
+ fsubs 9,0,7
+ stfs 28,52(5)
+ stfs 10,68(5)
+ stfs 0,44(5)
+ fsubs 22,11,10
+ fmuls 2,2,13
+ stfs 11,36(5)
+ stfs 12,28(5)
+ fadds 20,17,0
+ fadds 25,11,16
+ fsubs 6,6,31
+ fadds 31,19,31
+ fmuls 19,28,1
+ fmuls 21,21,27
+ fsubs 28,24,28
+ fadds 11,10,11
+ fsubs 3,3,18
+ stfs 16,20(5)
+ fadds 10,10,16
+ fadds 0,7,0
+ fmuls 9,9,5
+ fmuls 5,26,5
+ lfs 26,.LC2-.LCTOC1(30)
+ lfs 15,.LC9-.LCTOC1(30)
+ stfs 7,60(5)
+ stfs 17,12(5)
+ fmuls 20,20,27
+ lfs 27,.LC7-.LCTOC1(30)
+ fmuls 13,25,13
+ fadds 24,19,24
+ fmuls 25,29,1
+ fsubs 29,4,29
+ fsubs 11,11,16
+ lfs 16,.LC10-.LCTOC1(30)
+ fadds 7,17,7
+ fsubs 0,0,17
+ fadds 18,21,5
+ fmuls 22,22,26
+ fmuls 23,23,26
+ fadds 19,20,9
+ fmuls 12,12,27
+ fadds 25,25,4
+ fadds 4,24,13
+ fmuls 8,8,27
+ fsubs 13,24,13
+ fadds 19,19,12
+ fadds 4,4,22
+ fadds 26,25,2
+ fadds 18,18,8
+ fsubs 2,25,2
+ fadds 27,4,19
+ fsubs 4,4,19
+ fmuls 19,11,1
+ fmuls 1,6,1
+ fadds 26,26,23
+ fadds 6,6,29
+ fadds 11,11,28
+ fsubs 1,29,1
+ fmuls 4,4,16
+ lfs 29,.LC8-.LCTOC1(30)
+ lfs 16,.LC4-.LCTOC1(30)
+ fadds 17,26,18
+ fmuls 27,27,15
+ fsubs 26,26,18
+ fsubs 28,28,19
+ lfs 18,.LC3-.LCTOC1(30)
+ fmuls 30,30,29
+ fmuls 7,7,29
+ lfs 29,36(6) # first read of the table at r6 (only ever loaded from, never stored to)
+ fmuls 0,0,16
+ fsubs 19,17,27
+ fmuls 3,3,16
+ fadds 27,27,17
+ fmuls 31,31,18
+ fmuls 10,10,18
+ fsubs 17,26,4
+ fadds 4,4,26
+ fsubs 18,5,30
+ fsubs 5,30,5
+ fadds 15,28,0
+ fsubs 28,28,0
+ lfs 0,144(4) # first read of the r4 buffer (read here, overwritten further down)
+ fmuls 16,19,29 # f16 = f19 * 36(r6)
+ fadds 26,1,3
+ fadds 29,21,30
+ fsubs 2,2,31
+ fsubs 3,1,3
+ fadds 31,25,31
+ fsubs 1,9,7
+ fsubs 9,7,9
+ fsubs 13,13,10
+ fadds 24,24,10
+ fadds 7,20,7
+ fsubs 21,18,8
+ fadds 16,16,0 # ... + old 144(r4)
+ fsubs 10,29,8
+ fadds 0,5,8
+ fsubs 8,8,29
+ fsubs 23,31,23
+ fadds 30,9,12
+ fsubs 1,1,12
+ fsubs 22,24,22
+ fsubs 9,7,12
+ fsubs 12,12,7
+ fadds 21,21,2
+ lfs 5,.LC12-.LCTOC1(30)
+ stfs 16,1152(3) # first of 18 stores through r3; their offsets are k*128 for k = 0..17
+ fadds 29,0,2
+ fadds 1,1,13
+ fadds 30,30,13
+ fadds 13,10,23
+ fadds 8,8,23
+ fadds 7,9,22
+ fadds 12,12,22
+ fmuls 28,28,5
+ lfs 10,32(6)
+ lfs 0,128(4)
+ fmuls 19,19,10
+ fadds 19,19,0
+ stfs 19,1024(3)
+ lfs 0,116(6)
+ lfs 10,272(4)
+ fmuls 0,0,27
+ stfs 0,144(4) # first of 18 stores through r4 (offsets k*16, k = 0..17): 144(r4) = 116(r6) * f27
+ lfs 0,112(6)
+ fmuls 27,0,27
+ stfs 27,128(4)
+ lfs 9,68(6)
+ fmuls 9,17,9
+ fadds 9,9,10
+ stfs 9,2176(3)
+ lfs 9,.LC11-.LCTOC1(30)
+ lfs 10,0(6)
+ lfs 0,0(4)
+ fmuls 9,15,9
+ fmuls 17,17,10
+ fsubs 2,26,9
+ fadds 9,9,26
+ lfs 10,.LC13-.LCTOC1(30)
+ fadds 17,17,0
+ fmuls 10,1,10
+ stfs 17,0(3)
+ fsubs 1,21,10
+ fadds 10,10,21
+ lfs 0,148(6)
+ lfs 27,160(4)
+ fmuls 0,0,4
+ stfs 0,272(4)
+ lfs 0,80(6)
+ fmuls 4,0,4
+ lfs 0,.LC14-.LCTOC1(30)
+ stfs 4,0(4)
+ fsubs 4,3,28
+ fadds 3,28,3
+ fmuls 30,30,0
+ lfs 31,40(6)
+ fsubs 5,29,30
+ fadds 0,30,29
+ fmuls 31,2,31
+ fadds 31,31,27
+ stfs 31,1280(3)
+ lfs 30,28(6)
+ lfs 31,112(4)
+ fmuls 2,2,30
+ fadds 2,2,31
+ stfs 2,896(3)
+ lfs 31,120(6)
+ lfs 30,256(4)
+ fmuls 31,31,9
+ stfs 31,160(4)
+ lfs 31,108(6)
+ fmuls 9,31,9
+ stfs 9,112(4)
+ lfs 2,64(6)
+ fmuls 2,4,2
+ fadds 2,2,30
+ stfs 2,2048(3)
+ lfs 2,4(6)
+ lfs 9,16(4)
+ fmuls 4,4,2
+ fadds 4,4,9
+ stfs 4,128(3)
+ lfs 9,144(6)
+ lfs 2,176(4)
+ fmuls 9,9,3
+ stfs 9,256(4)
+ lfs 9,84(6)
+ fmuls 3,9,3
+ stfs 3,16(4)
+ lfs 4,44(6)
+ fmuls 4,1,4
+ fadds 4,4,2
+ lfs 2,.LC15-.LCTOC1(30)
+ stfs 4,1408(3)
+ lfs 9,24(6)
+ lfs 4,96(4)
+ fmuls 9,1,9
+ fadds 9,9,4
+ lfs 4,.LC16-.LCTOC1(30)
+ stfs 9,768(3)
+ fmuls 12,12,4
+ lfs 9,124(6)
+ lfs 3,240(4)
+ fsubs 4,8,12
+ fadds 8,8,12
+ fmuls 9,9,10
+ stfs 9,176(4)
+ lfs 9,104(6)
+ fmuls 9,9,10
+ fmuls 10,7,2
+ stfs 9,96(4)
+ fsubs 9,13,10
+ fadds 10,13,10
+ lfs 7,60(6)
+ fmuls 7,5,7
+ fadds 7,7,3
+ stfs 7,1920(3)
+ lfs 7,8(6)
+ lfs 12,32(4)
+ fmuls 5,5,7
+ fadds 5,5,12
+ stfs 5,256(3)
+ lfs 12,140(6)
+ lfs 5,192(4)
+ fmuls 12,12,0
+ stfs 12,240(4)
+ lfs 12,88(6)
+ fmuls 0,12,0
+ stfs 0,32(4)
+ lfs 7,48(6)
+ fmuls 7,9,7
+ fadds 7,7,5
+ stfs 7,1536(3)
+ lfs 12,20(6)
+ lfs 0,80(4)
+ fmuls 9,9,12
+ fadds 9,9,0
+ stfs 9,640(3)
+ lfs 0,128(6)
+ lfs 12,224(4)
+ fmuls 0,0,10
+ stfs 0,192(4)
+ lfs 0,100(6)
+ fmuls 10,0,10
+ stfs 10,80(4)
+ lfs 10,56(6)
+ fmuls 10,4,10
+ fadds 10,10,12
+ stfs 10,1792(3)
+ lfs 0,12(6)
+ lfs 12,48(4)
+ fmuls 0,4,0
+ fadds 0,0,12
+ lfs 12,.LC17-.LCTOC1(30)
+ stfs 0,384(3)
+ fmuls 11,11,12
+ lfs 0,136(6)
+ lfs 10,208(4)
+ fmuls 0,0,8
+ stfs 0,224(4)
+ lfs 0,92(6)
+ fmuls 8,0,8
+ fsubs 0,6,11
+ fadds 6,11,6
+ stfs 8,48(4)
+ lfs 12,52(6)
+ fmuls 12,0,12
+ fadds 12,12,10
+ stfs 12,1664(3)
+ lfs 11,16(6)
+ lfs 12,64(4)
+ fmuls 0,0,11
+ fadds 0,0,12
+ stfs 0,512(3)
+ lfs 0,132(6)
+ fmuls 0,0,6
+ stfs 0,208(4)
+ lfs 0,96(6)
+ fmuls 6,0,6
+ stfs 6,64(4)
+ ld 30,-152(1) # epilogue: restore r30, then f15..f31, from the slots written at entry
+ lfd 15,-136(1)
+ lfd 16,-128(1)
+ lfd 17,-120(1)
+ lfd 18,-112(1)
+ lfd 19,-104(1)
+ lfd 20,-96(1)
+ lfd 21,-88(1)
+ lfd 22,-80(1)
+ lfd 23,-72(1)
+ lfd 24,-64(1)
+ lfd 25,-56(1)
+ lfd 26,-48(1)
+ lfd 27,-40(1)
+ lfd 28,-32(1)
+ lfd 29,-24(1)
+ lfd 30,-16(1)
+ lfd 31,-8(1)
blr
- .long 0
- .quad 0
-.Lfunc_end0:
- .size imdct36, .Lfunc_end0-.Lfunc_begin0
- # -- End function
- .type icos36h,@object # @icos36h
- .section .rodata,"a",@progbits
- .p2align 2
-icos36h:
- .long 1048608043 # float 0.250954956
- .long 1048871918 # float 0.258819044
- .long 1049443197 # float 0.275844485
- .long 1050427991 # float 0.305193633
- .long 1052050675 # float 0.353553385
- .long 1054812484 # float 0.435861707
- .long 1050111961 # float 0.295775205
- .long 1056392938 # float 0.482962906
- .long 0 # float 0
- .size icos36h, 36
-
- .type icos36,@object # @icos36
- .p2align 2
-icos36:
- .long 1056996651 # float 0.501909912
- .long 1057260526 # float 0.517638087
- .long 1057831805 # float 0.551688969
- .long 1058816599 # float 0.610387265
- .long 1060439283 # float 0.707106769
- .long 1063201092 # float 0.871723413
- .long 1066889177 # float 1.18310082
- .long 1073170154 # float 1.93185163
- .long 1085772884 # float 5.73685646
- .size icos36, 36
-
-
- .ident "clang version 7.0.1-8+deb10u2 (tags/RELEASE_701/final)"
- .section ".note.GNU-stack","",@progbits
-# .addrsig
-# .addrsig_sym imdct36
-# .addrsig_sym icos36h
-# .addrsig_sym icos36
+ .long 0
+ .byte 0,0,2,0,17,2,0,0
+ .size imdct36,.-imdct36
+ .ident "GCC: (GNU) 10.3.0"
+ .section .note.GNU-stack,"",@progbits
--- /dev/null
+# # ffmpeg lgpl 2.1 or later
+#
+# some instructions could be saved by using fmac (sv.fmadds, sv.fnmsubs)
+# but the accuracy is so high it produces different results. this
+# demo therefore uses fmuls followed by fmsub/fmadd in map-reduce mode
+# also note, the FP registers are overwritten, not saved on stack yet.
+# at some point 128 registers will be available, meaning that an EABI
+# will be defined where there will be plenty of temporaries and no need
+# to store 24 FP regs on the stack.
+
+# register-number aliases: GPRs first, then the FP vector bases and the predicate
+.set out, 3 # GPR: base address of the sv.stfs stores
+.set buf, 4 # GPR: spilled on entry, otherwise unused by the visible code
+.set in, 5 # GPR: base address of the sv.lfs load
+.set win, 6 # GPR: spilled on entry, otherwise unused by the visible code
+
+.set i, 7 # not referenced by the visible code
+.set vin, 8 # FP register number: first element of the 18-float vector loaded from (in)
+.set vin1, 9 # FP register number: vin + 1
+.set vin2, 11 # FP register number: vin + 3
+.set pred, 30 # GPR holding the predicate mask used with /m=pred
+
+# floats
+
+ .machine libresoc
+ .text
+ .abiversion 2
+ .file "imdct36_standalone.c"
+ .section .rodata.cst4,"aM",@progbits,4 # 4-byte float constants; the imdct36 body below does not reference any of these labels
+ .p2align 2 # -- Begin function imdct36
+.LC_zero:
+ .long 0 # float 0
+.LC_2_0:
+ .long 0x40000000 # float 2 (the value the SHR() divisions expect in f29; not loaded from here by the visible code)
+.LC_0_5:
+ .long 1056964608 # float 0.5
+.LCPI0_2:
+ .long 1064341426 # float 0.939692616
+.LCPI0_3:
+ .long 3190935764 # float -0.173648179
+.LCPI0_4:
+ .long 3208911741 # float -0.766044437
+.LCPI0_5:
+ .long 3210589143 # float -0.866025388
+.LCPI0_6:
+ .long 1065098332 # float 0.984807729
+.LCPI0_7:
+ .long 3199147332 # float -0.342020154
+.LCPI0_8:
+ .long 1063105495 # float 0.866025388
+.LCPI0_9:
+ .long 3206843835 # float -0.642787635
+ .text
+ .globl imdct36
+ .p2align 4
+ .type imdct36,@function
+# ----------------------------------------------------------------------
+# imdct36 -- partial SVP64 port of the opening steps of ffmpeg's imdct36
+#
+# ABI:    ELFv2 (.abiversion 2); leaf function, no stack frame is created
+# In:     r3 = out, r5 = in (r4 = buf and r6 = win are spilled but unused)
+# Out:    18 floats stored at 0(out), twice: first the pre-summed input
+#         vector, then the 18 FP registers starting at f32 (t1..t5 area)
+# Uses:   f8-f25 (input vector), f32-f45 (temporaries), f29 (2.0f divisor)
+#         r30 (predicate mask; saved on entry, restored before return)
+# Stack:  doubleword slots at -16(r1) .. -48(r1), below the stack pointer
+# Note:   the FP registers listed above are overwritten and not saved.
+# ----------------------------------------------------------------------
+imdct36: # @imdct36
+.Lfunc_begin0:
+.Lfunc_gep0:
+ addis 2, 12, .TOC.-.Lfunc_gep0@ha
+ addi 2, 2, .TOC.-.Lfunc_gep0@l
+.Lfunc_lep0:
+ .localentry imdct36, .Lfunc_lep0-.Lfunc_gep0
+# %bb.0:
+ std 30, -16(1) # r30 is nonvolatile; it is reused as pred below and reloaded before blr
+ std 3, -24(1) # argument spills; never reloaded in this function
+ std 4, -32(1)
+ std 5, -40(1)
+ std 6, -48(1)
+
+.loop1:
+ setvl 0,0,18,0,1,1 # Set VL to 18 elements
+ # Load 18 floats from (in)
+ sv.lfs *vin, 0(in)
+ # equivalent to: for (i = 17; i >= 1; i--) in[i] += in[i-1];
+ sv.fadds/mrr *vin1, *vin1, *vin
+ # VL = 16 for the predicated every-other-element pass below
+ setvl 0,0,16,0,1,1
+ li 30, 0
+ ori 30, 30, 0xaaaa # Predicate mask 0b1010101010101010
+ # equivalent to: for (i = 17; i >= 3; i -= 2) in[i] += in[i-2];
+ # NOTE(review): vin2 is f11, i.e. in[3]. If predicate bit 0 maps to
+ # element 0, mask 0xaaaa enables elements 1,3,..,15 = in[4],in[6],..,in[18]
+ # rather than the odd indices of the C loop -- confirm the predicate
+ # bit order and the base registers against the SVP64 spec.
+ sv.fadds/mrr/m=pred *vin2, *vin2, *vin1
+ # Use SETVL again as we want to store 18 floats to (out)
+ setvl 0,0,18,0,1,1
+ sv.stfs *vin, 0(out)
+
+ # f29 is expected to hold 2.0f; it is the divisor for the SHR macro.
+ # NOTE(review): the load below is still commented out and nothing else
+ # in this function writes f29, so it holds whatever the caller left.
+ # fmvis 29, 0x4000
+
+ # Use SETVL 2 for the next loop and calculate the temporary variables t1..t5
+ # equivalent to:
+ # for (j = 0; j < 2; j++) {
+ # in1 = in + j;
+ # t1 = in1[2*0] - in1[2*6];
+ # t2 = in1[2*4] + in1[2*8] - in1[2*2];
+ # t3 = in1[2*0] + SHR(in1[2*6],1);
+ # t4 = t1 - SHR(t2, 1);
+ # t5 = t1 + t2;
+ # }
+ # With VL = 2 each temporary occupies two FP registers (j = 0, 1):
+ # t1 -> f32-f33
+ # t2 -> f35-f36
+ # t3 -> f38-f39
+ # t4 -> f41-f42
+ # t5 -> f44-f45
+ # The 18 values of the 'in' array are already in FP registers 8-25
+ setvl 0,0,2,0,1,1
+ # t1 = in[0+j] - in[12+j]
+ sv.fsubs 32.v, 8.v, 20.v
+ # t2 = in[8+j] + in[16+j] - in[4+j]
+ sv.fadds 35.v, 16.v, 24.v
+ sv.fsubs 35.v, 35.v, 12.v
+ # t3, SHR(a,b) = a * 1.0f/(1 << (1)) = a / 2 essentially fdiv a, a, 2.0
+ sv.fdivs 38.v, 20.v, 29
+ sv.fadds 38.v, 38.v, 8.v
+ # t4, essentially fdiv 41.v, 35.v, 29
+ sv.fdivs 41.v, 35.v, 29
+ sv.fsubs 41.v, 32.v, 41.v
+ # t5
+ sv.fadds 44.v, 32.v, 35.v
+
+ # Use SETVL again as we want to store 18 floats to (out)
+ setvl 0,0,18,0,1,1
+ sv.stfs 32.v, 0(3)
+ ld 30, -16(1) # restore the caller's r30 (clobbered above as the predicate mask)
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end0:
+ .size imdct36, .Lfunc_end0-.Lfunc_begin0
+ # -- End function
+ .type icos36h,@object # @icos36h
+ .section .rodata,"a",@progbits
+ .p2align 2
+icos36h: # 9 floats; entries 0..5 equal icos36[i]/2, entries 6..7 equal icos36[i]/4, entry 8 is 0; not referenced by the visible code
+ .long 1048608043 # float 0.250954956
+ .long 1048871918 # float 0.258819044
+ .long 1049443197 # float 0.275844485
+ .long 1050427991 # float 0.305193633
+ .long 1052050675 # float 0.353553385
+ .long 1054812484 # float 0.435861707
+ .long 1050111961 # float 0.295775205
+ .long 1056392938 # float 0.482962906
+ .long 0 # float 0
+ .size icos36h, 36
+
+ .type icos36,@object # @icos36
+ .p2align 2
+icos36: # 9 floats; the same bit patterns appear as .LC9..LC17 in the GCC-generated build's constant pool; not referenced by the visible code
+ .long 1056996651 # float 0.501909912
+ .long 1057260526 # float 0.517638087
+ .long 1057831805 # float 0.551688969
+ .long 1058816599 # float 0.610387265
+ .long 1060439283 # float 0.707106769
+ .long 1063201092 # float 0.871723413
+ .long 1066889177 # float 1.18310082
+ .long 1073170154 # float 1.93185163
+ .long 1085772884 # float 5.73685646
+ .size icos36, 36
+
+
+ .ident "clang version 7.0.1-8+deb10u2 (tags/RELEASE_701/final)"
+ .section ".note.GNU-stack","",@progbits
+# .addrsig
+# .addrsig_sym imdct36
+# .addrsig_sym icos36h
+# .addrsig_sym icos36