# floats
# Symbolic operand names used by the floating-point instructions further
# down (lfiwax / fsubs / fneg / stfs).  Each name is bound to a bare number.
#   sum, sum2       - the two running results
#   tmpsum, tmpsum2 - scratch accumulators that are later subtracted from
#                     sum and sum2 respectively
.set sum, 0
.set sum2, 1
+.set tmpsum, 2
+.set tmpsum2, 3
# Assembler configuration: POWER9 instruction set, ABI version 2.
.machine power9
.abiversion 2
# sv.lfs/els fv1.v, 256(p)
# sv.fmadds/mr sum, fv0.v, fv1.v, sum
# NOTE(review): lfiwax with RA=0 loads the word stored at the address held
# in r9; tmpsum only ends up zero if that word is 0 - confirm what r9
# points at here.
+ lfiwax tmpsum, 0, 9 # zero it
# p = buf + 192; win is advanced by 128 for this step and restored below.
addi p, buf, 192
addi win, win, 128
# SUM8(MLSS, sum, w + 32, p)
# The products are accumulated into tmpsum with a multiply-add and the
# total is then subtracted from sum in one step (this replaces the removed
# multiply-subtract that accumulated directly into sum).
# sv.lfs/els fv0.v, 256(win)
# sv.lfs/els fv1.v, 256(p)
- # sv.fmsubs/mr sum, fv0.v, fv1.v, sum
+ # sv.fmadds/mr tmpsum, fv0.v, fv1.v, tmpsum
# sum = sum - tmpsum
+ fsubs sum, sum, tmpsum
# Undo the +128 applied to win above.
subi win, win, 128
# Store the single-precision result at offset 0 from out.
stfs sum, 0(out)
# First half: the same fv0 vector is multiplied against two windows
# (win -> sum, win2 -> sum2).
# sv.lfs/els fv1.v, 256(win)
# sv.lfs/els fv2.v, 256(win2)
# sv.fmadds/mr sum, fv0.v, fv1.v, sum
- # sv.fmsubs/mr sum2, fv0.v, fv2.v, sum2
+ # sv.fmadds/mr sum2, fv0.v, fv2.v, sum2
# sum2 = -sum2.
# NOTE(review): negating after a multiply-add accumulation gives
# -(old sum2 + products); that equals "old sum2 - products" only if sum2
# was 0 before the accumulation - confirm against the code preceding this
# hunk.
+ fneg sum2, sum2
# p = buf + 192 - i
addi p, buf, 192
subf p, i, p
# NOTE(review): lfiwax with RA=0 loads the word at the address in r9; the
# temporaries are only zero if that word is 0 - confirm.
+ lfiwax tmpsum, 0, 9 # zero it
+ lfiwax tmpsum2, 0, 9 # zero it
# Advance win (and, in the commented sv.addi, win2) by 128 for the second
# half; both are restored at the end of this hunk.
addi win, win, 128
# sv.addi win2, win2, 128
# sv.lfs/els fv0.v, 256(p)
# sv.lfs/els fv1.v, 256(win)
# sv.lfs/els fv2.v, 256(win2)
# Second half: accumulate products into the temporaries, then subtract the
# totals from sum / sum2 (replaces the removed multiply-subtract forms).
- # sv.fmsubs/mr sum, fv0.v, fv1.v, sum
- # sv.fmsubs/mr sum2, fv0.v, fv2.v, sum2
+ # sv.fmadds/mr tmpsum, fv0.v, fv1.v, tmpsum
+ # sv.fmadds/mr tmpsum2, fv0.v, fv2.v, tmpsum2
# sum  = sum  - tmpsum
# sum2 = sum2 - tmpsum2
+ fsubs sum, sum, tmpsum
+ fsubs sum2, sum2, tmpsum2
# Undo the +128 offsets applied above.
subi win, win, 128
# sv.addi win2, win2, -128
# p = buf + 128; win is advanced by 128 for this final step.
addi p, buf, 128
addi win, win, 128
# NOTE(review): both loads below read the word at the address in r9
# (lfiwax with RA=0); tmpsum and sum are only zero if that word is 0 -
# confirm.
+ lfiwax tmpsum, 0, 9 # zero it
lfiwax sum, 0, 9 # zero it
# SUM8(MLSS, sum, w + 32, p)
# Products are accumulated into tmpsum and then subtracted from sum.
# Because sum was reloaded just above, the stored value is -tmpsum when
# that load yields zero.
# sv.lfs/els fv0.v, 256(win)
# sv.lfs/els fv1.v, 256(p)
- # sv.fmsubs/mr sum, fv0.v, fv1.v, sum
- # XXX no? possible bug? fneg sum, sum
+ # sv.fmadds/mr tmpsum, fv0.v, fv1.v, tmpsum
# sum = sum - tmpsum
+ fsubs sum, sum, tmpsum
# Store the single-precision result at offset 0 from out.
stfs sum, 0(out)