benchmarks/vec-vvadd/vec_vvadd_asm.S

   1 #*****************************************************************************
   2 # vvadd function (assembly version)
   3 #-----------------------------------------------------------------------------
   4
   5
   6 #--------------------------------------------------------------------------
   7 # Headers and Defines
   8 #--------------------------------------------------------------------------
   9
  10 # Here are some defines that make writing assembly code easier.
  11
  12 # I'm relying on the calling convention for functions here: the first
  13 # argument (n) arrives in register a0, the second (a) in a1, etc.
  14 # rN, rA, rB and rC simply name those argument registers.
  15 #define rN      a0
  16 #define rA      a1
  17 #define rB      a2
  18 #define rC      a3
  19 # rVLen is not an argument; vvcfgivl/vsetvl write the granted vector length to it.
  20 #define rVLen   a4
  21
  22 # WARNING: do not write to the s0,...,s9 registers without first saving them to
  23 # the stack.
  24
  25 #--------------------------------------------------------------------------
  26 # void scalar_vvadd_asm( int n, float a[], float b[], float c[] )
  27 #
  28 # Scalar reference version: c[i] = a[i] + b[i] for 0 <= i < n.
  29 # Does nothing when n <= 0.  Overwrites rN, rA, rB, rC, f2 and f3.
  30 #--------------------------------------------------------------------------
  31
  32         .text
  33         .align 2
  34         .globl scalar_vvadd_asm
  35         .type  scalar_vvadd_asm,@function
  36
  37 scalar_vvadd_asm:
  38         bge  zero, rN, done   # exit early if n <= 0 (signed); a negative n
  39                               # would keep the bne below looping until it wraps
  40 loop:
  41         flw  f2, 0(rA)        # f2 = *a
  42         flw  f3, 0(rB)        # f3 = *b
  43         fadd.s f2, f2, f3     # f2 = *a + *b
  44         fsw  f2, 0(rC)        # *c = f2
  45         addi rN, rN, -1       # one fewer element remaining
  46         addi rA, rA, 4        # step all three pointers by one 4-byte float
  47         addi rB, rB, 4
  48         addi rC, rC, 4
  49         bne  rN, zero, loop   # repeat until the count reaches zero
  50 done:
  51         ret
  52
  53
  54 #--------------------------------------------------------------------------
  55 # void vt_vvadd_asm( int n, float a[], float b[], float c[] )
  56 #
  57 # Vector-thread version: c[i] = a[i] + b[i] for 0 <= i < n, strip-mined
  58 # into chunks of whatever vector length vsetvl grants on each pass.
  59 # When n <= 0 it goes straight to the fence and return.
  60 # Overwrites rN, rA, rB, rC, rVLen, a5 and a6.
  61 #--------------------------------------------------------------------------
  62
  63         .globl vt_vvadd_asm
  64         .type  vt_vvadd_asm,@function
  65
  66 vt_vvadd_asm:
  67         bge zero, rN, cpdone  # skip the loop if n <= 0 (signed compare)
  68         la a5, vtcode         # a5 = address of the vector-fetch code
  69
  70         # First, configure the vector unit.
  71         # Operands: rd (given vlen), desired vlen, num of x-regs, num of f-regs.
  72         # For vvadd, we do not need to use any x-registers, and only two
  73         # floating point registers. By using fewer registers, hwacha can give
  74         # us a longer vector length!
  75         # But make sure to use registers starting from x0, f0!
  76         # WARNING: there is a BUG if you tell it you want 0 registers of any type!
  77         # So here I'm asking for 1 x-register, even though I don't use any of them.
  78         vvcfgivl rVLen, rN, 1, 2
  79
  80 stripmineloop:
  81         vsetvl rVLen, rN   # set the vector length
  82                            # rN is the desired (application) vector length
  83                            # rVLen is what vector length we were given
  84
  85         vflw vf0, rA       # vector loads
  86         vflw vf1, rB
  87         vf 0(a5)           # vector-fetch the code at vtcode
  88         vfsw vf0, rC       # vector store
  89
  90         sub rN, rN, rVLen  # book keeping: elements still left to process
  91         slli a6, rVLen, 2  # turn num_elements into num_bytes
  92         add rA, rA, a6     # advance all three pointers past this strip
  93         add rB, rB, a6
  94         add rC, rC, a6
  95         bne rN, zero, stripmineloop
  96
  97 cpdone:
  98         fence.v.l          # make stores visible to the control processor
  99
 100         # The C code uses a jalr instruction to call this function
 101         # so we can use a jr to return back to where the function
 102         # was called.  Also known as "ret", for "return".
 103         ret
 104
 105         # Vector-fetch code: reached only through "vf 0(a5)" and ended by stop.
 106 vtcode:
 107         fadd.s f0, f0, f1
 108         stop