benchmarks/vec-vvadd/vec_vvadd_asm.S

   1 # See LICENSE for license details.
   2
   3 #*****************************************************************************
   4 # vvadd function (assembly version)
   5 #-----------------------------------------------------------------------------
   6
   7
   8 #--------------------------------------------------------------------------
   9 # Headers and Defines
  10 #--------------------------------------------------------------------------
  11
   12 # Register aliases (C preprocessor macros) that make the code below easier to read.
   13
   14 # The functions in this file take (int n, float a[], float b[], float c[]);
   15 # per the calling convention these arguments arrive in a0, a1, a2, a3 in order.
   16
   17 #define rN      a0
   18 #define rA      a1
   19 #define rB      a2
   20 #define rC      a3
   21 # Scratch, not an argument: receives the vector length from vvcfgivl/vsetvl.
   22 #define rVLen   a4
   23
   24 # WARNING: do not write to the callee-saved registers (s0-s11 under the standard
   25 # RISC-V calling convention) without first saving them to the stack.
  26
  27 #--------------------------------------------------------------------------
  28 # void scalar_vvadd_asm( int n, float a[], float b[], float c[] )
  29 #--------------------------------------------------------------------------
  30
  31         .text
  32         .align 2
  33         .globl scalar_vvadd_asm
  34         .type  scalar_vvadd_asm,@function
  35
  36 scalar_vvadd_asm:
  37
  38         # *****   Scalar Example   *****
  39
  40         beq rN, zero, done    # exit early if n == 0
  41
  42 loop:
  43         flw  f2, 0(rA)
  44         flw  f3, 0(rB)
  45         fadd.s f2, f2, f3
  46         fsw  f2, 0(rC)
  47         addi rN, rN, -1
  48         addi rA, rA, 4
  49         addi rB, rB, 4
  50         addi rC, rC, 4
  51         bne  rN, zero, loop
  52 done:
  53         ret
  54
  55
  56 #--------------------------------------------------------------------------
  57 # void vt_vvadd_asm( int n, float a[], float b[], float c[] )
  58 #--------------------------------------------------------------------------
  59
  60
  61         # ***** Vector-Thread Example *****
  62
  63         .globl vt_vvadd_asm
  64         .type  vt_vvadd_asm,@function
  65
  66 vt_vvadd_asm:
  67
  68         beq rN, zero, cpdone
  69         la a5, vtcode
  70
  71         # First, configure the vector unit.
  72         # rd (given vlen), desired vlen, num of x-regs, num of f-regs
  73         # For vvadd, we do not need to use any x-registers, and only two
  74         # floating point registers. By using fewer registers, hwacha can give us a longer vector length!
  75         # But make sure to use registers starting from x0, f0!
  76         # WARNING: there is a BUG if you tell it you want 0 registers of any type!
  77         # So here I'm asking for 1 x-register, even though I don't use any of them.
  78         vvcfgivl rVLen, rN, 1, 2
  79
  80
  81 stripmineloop:
  82         vsetvl rVLen, rN   # set the vector length
  83                            # rN is the desired (application) vector length
  84                            # rVLen is what vector length we were given
  85
  86         vflw vf0, rA       # vector loads
  87         vflw vf1, rB
  88         vf 0(a5)           # jump to vector-fetch code
  89         vfsw vf0, rC       # vector store
  90
  91         sub rN, rN, rVLen  # book keeping
  92         slli a6, rVLen, 2  # turn num_elements into num_bytes
  93         add rA, rA, a6
  94         add rB, rB, a6
  95         add rC, rC, a6
  96         bne rN, zero, stripmineloop
  97
  98 cpdone:
  99         fence.v.l          # make stores visible to the control processor
 100         ret
 101
  102 vtcode:                    # vector-fetch block, entered via the vf 0(a5) in vt_vvadd_asm
  103         fadd.s f0, f0, f1  # f0 = f0 + f1 (presumably the per-element view of vf0, vf1)
  104         stop               # presumably ends the vector-fetch block -- TODO confirm
  105
  106         # NOTE(review): the ret below looks unreachable. vt_vvadd_asm returns
  107         # through its own ret before this label, and the vector-fetch block
  108         # appears to end at stop. The original note here explained that the C
  109         # caller uses jalr, so jr (alias ret) returns to it. Confirm before removing.
  110         ret