openpower/sv/int_fp_mv/appendix.mdwn

   1 # SimpleV SVP64 polymorphic element width overrides
   2
   3 SimpleV, the Draft Cray-style Vectorisation for OpenPOWER, may
   4 independently override either or both of the source and destination
   5 register bitwidths in the base operation used to create the Vector
   6 operation.  In the case of IEEE754 FP operands this gives an
   7 opportunity to add `FP16` as well as `BF16` to the Power ISA
   8 with no actual new Scalar opcodes.
   9
  10 However there is the potential for confusion as to the definition
  11 of what Single and Double mean when the operand width has been
  12 overridden.  Simple-V therefore sets the following
  13 "reinterpretation" rules:
  14
  15 * any operation whose assembler mnemonic does not end in "s"
  16   (being defined in v3.0B as a "double" operation) is
  17   redefined as an operation at the overridden elwidth for the
  18   relevant operand, rather than at a fixed 64 bit "Double"
  19 * any operation nominally defined as a "single" FP operation
  20   is redefined to be **half the elwidth** rather than
  21   "half of 64 bit" (32 bit, aka "Single")
  22
  23 Examples:
  24
  25 * `sv.fmvtg/sw=32 RT.v, FRA.v` is defined as treating FRA
  26    as a vector of *FP32* source operands each *32* bits wide
  27    which are to be placed into *64* bit integer destination elements.
  28 * `sv.fmvfgs/dw=32 FRT.v, RA.v` is defined as taking the bottom
  29    32 bits of each RA integer source, then performing a **32 bit**
  30    FP32 to **FP16** conversion and storing the result in the
  31    **32 bits** of an FRT destination element.
  32
  33 "Single" is therefore redefined in SVP64 to be "half elwidth"
  34 rather than Double width hardcoded to 64 and Single width
  35 hardcoded to 32.  This allows a full range of conversions
  36 between FP64, FP32, FP16 and BF16.
  37
  38 Note however that attempts to perform "Single" operations on
  39 FP16 elwidths will raise an illegal instruction trap: Half
  40 of FP16 is FP8, which is not defined as a legal IEEE754 format.
  41
  42 # Simple-V SVP64 Saturation
  43
  44 SVP64 also allows for Saturation, such that the result is clamped
  45 to the maximum or minimum value of the result operand's range rather
  46 than overflowing.
  47
  48 There will be some interaction here with Conversion routines which
  49 will need careful application of the SVP64 Saturation rules: some
  50 work will be duplicated by the operation itself, but in some cases
  51 it will change the result.
  52
  53 The critical thing to note is that SVP64 Saturation is to be considered
  54 as the "priority override" where the operation should take place at
  55 "Infinite bitwidth followed by a result post-analysis phase".
  56
  57 Thus if by chance an unsigned conversion to INT was carried out,
  58 with a destination override to 16 bit results, in combination
  59 with a **signed** SVP64 Saturation override, the result would
  60 be clamped to within the range 0 to 0x7FFF.  The actual
  61 operation itself, being an *Unsigned* conversion, would set the
  62 minimum value to zero, whilst the SVP64 *Signed* Saturation
  63 would set the maximum to that of a Signed 16 bit integer (0x7FFF).
  64
  65 As always with SVP64, some thought and care has to be put into
  66 how the override behaviour will interact with the base scalar
  67 operation.
  68
  69 # Equivalent OpenPOWER ISA v3.0 Assembly Language for FP -> Integer Conversion Modes
  70
  71 ## C (IEEE754 standard compliant)
  72
  73 ```
  74 int32_t toInt32(double number)
  75 {
  76     uint32_t result = (int32_t)number;
  77     return result;
  78 }
  79 ```
  80
  81 ### 64-bit float -> 32-bit signed integer
  82
  83 ```
  84 toInt32(double):
  85         fctiwz 1,1
  86         addi 9,1,-16
  87         stfiwx 1,0,9
  88         lwz 3,-16(1)
  89         extsw 3,3
  90         blr
  91         .long 0
  92         .byte 0,9,0,0,0,0,0,0
  93 ```
  94
  95 ## Rust
  96 ```
  97 pub fn fcvttgd_rust(v: f64) -> i64 {
  98     v as i64
  99 }
 100
 101 pub fn fcvttgud_rust(v: f64) -> u64 {
 102     v as u64
 103 }
 104
 105 pub fn fcvttgw_rust(v: f64) -> i32 {
 106     v as i32
 107 }
 108
 109 pub fn fcvttguw_rust(v: f64) -> u32 {
 110     v as u32
 111 }
 112 ```
 113
 114 ### 64-bit float -> 64-bit signed integer
 115
 116 ```
 117 .LCPI0_0:
 118         .long   0xdf000000
 119 .LCPI0_1:
 120         .quad   0x43dfffffffffffff
 121 example::fcvttgd_rust:
 122 .Lfunc_gep0:
 123         addis 2, 12, .TOC.-.Lfunc_gep0@ha
 124         addi 2, 2, .TOC.-.Lfunc_gep0@l
 125         addis 3, 2, .LCPI0_0@toc@ha
 126         fctidz 2, 1
 127         fcmpu 5, 1, 1
 128         li 4, 1
 129         li 5, -1
 130         lfs 0, .LCPI0_0@toc@l(3)
 131         addis 3, 2, .LCPI0_1@toc@ha
 132         rldic 4, 4, 63, 0
 133         fcmpu 0, 1, 0
 134         lfd 0, .LCPI0_1@toc@l(3)
 135         stfd 2, -8(1)
 136         ld 3, -8(1)
 137         fcmpu 1, 1, 0
 138         cror 24, 0, 3
 139         isel 3, 4, 3, 24
 140         rldic 4, 5, 0, 1
 141         isel 3, 4, 3, 5
 142         isel 3, 0, 3, 23
 143         blr
 144         .long   0
 145         .quad   0
 146 ```
 147
 148 ### 64-bit float -> 64-bit unsigned integer
 149
 150 ```
 151 .LCPI1_0:
 152         .long   0x00000000
 153 .LCPI1_1:
 154         .quad   0x43efffffffffffff
 155 example::fcvttgud_rust:
 156 .Lfunc_gep1:
 157         addis 2, 12, .TOC.-.Lfunc_gep1@ha
 158         addi 2, 2, .TOC.-.Lfunc_gep1@l
 159         addis 3, 2, .LCPI1_0@toc@ha
 160         fctiduz 2, 1
 161         li 4, -1
 162         lfs 0, .LCPI1_0@toc@l(3)
 163         addis 3, 2, .LCPI1_1@toc@ha
 164         fcmpu 0, 1, 0
 165         lfd 0, .LCPI1_1@toc@l(3)
 166         stfd 2, -8(1)
 167         ld 3, -8(1)
 168         fcmpu 1, 1, 0
 169         cror 20, 0, 3
 170         isel 3, 0, 3, 20
 171         isel 3, 4, 3, 5
 172         blr
 173         .long   0
 174         .quad   0
 175 ```
 176
 177 ### 64-bit float -> 32-bit signed integer
 178
 179 ```
 180 .LCPI2_0:
 181         .long   0xcf000000
 182 .LCPI2_1:
 183         .quad   0x41dfffffffc00000
 184 example::fcvttgw_rust:
 185 .Lfunc_gep2:
 186         addis 2, 12, .TOC.-.Lfunc_gep2@ha
 187         addi 2, 2, .TOC.-.Lfunc_gep2@l
 188         addis 3, 2, .LCPI2_0@toc@ha
 189         fctiwz 2, 1
 190         lis 4, -32768
 191         lis 5, 32767
 192         lfs 0, .LCPI2_0@toc@l(3)
 193         addis 3, 2, .LCPI2_1@toc@ha
 194         fcmpu 0, 1, 0
 195         lfd 0, .LCPI2_1@toc@l(3)
 196         addi 3, 1, -4
 197         stfiwx 2, 0, 3
 198         fcmpu 5, 1, 1
 199         lwz 3, -4(1)
 200         fcmpu 1, 1, 0
 201         cror 24, 0, 3
 202         isel 3, 4, 3, 24
 203         ori 4, 5, 65535
 204         isel 3, 4, 3, 5
 205         isel 3, 0, 3, 23
 206         blr
 207         .long   0
 208         .quad   0
 209 ```
 210
 211 ### 64-bit float -> 32-bit unsigned integer
 212
 213 ```
 214 .LCPI3_0:
 215         .long   0x00000000
 216 .LCPI3_1:
 217         .quad   0x41efffffffe00000
 218 example::fcvttguw_rust:
 219 .Lfunc_gep3:
 220         addis 2, 12, .TOC.-.Lfunc_gep3@ha
 221         addi 2, 2, .TOC.-.Lfunc_gep3@l
 222         addis 3, 2, .LCPI3_0@toc@ha
 223         fctiwuz 2, 1
 224         li 4, -1
 225         lfs 0, .LCPI3_0@toc@l(3)
 226         addis 3, 2, .LCPI3_1@toc@ha
 227         fcmpu 0, 1, 0
 228         lfd 0, .LCPI3_1@toc@l(3)
 229         addi 3, 1, -4
 230         stfiwx 2, 0, 3
 231         lwz 3, -4(1)
 232         fcmpu 1, 1, 0
 233         cror 20, 0, 3
 234         isel 3, 0, 3, 20
 235         isel 3, 4, 3, 5
 236         blr
 237         .long   0
 238         .quad   0
 239 ```
 240
 241 ## JavaScript
 242
 243 ```
 244 #include <stdint.h>
 245
 246 namespace WTF {
 247 template<typename Target, typename Src>
 248 inline Target bitwise_cast(Src v) {
 249     union {
 250         Src s;
 251         Target t;
 252     } u;
 253     u.s = v;
 254 …    if (exp < 32) {
 255         int32_t missingOne = 1 << exp;
 256         result &= missingOne - 1;
 257         result += missingOne;
 258     }
 259
 260     // If the input value was negative (we could test either 'number' or 'bits',
 261     // but testing 'bits' is likely faster) invert the result appropriately.
 262     return bits < 0 ? -result : result;
 263 }
 264 ```
 265
 266 ### 64-bit float -> 32-bit signed integer
 267
 268 ```
 269 toInt32(double):
 270         stfd 1,-16(1)
 271         li 3,0
 272         ori 2,2,0
 273         ld 9,-16(1)
 274         rldicl 8,9,12,53
 275         addi 10,8,-1023
 276         cmplwi 7,10,83
 277         bgtlr 7
 278         cmpwi 7,10,52
 279         bgt 7,.L7
 280         cmpwi 7,10,31
 281         subfic 3,10,52
 282         srad 3,9,3
 283         extsw 3,3
 284         bgt 7,.L4
 285         li 8,1
 286         slw 10,8,10
 287         addi 8,10,-1
 288         and 3,8,3
 289         add 10,10,3
 290         extsw 3,10
 291 .L4:
 292         cmpdi 7,9,0
 293         bgelr 7
 294 .L8:
 295         neg 3,3
 296         extsw 3,3
 297         blr
 298 .L7:
 299         cmpdi 7,9,0
 300         addi 3,8,-1075
 301         sld 3,9,3
 302         extsw 3,3
 303         bgelr 7
 304         b .L8
 305         .long 0
 306         .byte 0,9,0,0,0,0,0,0
 307 ```