From b1a838ceee6c2873b14af2c1f0d476a2e4bf2979 Mon Sep 17 00:00:00 2001 From: Xan Date: Thu, 26 Apr 2018 14:19:01 +0100 Subject: [PATCH] --- ...RVP_vs_Andes_Packed_SIMD_ISA_proposal.mdwn | 187 ------------------ 1 file changed, 187 deletions(-) diff --git a/Comparative_analysis_Harmonised_RVP_vs_Andes_Packed_SIMD_ISA_proposal.mdwn b/Comparative_analysis_Harmonised_RVP_vs_Andes_Packed_SIMD_ISA_proposal.mdwn index 5ddb03066..8b1378917 100644 --- a/Comparative_analysis_Harmonised_RVP_vs_Andes_Packed_SIMD_ISA_proposal.mdwn +++ b/Comparative_analysis_Harmonised_RVP_vs_Andes_Packed_SIMD_ISA_proposal.mdwn @@ -1,188 +1 @@ -# Comparative analysis with Andes Packed ISA proposal -## Register file - -The harmonised RVP register file is divided into a lower bank of Vector[INT8] and an upper bank of Vector[INT16] - -| Register | Andes ISA | Harmonised RVP ISA | -| ------------------ | ------------------------- | ------------------- | -| v0 | Hardwired zero | Hardwired zero | -| v1 | 32bit GPR or Vector[4xINT8 or 2xINT16] | Predicate mask | -| | | | -| v2 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[4xSINT8] | -| v3 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[4xSINT8] | -| v4 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[4xSINT8] | -| v5 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[4xSINT8] | -| v6 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[4xSINT8] | -| v7 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[4xSINT8] | -| v8 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[4xUINT8] | -| v9 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[4xUINT8] | -| v10 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[4xUINT8] | -| v11 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[4xUINT8] | -| v12 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[4xUINT8] | -| v13 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or 
Vector[4xUINT8] | -| v14 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[4xUINT8] | -| v15 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[4xUINT8] | -| | | | -| v16 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[2xSINT16] | -| v17 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[2xSINT16] | -| v18 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[2xSINT16] | -| v19 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[2xSINT16] | -| v20 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[2xSINT16] | -| v21 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[2xSINT16] | -| v22 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[2xSINT16] | -| v23 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[2xSINT16] | -| v24 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[2xUINT16] | -| v25 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[2xUINT16] | -| v26 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[2xUINT16] | -| v27 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[2xUINT16] | -| v28 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[2xUINT16] | -| v29 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[2xUINT16] | -| | | | -| v30 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[1xSINT32] | -| v31 | 32bit GPR or Vector[4xINT8 or 2xINT16] | 32bit GPR or Vector[1xSINT32] | - - -## 16-bit Arithmetic - -| Andes Mnemonic | 16-bit Instruction | Harmonised RVP Equivalent | -| ------------------ | ------------------------- | ------------------- | -| ADD16 rt, ra, rb | Add | VADD (v16 <= rt,ra,rb <= v29), mm=00| -| RADD16 rt, ra, rb | Signed Halving add | RADD (v16 <= rt,ra,rb <= v23), mm=00| -| URADD16 rt, ra, rb | Unsigned Halving add | RADD (v24 <= rt,ra,rb <= v29), mm=00| -| KADD16 rt, ra, rb | Signed Saturating add | VADD (v16 <= rt,ra,rb <= v23), mm=01| 
-| UKADD16 rt, ra, rb | Unsigned Saturating add | VADD (v24 <= rt,ra,rb <= v29), mm=01| -| SUB16 rt, ra, rb | Subtract | VSUB (v16 <= rt,ra,rb <= v29), mm=00| -| RSUB16 rt, ra, rb | Signed Halving sub | RSUB (v16 <= rt,ra,rb <= v23), mm=00| -| URSUB16 rt, ra, rb | Unsigned Halving sub | RSUB (v24 <= rt,ra,rb <= v29), mm=00| -| KSUB16 rt, ra, rb | Signed Saturating sub | VSUB (v16 <= rt,ra,rb <= v23), mm=01| -| UKSUB16 rt, ra, rb | Unsigned Saturating sub | VSUB (v24 <= rt,ra,rb <= v29), mm=01| -| CRAS16 rt, ra, rb | Cross Add & Sub | | -| RCRAS16 rt, ra, rb | Signed Halving Cross Add & Sub | | -| URCRAS16 rt, ra, rb| Unsigned Halving Cross Add & Sub | | -| KCRAS16 rt, ra, rb | Signed Saturating Cross Add & Sub | | -| UKCRAS16 rt, ra, rb| Unsigned Saturating Cross Add & Sub | | -| CRSA16 rt, ra, rb | Cross Sub & Add | | -| RCRSA16 rt, ra, rb | Signed Halving Cross Sub & Add | | -| URCRSA16 rt, ra, rb| Unsigned Halving Cross Sub & Add | | -| KCRSA16 rt, ra, rb | Signed Saturating Cross Sub & Add | | -| UKCRSA16 rt, ra, rb| Unsigned Saturating Cross Sub & Add | | - -## 8-bit Arithmetic - -| Andes Mnemonic | 8-bit Instruction | Harmonised RVP Equivalent | -| ------------------ | ------------------------- | ------------------- | -| ADD8 rt, ra, rb | Add | VADD (v2 <= rt,ra,rb <= v15), mm=00 | -| RADD8 rt, ra, rb | Signed Halving add | RADD (v2 <= rt,ra,rb <= v7), mm=00 | -| URADD8 rt, ra, rb | Unsigned Halving add | RADD (v8 <= rt,ra,rb <= v15), mm=00 | -| KADD8 rt, ra, rb | Signed Saturating add | VADD (v2 <= rt,ra,rb <= v7), mm=01 | -| UKADD8 rt, ra, rb | Unsigned Saturating add | VADD (v8 <= rt,ra,rb <= v15), mm=01 | -| SUB8 rt, ra, rb | Subtract | VSUB (v2 <= rt,ra,rb <= v15), mm=00 | -| RSUB8 rt, ra, rb | Signed Halving sub | RSUB (v2 <= rt,ra,rb <= v7), mm=00 | -| URSUB8 rt, ra, rb | Unsigned Halving sub | RSUB (v8 <= rt,ra,rb <= v15), mm=00 | -| KSUB8 rt, ra, rb | Signed Saturating sub | VSUB (v2 <= rt,ra,rb <= v7), mm=01 | -| UKSUB8 rt, ra, rb | Unsigned 
Saturating sub | VSUB (v8 <= rt,ra,rb <= v15), mm=01 | - -## 16-bit Shifts - -SRA[I]16/SRL[I]16/SLL[I]16 are to be mapped to VOP shift instructions in the same manner as ADD16/SUB16 - -The “K” (Saturation) and “u” (Rounding) variants could be encoded using VOP’s mm field (mm=01 is saturated or rounded shift, mm=00 is standard VOP shift) - -| Andes Mnemonic | 16-bit Instruction | Harmonised RVP Equivalent | -| ------------------ | ------------------------- | ------------------- | -| SRA16 rt, ra, rb | Shift right arithmetic | VSRA (v16 <= rt,ra,rb <= v29), mm=00| -| SRAI16 rt, ra, im | Shift right arithmetic imm | VSRAI (v16 <= rt,ra <= v29), mm=00| -| SRA16.u rt, ra, rb | Rounding Shift right arithmetic | VSRA (v16 <= rt,ra,rb <= v29), mm=01| -| SRAI16.u rt, ra, im | Rounding Shift right arithmetic imm | VSRAI (v16 <= rt,ra <= v29), mm=01| -| SRL16 rt, ra, rb | Shift right logical | VSRL (v16 <= rt,ra,rb <= v29), mm=00| -| SRLI16 rt, ra, im | Shift right logical imm | VSRLI (v16 <= rt,ra <= v29), mm=00| -| SRL16.u rt, ra, rb | Rounding Shift right logical | VSRL (v16 <= rt,ra,rb <= v29), mm=01| -| SRLI16.u rt, ra, im | Rounding Shift right logical imm | VSRLI (v16 <= rt,ra <= v29), mm=01| -| SLL16 rt, ra, rb | Shift left logical | VSLL (v16 <= rt,ra,rb <= v29), mm=00| -| SLLI16 rt, ra, im | Shift left logical imm | VSLLI (v16 <= rt,ra <= v29), mm=00| -| KSLL16 rt, ra, rb | Saturating Shift left logical | VSLL (v16 <= rt,ra,rb <= v29), mm=01| -| KSLLI16 rt, ra, im | Saturating Shift left logical imm | VSLLI (v16 <= rt,ra <= v29), mm=01| -| KSLRA16 rt, ra, rb | Saturating Shift left logical or Shift right arithmetic || -| KSLRA16.u rt, ra, rb | Saturating Shift left logical or Rounding Shift right arithmetic || - - -## 8-bit Shifts - -Andes SIMD Packed ISA omits 8-bit shifts, but these can be encoded in Harmonised RVP as follows: - -| Andes Mnemonic | 8-bit Instruction | Harmonised RVP Equivalent | -| ------------------ | ------------------------- | ------------------- | -| 
n/a | Shift right arithmetic | VSRA (v2 <= rt,ra,rb <= v15), mm=00| -| n/a | Shift right arithmetic imm | VSRAI (v2 <= rt,ra <= v15), mm=00| -| n/a | Rounding Shift right arithmetic | VSRA (v2 <= rt,ra,rb <= v15), mm=01| -| n/a | Rounding Shift right arithmetic imm | VSRAI (v2 <= rt,ra <= v15), mm=01| -| n/a | Shift right logical | VSRL (v2 <= rt,ra,rb <= v15), mm=00| -| n/a | Shift right logical imm | VSRLI (v2 <= rt,ra <= v15), mm=00| -| n/a | Rounding Shift right logical | VSRL (v2 <= rt,ra,rb <= v15), mm=01| -| n/a | Rounding Shift right logical imm | VSRLI (v2 <= rt,ra <= v15), mm=01| -| n/a | Shift left logical | VSLL (v2 <= rt,ra,rb <= v15), mm=00| -| n/a | Shift left logical imm | VSLLI (v2 <= rt,ra <= v15), mm=00| -| n/a | Saturating Shift left logical | VSLL (v2 <= rt,ra,rb <= v15), mm=01| -| n/a | Saturating Shift left logical imm | VSLLI (v2 <= rt,ra <= v15), mm=01| - -## 16-bit Comparison instructions - -| Andes Mnemonic | 16-bit Instruction | Harmonised RVP Equivalent | -| ------------------ | ------------------------- | ------------------- | -| CMPEQ16 rt, ra, rb | Compare equal | VSEQ (v16 <= rt,ra,rb <= v29), mm=00| -| SCMPLT16 rt, ra, rb | Signed Compare less than | !VSGT (v16 <= rt,ra,rb <= v23), mm=00| -| SCMPLE16 rt, ra, rb | Signed Compare less or equal | VSLE (v16 <= rt,ra,rb <= v23), mm=00| -| UCMPLT16 rt, ra, rb | Unsigned Compare less than | !VSGT (v24 <= rt,ra,rb <= v29), mm=00| -| UCMPLE16 rt, ra, rb | Unsigned Compare less or equal | VSLE (v24 <= rt,ra,rb <= v29), mm=00| - -## 8-bit Comparison instructions - -| Andes Mnemonic | 8-bit Instruction | Harmonised RVP Equivalent | -| ------------------ | ------------------------- | ------------------- | -| CMPEQ8 rt, ra, rb | Compare equal | VSEQ (v2 <= rt,ra,rb <= v15), mm=00| -| SCMPLT8 rt, ra, rb | Signed Compare less than | !VSGT (v2 <= rt,ra,rb <= v7), mm=00| -| SCMPLE8 rt, ra, rb | Signed Compare less or equal | VSLE (v2 <= rt,ra,rb <= v7), mm=00| -| UCMPLT8 rt, ra, rb | Unsigned Compare 
less than | !VSGT (v8 <= rt,ra,rb <= v15), mm=00| -| UCMPLE8 rt, ra, rb | Unsigned Compare less or equal | VSLE (v8 <= rt,ra,rb <= v15), mm=00| - -## 16-bit Miscellaneous instructions - -| Andes Mnemonic | 16-bit Instruction | Harmonised RVP Equivalent | -| ------------------ | ------------------------ | ------------------- | -| SMIN16 rt, ra, rb | Signed minimum | VMIN (v16 <= rt,ra,rb <= v23), mm=00| -| UMIN16 rt, ra, rb | Unsigned minimum | VMIN (v24 <= rt,ra,rb <= v29), mm=00| -| SMAX16 rt, ra, rb | Signed maximum | VMAX (v16 <= rt,ra,rb <= v23), mm=00| -| UMAX16 rt, ra, rb | Unsigned maximum | VMAX (v24 <= rt,ra,rb <= v29), mm=00| -| SCLIP16 rt, ra, im | Signed clip | ?VCLIP (v16 <= rt,ra,rb <= v23), mm=01| -| UCLIP16 rt, ra, im | Unsigned clip | ?VCLIP (v24 <= rt,ra,rb <= v29), mm=01| -| KMUL16 rt, ra, rb | Signed multiply 16x16->16 | VMUL (v16 <= rt,ra,rb <= v23), mm=01| -| KMULX16 rt, ra, rb | Signed crossed multiply 16x16->16 | | -| SMUL16 rt, ra, rb | Signed multiply 16x16->32 | VMUL (v30 <= rt <= v31, v16 <= ra,rb <= v23), mm=00| -| SMULX16 rt, ra, rb | Signed crossed multiply 16x16->32 | | -| UMUL16 rt, ra, rb | Unsigned multiply 16x16->32 | VMUL (v30 <= rt <= v31, v24 <= ra,rb <= v29), mm=00| -| UMULX16 rt, ra, rb | Unsigned crossed multiply 16x16->32 | | -| KABS16 rt, ra | Saturated absolute value | VSGNX (v16 <= rt <= v29, v16 <= ra,rb <= v23, mm=01) | - -## 8-bit Miscellaneous instructions - -| Andes Mnemonic | 8-bit Instruction | Harmonised RVP Equivalent | -| ------------------ | ------------------------- | ------------------- | -| SMIN8 rt, ra, rb | Signed minimum | VMIN (v2 <= rt,ra,rb <= v7), mm=00| -| UMIN8 rt, ra, rb | Unsigned minimum | VMIN (v8 <= rt,ra,rb <= v15), mm=00| -| SMAX8 rt, ra, rb | Signed maximum | VMAX (v2 <= rt,ra,rb <= v7), mm=00| -| UMAX8 rt, ra, rb | Unsigned maximum | VMAX (v8 <= rt,ra,rb <= v15), mm=00| -| KABS8 rt, ra | Saturated absolute value | VSGNX (v2 <= rt <= v15, v2 <= ra,rb <= v7, mm=01) | - -## 8-bit Unpacking 
instructions - -| Andes Mnemonic | 8-bit Instruction | Harmonised RVP Equivalent | -| ------------------ | ------------------------- | ------------------- | -| SUNPKD810 rt, ra | Signed unpack bytes 1 & 0 | VMV (v16 <= rt <= v23, v2 <= ra <= v7), mm=00| -| SUNPKD820 rt, ra | Signed unpack bytes 2 & 0 | | -| SUNPKD830 rt, ra | Signed unpack bytes 3 & 0 | | -| SUNPKD831 rt, ra | Signed unpack bytes 3 & 1 | | -| ZUNPKD810 rt, ra | Unsigned unpack bytes 1 & 0 | VMV (v24 <= rt <= v29, v8 <= ra <= v15), mm=00| -| ZUNPKD820 rt, ra | Unsigned unpack bytes 2 & 0 | | -| ZUNPKD830 rt, ra | Unsigned unpack bytes 3 & 0 | | -| ZUNPKD831 rt, ra | Unsigned unpack bytes 3 & 1 | | -- 2.30.2