9af15e6f2a380dd1c64a79f80b79d287ecb80857
[libreriscv.git] / simple_v_extension / specification / bitmanip.mdwn
1 [[!tag standards]]
2
3 # Bitmanip opcodes
4
5 These are bit manipulation opcodes that, if provided, augment SimpleV for
6 the purposes of efficiently accelerating Vector Processing, 3D Graphics
7 and Video Processing.
8
9 The justification for their inclusion in BitManip is identical to the
10 significant justification that went into their inclusion in the
11 RISC-V Vector Extension (under the "Predicate Mask" opcodes section).
12
13 See
14 <https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#vector-mask-instructions>
15 for details.
16
17 # Predicate Masks
18
19 SV uses standard integer scalar registers as a predicate bitmask. Therefore,
20 the majority of RISC-V RV32I / RV64I bit-level instructions are perfectly
21 adequate. However, RVV provides some mask operations that have no scalar equivalent; these exceptions are covered below.
22
23 ## logical bit-wise instructions
24
25 These are the available bitwise instructions in RVV:
26
27 vmand.mm vd, vs2, vs1 # vd[i] = vs2[i].LSB && vs1[i].LSB
28 vmnand.mm vd, vs2, vs1 # vd[i] = !(vs2[i].LSB && vs1[i].LSB)
29 vmandnot.mm vd, vs2, vs1 # vd[i] = vs2[i].LSB && !vs1[i].LSB
30 vmxor.mm vd, vs2, vs1 # vd[i] = vs2[i].LSB ^^ vs1[i].LSB
31 vmor.mm vd, vs2, vs1 # vd[i] = vs2[i].LSB || vs1[i].LSB
32 vmnor.mm vd, vs2, vs1 # vd[i] = !(vs2[i].LSB || vs1[i].LSB)
33 vmornot.mm vd, vs2, vs1 # vd[i] = vs2[i].LSB || !vs1[i].LSB
34 vmxnor.mm vd, vs2, vs1 # vd[i] = !(vs2[i].LSB ^^ vs1[i].LSB)
35
36 The ones that exist in scalar RISC-V are:
37
38 AND rd, rs1, rs2 # rd = rs1 & rs2
39 OR rd, rs1, rs2 # rd = rs1 | rs2
40 XOR rd, rs1, rs2 # rd = rs1 ^ rs2
41
42 The ones in Bitmanip are:
43
44 ANDN rd, rs1, rs2 # rd = rs1 & ~rs2
45 ORN rd, rs1, rs2 # rd = rs1 | ~rs2
46 XORN rd, rs1, rs2 # rd = rs1 ^ ~rs2
47
48 This leaves:
49
50 NOR
51 NAND
52
53 These are currently listed as "pseudo-ops" in BitManip-Draft (0.91).
54 They need to be actual opcodes.
55
56
57 TODO: there is an extensive table in RVV of bit-level operations:
58
59 output instruction pseudoinstruction
60
61 | 0 | 1 | 2 | 3 | instruction | pseudoinstruction |
62 | - | - | - | - | -------------------------- | ----------------- |
63 | 0 | 0 | 0 | 0 | vmxor.mm vd, vd, vd | vmclr.m vd |
64 | 1 | 0 | 0 | 0 | vmnor.mm vd, src1, src2 | |
65 | 0 | 1 | 0 | 0 | vmandnot.mm vd, src2, src1 | |
66 | 1 | 1 | 0 | 0 | vmnand.mm vd, src1, src1 | vmnot.m vd, src1 |
67 | 0 | 0 | 1 | 0 | vmandnot.mm vd, src1, src2 | |
68 | 1 | 0 | 1 | 0 | vmnand.mm vd, src2, src2 | vmnot.m vd, src2 |
69 | 0 | 1 | 1 | 0 | vmxor.mm vd, src1, src2 | |
70 | 1 | 1 | 1 | 0 | vmnand.mm vd, src1, src2 | |
71 | 0 | 0 | 0 | 1 | vmand.mm vd, src1, src2 | |
72 | 1 | 0 | 0 | 1 | vmxnor.mm vd, src1, src2 | |
73 | 0 | 1 | 0 | 1 | vmand.mm vd, src2, src2 | vmcpy.m vd, src2 |
74 | 1 | 1 | 0 | 1 | vmornot.mm vd, src2, src1 | |
75 | 0 | 0 | 1 | 1 | vmand.mm vd, src1, src1 | vmcpy.m vd, src1 |
76 | 1 | 0 | 1 | 1 | vmornot.mm vd, src1, src2 | |
77 | 1 | 1 | 1 | 1 | vmxnor.mm vd, vd, vd | vmset.m vd |
78
79 ## pcnt - population count
80
81 population-count.
82
83 Pseudocode:
84
85 unsigned int v; // count the number of bits set in v
86 unsigned int c; // c accumulates the total bits set in v
87 for (c = 0; v; c++)
88 {
89 v &= v - 1; // clear the least significant bit set
90 }
91
92 This instruction is present in BitManip.
93
94 ## ffirst - find first bit
95
96 finds the first bit set as an index.
97
98 Pseudocode:
99
100
101 uint_xlen_t clz(uint_xlen_t rs1)
102 {
103 for (int count = 0; count < XLEN; count++)
104 if ((rs1 << count) >> (XLEN - 1))
105 return count;
106 return XLEN; // -1
107 }
108
109 This is similar but not identical to BitManip "CLZ". CLZ returns XLEN when no bits are set, whereas RVV returns -1.
110
111 ## sbf - set before first bit
112
113 Sets all LSBs leading up to (but not including) the first set bit in the src. If the second
114 operand is given (i.e. is not x0), this process begins each time (including the first time) from where 1s are set in the
115 second operand.
116
117 # Example
118
119 7 6 5 4 3 2 1 0 Bit number
120
121 1 0 0 1 0 1 0 0 a3 contents
122 sbf a2, a3, x0
123 0 0 0 0 0 0 1 1 a2 contents
124
125 1 0 0 1 0 1 0 1 a3 contents
126 sbf a2, a3, x0
127 0 0 0 0 0 0 0 0 a2
128
129 0 0 0 0 0 0 0 0 a3 contents
130 sbf a2, a3, x0
131 1 1 1 1 1 1 1 1 a2
132
133 1 1 0 0 0 0 1 1 a0 contents
134 1 0 0 1 0 1 0 0 a3 contents
135 sbf a2, a3, a0
136 0 1 0 0 0 0 1 1 a2 contents
137
138 Pseudo-code:
139
140 def sbf(rd, rs1, rs2):
141 rd = 0
142 # start setting if no predicate or if 1st predicate bit set
143 setting_mode = rs2 == x0 or (regs[rs2] & 1)
144 while i < XLEN:
145 bit = 1<<i
146 if rs2 != x0 and (regs[rs2] & bit):
147 # reset searching
148 setting_mode = False
149 if setting_mode:
150 if regs[rs1] & bit: # found a bit in rs1: stop setting rd
151 setting_mode = False
152 else:
153 regs[rd] |= bit
154 else if rs2 != x0: # searching mode
155 if (regs[rs2] & bit):
156 setting_mode = True # back into "setting" mode
157 i += 1
158
159 ## sif - set including first bit
160
161 Similar to sbf except including the bit which ends a run.
162
163 # Example
164
165 7 6 5 4 3 2 1 0 Element number
166
167 1 0 0 1 0 1 0 0 a3 contents
168 sif a2, a3
169 0 0 0 0 0 1 1 1 a2 contents
170
171 1 0 0 1 0 1 0 1 a3 contents
172 sif a2, a3
173 0 0 0 0 0 0 0 1 a2
174
175 1 1 0 0 0 0 1 1 a0 contents
176 1 0 0 1 0 1 0 0 a3 contents
177 sif a2, a3, a0
178 1 1 x x x x 1 1 a2 contents
179
180 Pseudo-code:
181
182 def sif(rd, rs1, rs2):
183 rd = 0
184 # start setting if no predicate or if 1st predicate bit set
185 setting_mode = rs2 == x0 or (regs[rs2] & 1)
186 while i < XLEN:
187 bit = 1<<i
188 if rs2 != x0 and (regs[rs2] & bit):
189 # reset searching
190 setting_mode = False
191 if setting_mode:
192 regs[rd] |= bit
193 if regs[rs1] & bit: # found a bit in rs1: stop setting rd
194 setting_mode = False
195 else if rs2 != x0: # searching mode
196 if (regs[rs2] & bit):
197 setting_mode = True # back into "setting" mode
198 i += 1
199
200 ## sof - set only first bit
201
202 # Example
203
204 7 6 5 4 3 2 1 0 Element number
205
206 1 0 0 1 0 1 0 0 a3 contents
207 sof a2, a3
208 0 0 0 0 0 1 0 0 a2 contents
209
210 1 0 0 1 0 1 0 1 a3 contents
211 sof a2, a3
212 0 0 0 0 0 0 0 1 a2
213
214 1 1 0 0 0 0 1 1 a0 contents
215 1 1 0 1 0 1 0 0 a3 contents
216 sof a2, a3, a0
217 0 1 x x x x 0 0 a2 contents
218
219 Pseudo-code:
220
221 def sof(rd, rs1, rs2):
222 rd = 0
223 setting_mode = rs2 == x0 or (regs[rs2] & 1)
224
225 while i < XLEN:
226 bit = 1<<i
227
228 # only reenable when predicate in use, and bit valid
229 if !setting_mode && rs2 != x0:
230 if (regs[rs2] & bit):
231 # back into "setting" mode
232 setting_mode = True
233
234 # skipping mode
235 if !setting_mode:
236 # skip any more 1s
237 if regs[rs1] & bit:
238 i += 1
239 continue
240
241 # setting mode, search for 1
242 if regs[rs1] & bit: # found a bit in rs1:
243 # set bit, exit setting mode immediately
244 regs[rd] |= bit
245 setting_mode = False
246
247 i += 1
248