2 * (C) Copyright IBM Corporation 2008
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * Real-time assembly generation interface for Cell B.E. SPEs.
28 * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf
30 * \author Ian Romanick <idr@us.ibm.com>
34 #ifndef RTASM_PPC_SPE_H
35 #define RTASM_PPC_SPE_H
/** Size of one SPE instruction, in bytes. */
#define SPE_INST_SIZE 4

/** Number of general-purpose SIMD registers. */
#define SPE_NUM_REGS 128
43 /** Return Address register */
46 /** Stack Pointer register */
52 uint32_t *store
; /**< instruction buffer */
57 * Mask of used / unused registers
59 * Each set bit corresponds to an available register. Each cleared bit
60 * corresponds to an allocated register.
63 * spe_allocate_register, spe_allocate_available_register,
64 * spe_release_register
66 uint64_t regs
[SPE_NUM_REGS
/ 64];
68 boolean print
; /**< print/dump instructions as they're emitted? */
69 int indent
; /**< number of spaces to indent */
/**
 * Set up \p p for code generation.
 *
 * \param p          function object to initialize
 * \param code_size  requested size of the instruction buffer
 *                   (NOTE(review): units -- bytes vs. instructions -- are
 *                   not visible from this header; confirm in the
 *                   implementation)
 */
extern void spe_init_func(struct spe_function *p, unsigned code_size);

/**
 * Counterpart of spe_init_func(); presumably releases whatever that call
 * acquired for \p p (confirm in the implementation).
 */
extern void spe_release_func(struct spe_function *p);
/*
 * Register bookkeeping.  These are the functions cross-referenced from the
 * `regs` mask of struct spe_function, where a set bit marks an available
 * register and a cleared bit marks an allocated one.
 */

/**
 * Allocate some currently available register of \p p.
 *
 * \return an int identifying the register (NOTE(review): the failure value
 *         is not visible from this header; confirm in the implementation)
 */
extern int spe_allocate_available_register(struct spe_function *p);

/**
 * Allocate the specific register \p reg of \p p.
 *
 * \return an int (NOTE(review): presumably \p reg on success; confirm)
 */
extern int spe_allocate_register(struct spe_function *p, int reg);

/** Release register \p reg of \p p. */
extern void spe_release_register(struct spe_function *p, int reg);
80 extern void spe_print_code(struct spe_function
*p
, boolean enable
);
81 extern void spe_indent(struct spe_function
*p
, int spaces
);
82 extern void spe_comment(struct spe_function
*p
, int rel_indent
, const char *s
);
85 #endif /* RTASM_PPC_SPE_H */
/*
 * Prototype-generating forms of the EMIT_* macros.
 *
 * Each macro expands to an `extern void` declaration of the emitter `_name`.
 * The `_op` argument (and `bias` for EMIT_RI8) is accepted so the same
 * instruction list can be expanded with other macro definitions, but it is
 * not used by these prototype forms.
 *
 * The suffix names the operand list that follows `struct spe_function *p`:
 *   (none)  rT
 *   R       rT, rA
 *   RR      rT, rA, rB
 *   RRR     rT, rA, rB, rC
 *   RI7, RI8, RI10
 *           rT, rA, imm
 *   RI16, RI18
 *           rT, imm
 *   I16     imm
 */
#define EMIT_(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT)
#define EMIT_R(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT, unsigned rA)
#define EMIT_RR(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                       unsigned rB)
#define EMIT_RRR(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                       unsigned rB, unsigned rC)
#define EMIT_RI7(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                       int imm)
#define EMIT_RI8(_name, _op, bias) \
    extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                       int imm)
#define EMIT_RI10(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
                       int imm)
#define EMIT_RI16(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT, int imm)
#define EMIT_RI18(_name, _op) \
    extern void _name (struct spe_function *p, unsigned rT, int imm)
#define EMIT_I16(_name, _op) \
    extern void _name (struct spe_function *p, int imm)

/* Marker tested later in the file (#ifdef UNDEF_EMIT_MACROS). */
#define UNDEF_EMIT_MACROS
117 /* Memory load / store instructions
119 EMIT_RI10(spe_lqd
, 0x034);
120 EMIT_RR (spe_lqx
, 0x1c4);
121 EMIT_RI16(spe_lqa
, 0x061);
122 EMIT_RI16(spe_lqr
, 0x067);
123 EMIT_RI10(spe_stqd
, 0x024);
124 EMIT_RR (spe_stqx
, 0x144);
125 EMIT_RI16(spe_stqa
, 0x041);
126 EMIT_RI16(spe_stqr
, 0x047);
127 EMIT_RI7 (spe_cbd
, 0x1f4);
128 EMIT_RR (spe_cbx
, 0x1d4);
129 EMIT_RI7 (spe_chd
, 0x1f5);
130 EMIT_RI7 (spe_chx
, 0x1d5);
131 EMIT_RI7 (spe_cwd
, 0x1f6);
132 EMIT_RI7 (spe_cwx
, 0x1d6);
133 EMIT_RI7 (spe_cdd
, 0x1f7);
134 EMIT_RI7 (spe_cdx
, 0x1d7);
137 /* Constant formation instructions
139 EMIT_RI16(spe_ilh
, 0x083);
140 EMIT_RI16(spe_ilhu
, 0x082);
141 EMIT_RI16(spe_il
, 0x081);
142 EMIT_RI18(spe_ila
, 0x021);
143 EMIT_RI16(spe_iohl
, 0x0c1);
144 EMIT_RI16(spe_fsmbi
, 0x065);
148 /* Integer and logical instructions
150 EMIT_RR (spe_ah
, 0x0c8);
151 EMIT_RI10(spe_ahi
, 0x01d);
152 EMIT_RR (spe_a
, 0x0c0);
153 EMIT_RI10(spe_ai
, 0x01c);
154 EMIT_RR (spe_sfh
, 0x048);
155 EMIT_RI10(spe_sfhi
, 0x00d);
156 EMIT_RR (spe_sf
, 0x040);
157 EMIT_RI10(spe_sfi
, 0x00c);
158 EMIT_RR (spe_addx
, 0x340);
159 EMIT_RR (spe_cg
, 0x0c2);
160 EMIT_RR (spe_cgx
, 0x342);
161 EMIT_RR (spe_sfx
, 0x341);
162 EMIT_RR (spe_bg
, 0x042);
163 EMIT_RR (spe_bgx
, 0x343);
164 EMIT_RR (spe_mpy
, 0x3c4);
165 EMIT_RR (spe_mpyu
, 0x3cc);
166 EMIT_RI10(spe_mpyi
, 0x074);
167 EMIT_RI10(spe_mpyui
, 0x075);
168 EMIT_RRR (spe_mpya
, 0x00c);
169 EMIT_RR (spe_mpyh
, 0x3c5);
170 EMIT_RR (spe_mpys
, 0x3c7);
171 EMIT_RR (spe_mpyhh
, 0x3c6);
172 EMIT_RR (spe_mpyhha
, 0x346);
173 EMIT_RR (spe_mpyhhu
, 0x3ce);
174 EMIT_RR (spe_mpyhhau
, 0x34e);
175 EMIT_R (spe_clz
, 0x2a5);
176 EMIT_R (spe_cntb
, 0x2b4);
177 EMIT_R (spe_fsmb
, 0x1b6);
178 EMIT_R (spe_fsmh
, 0x1b5);
179 EMIT_R (spe_fsm
, 0x1b4);
180 EMIT_R (spe_gbb
, 0x1b2);
181 EMIT_R (spe_gbh
, 0x1b1);
182 EMIT_R (spe_gb
, 0x1b0);
183 EMIT_RR (spe_avgb
, 0x0d3);
184 EMIT_RR (spe_absdb
, 0x053);
185 EMIT_RR (spe_sumb
, 0x253);
186 EMIT_R (spe_xsbh
, 0x2b6);
187 EMIT_R (spe_xshw
, 0x2ae);
188 EMIT_R (spe_xswd
, 0x2a6);
189 EMIT_RR (spe_and
, 0x0c1);
190 EMIT_RR (spe_andc
, 0x2c1);
191 EMIT_RI10(spe_andbi
, 0x016);
192 EMIT_RI10(spe_andhi
, 0x015);
193 EMIT_RI10(spe_andi
, 0x014);
194 EMIT_RR (spe_or
, 0x041);
195 EMIT_RR (spe_orc
, 0x2c9);
196 EMIT_RI10(spe_orbi
, 0x006);
197 EMIT_RI10(spe_orhi
, 0x005);
198 EMIT_RI10(spe_ori
, 0x004);
199 EMIT_R (spe_orx
, 0x1f0);
200 EMIT_RR (spe_xor
, 0x241);
201 EMIT_RI10(spe_xorbi
, 0x026);
202 EMIT_RI10(spe_xorhi
, 0x025);
203 EMIT_RI10(spe_xori
, 0x024);
204 EMIT_RR (spe_nand
, 0x0c9);
205 EMIT_RR (spe_nor
, 0x049);
206 EMIT_RR (spe_eqv
, 0x249);
207 EMIT_RRR (spe_selb
, 0x008);
208 EMIT_RRR (spe_shufb
, 0x00b);
211 /* Shift and rotate instructions
213 EMIT_RR (spe_shlh
, 0x05f);
214 EMIT_RI7 (spe_shlhi
, 0x07f);
215 EMIT_RR (spe_shl
, 0x05b);
216 EMIT_RI7 (spe_shli
, 0x07b);
217 EMIT_RR (spe_shlqbi
, 0x1db);
218 EMIT_RI7 (spe_shlqbii
, 0x1fb);
219 EMIT_RR (spe_shlqby
, 0x1df);
220 EMIT_RI7 (spe_shlqbyi
, 0x1ff);
221 EMIT_RR (spe_shlqbybi
, 0x1cf);
222 EMIT_RR (spe_roth
, 0x05c);
223 EMIT_RI7 (spe_rothi
, 0x07c);
224 EMIT_RR (spe_rot
, 0x058);
225 EMIT_RI7 (spe_roti
, 0x078);
226 EMIT_RR (spe_rotqby
, 0x1dc);
227 EMIT_RI7 (spe_rotqbyi
, 0x1fc);
228 EMIT_RR (spe_rotqbybi
, 0x1cc);
229 EMIT_RR (spe_rotqbi
, 0x1d8);
230 EMIT_RI7 (spe_rotqbii
, 0x1f8);
231 EMIT_RR (spe_rothm
, 0x05d);
232 EMIT_RI7 (spe_rothmi
, 0x07d);
233 EMIT_RR (spe_rotm
, 0x059);
234 EMIT_RI7 (spe_rotmi
, 0x079);
235 EMIT_RR (spe_rotqmby
, 0x1dd);
236 EMIT_RI7 (spe_rotqmbyi
, 0x1fd);
237 EMIT_RR (spe_rotqmbybi
, 0x1cd);
238 EMIT_RR (spe_rotqmbi
, 0x1c9);
239 EMIT_RI7 (spe_rotqmbii
, 0x1f9);
240 EMIT_RR (spe_rotmah
, 0x05e);
241 EMIT_RI7 (spe_rotmahi
, 0x07e);
242 EMIT_RR (spe_rotma
, 0x05a);
243 EMIT_RI7 (spe_rotmai
, 0x07a);
246 /* Compare, branch, and halt instructions
248 EMIT_RR (spe_heq
, 0x3d8);
249 EMIT_RI10(spe_heqi
, 0x07f);
250 EMIT_RR (spe_hgt
, 0x258);
251 EMIT_RI10(spe_hgti
, 0x04f);
252 EMIT_RR (spe_hlgt
, 0x2d8);
253 EMIT_RI10(spe_hlgti
, 0x05f);
254 EMIT_RR (spe_ceqb
, 0x3d0);
255 EMIT_RI10(spe_ceqbi
, 0x07e);
256 EMIT_RR (spe_ceqh
, 0x3c8);
257 EMIT_RI10(spe_ceqhi
, 0x07d);
258 EMIT_RR (spe_ceq
, 0x3c0);
259 EMIT_RI10(spe_ceqi
, 0x07c);
260 EMIT_RR (spe_cgtb
, 0x250);
261 EMIT_RI10(spe_cgtbi
, 0x04e);
262 EMIT_RR (spe_cgth
, 0x248);
263 EMIT_RI10(spe_cgthi
, 0x04d);
264 EMIT_RR (spe_cgt
, 0x240);
265 EMIT_RI10(spe_cgti
, 0x04c);
266 EMIT_RR (spe_clgtb
, 0x2d0);
267 EMIT_RI10(spe_clgtbi
, 0x05e);
268 EMIT_RR (spe_clgth
, 0x2c8);
269 EMIT_RI10(spe_clgthi
, 0x05d);
270 EMIT_RR (spe_clgt
, 0x2c0);
271 EMIT_RI10(spe_clgti
, 0x05c);
272 EMIT_I16 (spe_br
, 0x064);
273 EMIT_I16 (spe_bra
, 0x060);
274 EMIT_RI16(spe_brsl
, 0x066);
275 EMIT_RI16(spe_brasl
, 0x062);
276 EMIT_RI16(spe_brnz
, 0x042);
277 EMIT_RI16(spe_brz
, 0x040);
278 EMIT_RI16(spe_brhnz
, 0x046);
279 EMIT_RI16(spe_brhz
, 0x044);
/*
 * Branch emitters declared by hand rather than through the EMIT_* macros
 * because they take the extra \p d and \p e arguments.
 *
 * NOTE(review): d / e are presumably the instruction's D and E feature bits
 * (interrupt disable / enable); confirm against the SPU ISA and the
 * implementation.
 */

/** Emit `bi` using register \p rA. */
extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e);

/** Emit `iret` using register \p rA. */
extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e);
283 extern void spe_bisled(struct spe_function
*p
, unsigned rT
, unsigned rA
,
285 extern void spe_bisl(struct spe_function
*p
, unsigned rT
, unsigned rA
,
287 extern void spe_biz(struct spe_function
*p
, unsigned rT
, unsigned rA
,
289 extern void spe_binz(struct spe_function
*p
, unsigned rT
, unsigned rA
,
291 extern void spe_bihz(struct spe_function
*p
, unsigned rT
, unsigned rA
,
293 extern void spe_bihnz(struct spe_function
*p
, unsigned rT
, unsigned rA
,
297 /** Load/splat immediate float into rT. */
299 spe_load_float(struct spe_function
*p
, unsigned rT
, float x
);
301 /** Load/splat immediate int into rT. */
303 spe_load_int(struct spe_function
*p
, unsigned rT
, int i
);
305 /** Load/splat immediate unsigned int into rT. */
307 spe_load_uint(struct spe_function
*p
, unsigned rT
, unsigned int ui
);
309 /** Replicate word 0 of rA across rT. */
311 spe_splat(struct spe_function
*p
, unsigned rT
, unsigned rA
);
313 /** Complement/invert all bits in rT. */
315 spe_complement(struct spe_function
*p
, unsigned rT
);
319 spe_move(struct spe_function
*p
, unsigned rT
, unsigned rA
);
321 /** rT = {0,0,0,0}. */
323 spe_zero(struct spe_function
*p
, unsigned rT
);
325 /** rT = splat(rA, word) */
327 spe_splat_word(struct spe_function
*p
, unsigned rT
, unsigned rA
, int word
);
329 /** rT = float min(rA, rB) */
331 spe_float_min(struct spe_function
*p
, unsigned rT
, unsigned rA
, unsigned rB
);
333 /** rT = float max(rA, rB) */
335 spe_float_max(struct spe_function
*p
, unsigned rT
, unsigned rA
, unsigned rB
);
338 /* Floating-point instructions
340 EMIT_RR (spe_fa
, 0x2c4);
341 EMIT_RR (spe_dfa
, 0x2cc);
342 EMIT_RR (spe_fs
, 0x2c5);
343 EMIT_RR (spe_dfs
, 0x2cd);
344 EMIT_RR (spe_fm
, 0x2c6);
345 EMIT_RR (spe_dfm
, 0x2ce);
346 EMIT_RRR (spe_fma
, 0x00e);
347 EMIT_RR (spe_dfma
, 0x35c);
348 EMIT_RRR (spe_fnms
, 0x00d);
349 EMIT_RR (spe_dfnms
, 0x35e);
350 EMIT_RRR (spe_fms
, 0x00f);
351 EMIT_RR (spe_dfms
, 0x35d);
352 EMIT_RR (spe_dfnma
, 0x35f);
353 EMIT_R (spe_frest
, 0x1b8);
354 EMIT_R (spe_frsqest
, 0x1b9);
355 EMIT_RR (spe_fi
, 0x3d4);
356 EMIT_RI8 (spe_csflt
, 0x1da, 155);
357 EMIT_RI8 (spe_cflts
, 0x1d8, 173);
358 EMIT_RI8 (spe_cuflt
, 0x1db, 155);
359 EMIT_RI8 (spe_cfltu
, 0x1d9, 173);
360 EMIT_R (spe_frds
, 0x3b9);
361 EMIT_R (spe_fesd
, 0x3b8);
362 EMIT_RR (spe_dfceq
, 0x3c3);
363 EMIT_RR (spe_dfcmeq
, 0x3cb);
364 EMIT_RR (spe_dfcgt
, 0x2c3);
365 EMIT_RR (spe_dfcmgt
, 0x2cb);
366 EMIT_RI7 (spe_dftsv
, 0x3bf);
367 EMIT_RR (spe_fceq
, 0x3c2);
368 EMIT_RR (spe_fcmeq
, 0x3ca);
369 EMIT_RR (spe_fcgt
, 0x2c2);
370 EMIT_RR (spe_fcmgt
, 0x2ca);
371 EMIT_R (spe_fscrwr
, 0x3ba);
372 EMIT_ (spe_fscrrd
, 0x398);
375 /* Channel instructions
377 EMIT_R (spe_rdch
, 0x00d);
378 EMIT_R (spe_rdchcnt
, 0x00f);
379 EMIT_R (spe_wrch
, 0x10d);
382 #ifdef UNDEF_EMIT_MACROS
393 #undef UNDEF_EMIT_MACROS