cell: implement function calls from shader code. fslight demo runs now.
[mesa.git] / src / gallium / auxiliary / rtasm / rtasm_ppc_spe.h
1 /*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file
27 * Real-time assembly generation interface for Cell B.E. SPEs.
28 * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf
29 *
30 * \author Ian Romanick <idr@us.ibm.com>
31 * \author Brian Paul
32 */
33
34 #ifndef RTASM_PPC_SPE_H
35 #define RTASM_PPC_SPE_H
36
37 /** Size of one encoded SPE instruction, in bytes */
38 #define SPE_INST_SIZE 4
39
40 /** Number of general-purpose SIMD registers; also the length of spe_function::regs */
41 #define SPE_NUM_REGS 128
42
43 /** Register number of the Return Address register (aka $lr / Link Register) */
44 #define SPE_REG_RA 0
45
46 /** Register number of the Stack Pointer register (aka $sp) */
47 #define SPE_REG_SP 1
48
49
50 struct spe_function /* state for one block of generated SPE code: instruction store, register bookkeeping, debug printing */
51 {
52 uint32_t *store; /**< instruction buffer */
53 uint num_inst; /**< presumably the number of instructions emitted into store so far -- confirm in the .c file */
54 uint max_inst; /**< presumably the capacity of store, in instructions -- confirm in the .c file */
55
56 /**
57 * The "set count" reflects the number of nested register sets that
58 * are allowed. In the unlikely case that we exceed the set count,
59 * register allocation will start to be confused, which is critical
60 * enough that we check for it.
61 */
62 unsigned char set_count;
63
64 /**
65 * Flags for used and unused registers. Each byte corresponds to a
66 * register; a 0 in that byte means that the register is available.
67 * A value of 1 means that the register was allocated in the current
68 * register set. Any other value N means that the register was allocated
69 * N register sets ago.
70 *
71 * \sa
72 * spe_allocate_register, spe_allocate_available_register,
73 * spe_allocate_register_set, spe_release_register_set, spe_release_register
74 */
75 unsigned char regs[SPE_NUM_REGS];
76
77 boolean print; /**< print/dump instructions as they're emitted? */
78 int indent; /**< number of spaces to indent */
79 };
80
81
82 void spe_init_func(struct spe_function *p, unsigned code_size);
83 void spe_release_func(struct spe_function *p);
84 unsigned spe_code_size(const struct spe_function *p);
85 /* Register bookkeeping; the flag encoding is described on spe_function::regs. */
86 int spe_allocate_available_register(struct spe_function *p);
87 int spe_allocate_register(struct spe_function *p, int reg);
88 void spe_release_register(struct spe_function *p, int reg);
89 void spe_allocate_register_set(struct spe_function *p);
90 void spe_release_register_set(struct spe_function *p);
91
92 unsigned
93 spe_get_registers_used(const struct spe_function *p, ubyte used[]);
94 /* Debug output controls (presumably backing spe_function::print and ::indent -- confirm in the .c file). */
95 void spe_print_code(struct spe_function *p, boolean enable);
96 void spe_indent(struct spe_function *p, int spaces);
97 void spe_comment(struct spe_function *p, int rel_indent, const char *s);
98
99
100 #endif /* RTASM_PPC_SPE_H */
101 /* Default EMIT_* expansions: each instruction row in this file becomes an extern prototype named by its first argument. */
102 #ifndef EMIT_ /* the defaults are skipped when EMIT_ was already defined before this point */
103 #define EMIT_(_name, _op) /* rT only; parameter must be spelled _name to match the expansion */ \
104 extern void _name (struct spe_function *p, unsigned rT)
105 #define EMIT_R(_name, _op) /* rT, rA */ \
106 extern void _name (struct spe_function *p, unsigned rT, unsigned rA)
107 #define EMIT_RR(_name, _op) /* rT, rA, rB */ \
108 extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
109 unsigned rB)
110 #define EMIT_RRR(_name, _op) /* rT, rA, rB, rC */ \
111 extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
112 unsigned rB, unsigned rC)
113 #define EMIT_RI7(_name, _op) /* rT, rA, int immediate */ \
114 extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
115 int imm)
116 #define EMIT_RI8(_name, _op, bias) /* rT, rA, int immediate; bias is unused by the prototype */ \
117 extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
118 int imm)
119 #define EMIT_RI10(_name, _op) /* rT, rA, int immediate */ \
120 extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \
121 int imm)
122 #define EMIT_RI16(_name, _op) /* rT, int immediate */ \
123 extern void _name (struct spe_function *p, unsigned rT, int imm)
124 #define EMIT_RI18(_name, _op) /* rT, int immediate */ \
125 extern void _name (struct spe_function *p, unsigned rT, int imm)
126 #define EMIT_I16(_name, _op) /* int immediate only */ \
127 extern void _name (struct spe_function *p, int imm)
128 #define UNDEF_EMIT_MACROS /* records that the defaults above were defined here, so the end of the file #undefs them */
129 #endif /* EMIT_ */
130
131
132 /* Memory load / store instructions. The c?d rows take a 7-bit immediate
133 * (RI7); the c?x rows take a second register rB (RR), like cbx. */
134 EMIT_RR (spe_lqx, 0x1c4);
135 EMIT_RI16(spe_lqa, 0x061);
136 EMIT_RI16(spe_lqr, 0x067);
137 EMIT_RR (spe_stqx, 0x144);
138 EMIT_RI16(spe_stqa, 0x041);
139 EMIT_RI16(spe_stqr, 0x047);
140 EMIT_RI7 (spe_cbd, 0x1f4);
141 EMIT_RR (spe_cbx, 0x1d4);
142 EMIT_RI7 (spe_chd, 0x1f5);
143 EMIT_RR (spe_chx, 0x1d5);
144 EMIT_RI7 (spe_cwd, 0x1f6);
145 EMIT_RR (spe_cwx, 0x1d6);
146 EMIT_RI7 (spe_cdd, 0x1f7);
147 EMIT_RR (spe_cdx, 0x1d7);
148
149
150 /* Constant formation instructions. Each row is (function name, _op value);
151 * the EMIT_* macro in effect decides what the row expands to. */
152 EMIT_RI16(spe_ilh, 0x083);
153 EMIT_RI16(spe_ilhu, 0x082);
154 EMIT_RI16(spe_il, 0x081);
155 EMIT_RI18(spe_ila, 0x021);
156 EMIT_RI16(spe_iohl, 0x0c1);
157 EMIT_RI16(spe_fsmbi, 0x065);
158
159
160
161 /* Integer and logical instructions. Each row is (function name, opcode);
162 * the xor-immediate rows use 0x044..0x046 because 0x024 is the stqd opcode. */
163 EMIT_RR (spe_ah, 0x0c8);
164 EMIT_RI10(spe_ahi, 0x01d);
165 EMIT_RR (spe_a, 0x0c0);
166 EMIT_RI10(spe_ai, 0x01c);
167 EMIT_RR (spe_sfh, 0x048);
168 EMIT_RI10(spe_sfhi, 0x00d);
169 EMIT_RR (spe_sf, 0x040);
170 EMIT_RI10(spe_sfi, 0x00c);
171 EMIT_RR (spe_addx, 0x340);
172 EMIT_RR (spe_cg, 0x0c2);
173 EMIT_RR (spe_cgx, 0x342);
174 EMIT_RR (spe_sfx, 0x341);
175 EMIT_RR (spe_bg, 0x042);
176 EMIT_RR (spe_bgx, 0x343);
177 EMIT_RR (spe_mpy, 0x3c4);
178 EMIT_RR (spe_mpyu, 0x3cc);
179 EMIT_RI10(spe_mpyi, 0x074);
180 EMIT_RI10(spe_mpyui, 0x075);
181 EMIT_RRR (spe_mpya, 0x00c);
182 EMIT_RR (spe_mpyh, 0x3c5);
183 EMIT_RR (spe_mpys, 0x3c7);
184 EMIT_RR (spe_mpyhh, 0x3c6);
185 EMIT_RR (spe_mpyhha, 0x346);
186 EMIT_RR (spe_mpyhhu, 0x3ce);
187 EMIT_RR (spe_mpyhhau, 0x34e);
188 EMIT_R (spe_clz, 0x2a5);
189 EMIT_R (spe_cntb, 0x2b4);
190 EMIT_R (spe_fsmb, 0x1b6);
191 EMIT_R (spe_fsmh, 0x1b5);
192 EMIT_R (spe_fsm, 0x1b4);
193 EMIT_R (spe_gbb, 0x1b2);
194 EMIT_R (spe_gbh, 0x1b1);
195 EMIT_R (spe_gb, 0x1b0);
196 EMIT_RR (spe_avgb, 0x0d3);
197 EMIT_RR (spe_absdb, 0x053);
198 EMIT_RR (spe_sumb, 0x253);
199 EMIT_R (spe_xsbh, 0x2b6);
200 EMIT_R (spe_xshw, 0x2ae);
201 EMIT_R (spe_xswd, 0x2a6);
202 EMIT_RR (spe_and, 0x0c1);
203 EMIT_RR (spe_andc, 0x2c1);
204 EMIT_RI10(spe_andbi, 0x016);
205 EMIT_RI10(spe_andhi, 0x015);
206 EMIT_RI10(spe_andi, 0x014);
207 EMIT_RR (spe_or, 0x041);
208 EMIT_RR (spe_orc, 0x2c9);
209 EMIT_RI10(spe_orbi, 0x006);
210 EMIT_RI10(spe_orhi, 0x005);
211 EMIT_RI10(spe_ori, 0x004);
212 EMIT_R (spe_orx, 0x1f0);
213 EMIT_RR (spe_xor, 0x241);
214 EMIT_RI10(spe_xorbi, 0x046);
215 EMIT_RI10(spe_xorhi, 0x045);
216 EMIT_RI10(spe_xori, 0x044);
217 EMIT_RR (spe_nand, 0x0c9);
218 EMIT_RR (spe_nor, 0x049);
219 EMIT_RR (spe_eqv, 0x249);
220 EMIT_RRR (spe_selb, 0x008);
221 EMIT_RRR (spe_shufb, 0x00b);
222
223
224 /* Shift and rotate instructions. For the quadword shifts, the register-form
225 * opcode is the immediate-form opcode with bit 0x20 cleared (rotqmbii 0x1f9 -> rotqmbi 0x1d9). */
226 EMIT_RR (spe_shlh, 0x05f);
227 EMIT_RI7 (spe_shlhi, 0x07f);
228 EMIT_RR (spe_shl, 0x05b);
229 EMIT_RI7 (spe_shli, 0x07b);
230 EMIT_RR (spe_shlqbi, 0x1db);
231 EMIT_RI7 (spe_shlqbii, 0x1fb);
232 EMIT_RR (spe_shlqby, 0x1df);
233 EMIT_RI7 (spe_shlqbyi, 0x1ff);
234 EMIT_RR (spe_shlqbybi, 0x1cf);
235 EMIT_RR (spe_roth, 0x05c);
236 EMIT_RI7 (spe_rothi, 0x07c);
237 EMIT_RR (spe_rot, 0x058);
238 EMIT_RI7 (spe_roti, 0x078);
239 EMIT_RR (spe_rotqby, 0x1dc);
240 EMIT_RI7 (spe_rotqbyi, 0x1fc);
241 EMIT_RR (spe_rotqbybi, 0x1cc);
242 EMIT_RR (spe_rotqbi, 0x1d8);
243 EMIT_RI7 (spe_rotqbii, 0x1f8);
244 EMIT_RR (spe_rothm, 0x05d);
245 EMIT_RI7 (spe_rothmi, 0x07d);
246 EMIT_RR (spe_rotm, 0x059);
247 EMIT_RI7 (spe_rotmi, 0x079);
248 EMIT_RR (spe_rotqmby, 0x1dd);
249 EMIT_RI7 (spe_rotqmbyi, 0x1fd);
250 EMIT_RR (spe_rotqmbybi, 0x1cd);
251 EMIT_RR (spe_rotqmbi, 0x1d9);
252 EMIT_RI7 (spe_rotqmbii, 0x1f9);
253 EMIT_RR (spe_rotmah, 0x05e);
254 EMIT_RI7 (spe_rotmahi, 0x07e);
255 EMIT_RR (spe_rotma, 0x05a);
256 EMIT_RI7 (spe_rotmai, 0x07a);
257
258
259 /* Compare, branch, and halt instructions. Each row is (function name, _op value);
260 * the EMIT_I16 rows (br, bra) are the only ones here without an rT argument. */
261 EMIT_RR (spe_heq, 0x3d8);
262 EMIT_RI10(spe_heqi, 0x07f);
263 EMIT_RR (spe_hgt, 0x258);
264 EMIT_RI10(spe_hgti, 0x04f);
265 EMIT_RR (spe_hlgt, 0x2d8);
266 EMIT_RI10(spe_hlgti, 0x05f);
267 EMIT_RR (spe_ceqb, 0x3d0);
268 EMIT_RI10(spe_ceqbi, 0x07e);
269 EMIT_RR (spe_ceqh, 0x3c8);
270 EMIT_RI10(spe_ceqhi, 0x07d);
271 EMIT_RR (spe_ceq, 0x3c0);
272 EMIT_RI10(spe_ceqi, 0x07c);
273 EMIT_RR (spe_cgtb, 0x250);
274 EMIT_RI10(spe_cgtbi, 0x04e);
275 EMIT_RR (spe_cgth, 0x248);
276 EMIT_RI10(spe_cgthi, 0x04d);
277 EMIT_RR (spe_cgt, 0x240);
278 EMIT_RI10(spe_cgti, 0x04c);
279 EMIT_RR (spe_clgtb, 0x2d0);
280 EMIT_RI10(spe_clgtbi, 0x05e);
281 EMIT_RR (spe_clgth, 0x2c8);
282 EMIT_RI10(spe_clgthi, 0x05d);
283 EMIT_RR (spe_clgt, 0x2c0);
284 EMIT_RI10(spe_clgti, 0x05c);
285 EMIT_I16 (spe_br, 0x064);
286 EMIT_I16 (spe_bra, 0x060);
287 EMIT_RI16(spe_brsl, 0x066);
288 EMIT_RI16(spe_brasl, 0x062);
289 EMIT_RI16(spe_brnz, 0x042);
290 EMIT_RI16(spe_brz, 0x040);
291 EMIT_RI16(spe_brhnz, 0x046);
292 EMIT_RI16(spe_brhz, 0x044);
293
294 void
295 spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset);
296 /* Same argument list as spe_lqd; presumably the store counterpart -- confirm in the .c file. */
297 void
298 spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset);
299 /* Declared by hand rather than via EMIT_* rows; d and e are presumably the ISA's interrupt disable/enable bits -- confirm. */
300 void spe_bi(struct spe_function *p, unsigned rA, int d, int e);
301 void spe_iret(struct spe_function *p, unsigned rA, int d, int e);
302 void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA,
303 int d, int e);
304 void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA,
305 int d, int e);
306 void spe_biz(struct spe_function *p, unsigned rT, unsigned rA,
307 int d, int e);
308 void spe_binz(struct spe_function *p, unsigned rT, unsigned rA,
309 int d, int e);
310 void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA,
311 int d, int e);
312 void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA,
313 int d, int e);
314
315
316 /** Load the immediate float x into rT, splatted. */
317 void
318 spe_load_float(struct spe_function *p, unsigned rT, float x);
319
320 /** Load the immediate int i into rT, splatted. */
321 void
322 spe_load_int(struct spe_function *p, unsigned rT, int i);
323
324 /** Load the immediate unsigned int ui into rT, splatted. */
325 void
326 spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui);
327
328 /** AND with an immediate value; result goes to rT. */
329 void
330 spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui);
331
332 /** XOR with an immediate value; result goes to rT. */
333 void
334 spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui);
335
336 /** Compare for equality against an immediate value. */
337 void
338 spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui);
339
340 /** Compare for greater-than against an immediate value. */
341 void
342 spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui);
343
344 /** Copy word 0 of rA into every word of rT. */
345 void
346 spe_splat(struct spe_function *p, unsigned rT, unsigned rA);
347
348 /** rT = rA with all bits complemented. */
349 void
350 spe_complement(struct spe_function *p, unsigned rT, unsigned rA);
351
352 /** Register copy: rT = rA. */
353 void
354 spe_move(struct spe_function *p, unsigned rT, unsigned rA);
355
356 /** Clear rT: rT = {0,0,0,0}. */
357 void
358 spe_zero(struct spe_function *p, unsigned rT);
359
360 /** rT = splat(rA, word): replicate the selected word of rA. */
361 void
362 spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word);
363
364 /** Float minimum: rT = min(rA, rB). */
365 void
366 spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB);
367
368 /** Float maximum: rT = max(rA, rB). */
369 void
370 spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB);
371
372
373 /* Floating-point instructions. Each row is (function name, _op value); the EMIT_RI8
374 * rows carry a third argument, the macro's bias parameter (unused by the prototype-only expansion). */
375 EMIT_RR (spe_fa, 0x2c4);
376 EMIT_RR (spe_dfa, 0x2cc);
377 EMIT_RR (spe_fs, 0x2c5);
378 EMIT_RR (spe_dfs, 0x2cd);
379 EMIT_RR (spe_fm, 0x2c6);
380 EMIT_RR (spe_dfm, 0x2ce);
381 EMIT_RRR (spe_fma, 0x00e);
382 EMIT_RR (spe_dfma, 0x35c);
383 EMIT_RRR (spe_fnms, 0x00d);
384 EMIT_RR (spe_dfnms, 0x35e);
385 EMIT_RRR (spe_fms, 0x00f);
386 EMIT_RR (spe_dfms, 0x35d);
387 EMIT_RR (spe_dfnma, 0x35f);
388 EMIT_R (spe_frest, 0x1b8);
389 EMIT_R (spe_frsqest, 0x1b9);
390 EMIT_RR (spe_fi, 0x3d4);
391 EMIT_RI8 (spe_csflt, 0x1da, 155);
392 EMIT_RI8 (spe_cflts, 0x1d8, 173);
393 EMIT_RI8 (spe_cuflt, 0x1db, 155);
394 EMIT_RI8 (spe_cfltu, 0x1d9, 173);
395 EMIT_R (spe_frds, 0x3b9);
396 EMIT_R (spe_fesd, 0x3b8);
397 EMIT_RR (spe_dfceq, 0x3c3);
398 EMIT_RR (spe_dfcmeq, 0x3cb);
399 EMIT_RR (spe_dfcgt, 0x2c3);
400 EMIT_RR (spe_dfcmgt, 0x2cb);
401 EMIT_RI7 (spe_dftsv, 0x3bf);
402 EMIT_RR (spe_fceq, 0x3c2);
403 EMIT_RR (spe_fcmeq, 0x3ca);
404 EMIT_RR (spe_fcgt, 0x2c2);
405 EMIT_RR (spe_fcmgt, 0x2ca);
406 EMIT_R (spe_fscrwr, 0x3ba);
407 EMIT_ (spe_fscrrd, 0x398);
408
409
410 /* Channel instructions. All three rows use the two-register EMIT_R form
411 * (rT, rA); which argument names the channel is not visible here -- confirm in the .c file. */
412 EMIT_R (spe_rdch, 0x00d);
413 EMIT_R (spe_rdchcnt, 0x00f);
414 EMIT_R (spe_wrch, 0x10d);
415
416
417 #ifdef UNDEF_EMIT_MACROS /* set only when this file supplied the default EMIT_* macros itself */
418 #undef EMIT_
419 #undef EMIT_R
420 #undef EMIT_RR
421 #undef EMIT_RRR
422 #undef EMIT_RI7
423 #undef EMIT_RI8
424 #undef EMIT_RI10
425 #undef EMIT_RI16
426 #undef EMIT_RI18
427 #undef EMIT_I16
428 #undef UNDEF_EMIT_MACROS
429 #endif /* UNDEF_EMIT_MACROS */