remove stray tab
[mesa.git] / src / mesa / shader / slang / slang_execute_x86.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5
4 *
5 * Copyright (C) 2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file slang_execute_x86.c
27 * x86 back end compiler
28 * \author Michal Krol, Keith Whitwell
29 */
30
31 #include "imports.h"
32 #include "slang_compile.h"
33 #include "slang_execute.h"
34 #include "slang_library_noise.h"
35 #include "slang_library_texsample.h"
36
37 #if defined(USE_X86_ASM) || defined(SLANG_X86)
38
39 #include "x86/rtasm/x86sse.h"
40
41 typedef struct
42 {
43 GLuint index;
44 GLubyte *csr;
45 } fixup;
46
47 typedef struct
48 {
49 struct x86_function f;
50 struct x86_reg r_eax;
51 struct x86_reg r_ecx;
52 struct x86_reg r_edx;
53 struct x86_reg r_ebx;
54 struct x86_reg r_esp;
55 struct x86_reg r_ebp;
56 struct x86_reg r_st0;
57 struct x86_reg r_st1;
58 struct x86_reg r_st2;
59 struct x86_reg r_st3;
60 struct x86_reg r_st4;
61 fixup *fixups;
62 GLuint fixup_count;
63 GLubyte **labels;
64 slang_machine *mach;
65 GLubyte *l_discard;
66 GLubyte *l_exit;
67 GLshort fpucntl;
68 } codegen_ctx;
69
70 static GLvoid
71 add_fixup(codegen_ctx * G, GLuint index, GLubyte * csr)
72 {
73 G->fixups =
74 (fixup *) slang_alloc_realloc(G->fixups, G->fixup_count * sizeof(fixup),
75 (G->fixup_count + 1) * sizeof(fixup));
76 G->fixups[G->fixup_count].index = index;
77 G->fixups[G->fixup_count].csr = csr;
78 G->fixup_count++;
79 }
80
/*
 * FPU control words used by the generated code.
 * RESTORE_FPU is the control word loaded before returning to the caller;
 * RND_NEG_FPU is the same word with bit 0x400 set (used by the
 * round-toward-negative-infinity helper).
 */
#ifdef NO_FAST_MATH
#  define RESTORE_FPU (DEFAULT_X86_FPU)
#  define RND_NEG_FPU (DEFAULT_X86_FPU | 0x400)
#else
#  define RESTORE_FPU (FAST_X86_FPU)
#  define RND_NEG_FPU (FAST_X86_FPU | 0x400)
#endif
88
89 #if 0
90
91 /*
92 * XXX
93 * These should produce a valid code that computes powers.
94 * Unfortunately, it does not.
95 */
96 static void
97 set_fpu_round_neg_inf(codegen_ctx * G)
98 {
99 if (G->fpucntl != RND_NEG_FPU) {
100 G->fpucntl = RND_NEG_FPU;
101 x87_fnclex(&G->f);
102 x86_mov_reg_imm(&G->f, G->r_eax,
103 (GLint) & G->mach->x86.fpucntl_rnd_neg);
104 x87_fldcw(&G->f, x86_deref(G->r_eax));
105 }
106 }
107
108 static void
109 emit_x87_ex2(codegen_ctx * G)
110 {
111 set_fpu_round_neg_inf(G);
112
113 x87_fld(&G->f, G->r_st0); /* a a */
114 x87_fprndint(&G->f); /* int(a) a */
115 x87_fld(&G->f, G->r_st0); /* int(a) int(a) a */
116 x87_fstp(&G->f, G->r_st3); /* int(a) a int(a) */
117 x87_fsubp(&G->f, G->r_st1); /* frac(a) int(a) */
118 x87_f2xm1(&G->f); /* (2^frac(a))-1 int(a) */
119 x87_fld1(&G->f); /* 1 (2^frac(a))-1 int(a) */
120 x87_faddp(&G->f, G->r_st1); /* 2^frac(a) int(a) */
121 x87_fscale(&G->f); /* 2^a */
122 }
123
124 static void
125 emit_pow(codegen_ctx * G)
126 {
127 x87_fld(&G->f, x86_deref(G->r_esp));
128 x87_fld(&G->f, x86_make_disp(G->r_esp, 4));
129 x87_fyl2x(&G->f);
130 emit_x87_ex2(G);
131 }
132
133 #endif
134
135 static GLfloat
136 do_ceilf(GLfloat x)
137 {
138 return CEILF(x);
139 }
140
141 static GLfloat
142 do_floorf(GLfloat x)
143 {
144 return FLOORF(x);
145 }
146
147 static GLfloat
148 do_ftoi(GLfloat x)
149 {
150 return (GLfloat) ((GLint) (x));
151 }
152
153 static GLfloat
154 do_powf(GLfloat y, GLfloat x)
155 {
156 return (GLfloat) _mesa_pow((GLdouble) x, (GLdouble) y);
157 }
158
159 static GLvoid
160 ensure_infolog_created(slang_info_log ** infolog)
161 {
162 if (*infolog == NULL) {
163 *infolog = slang_alloc_malloc(sizeof(slang_info_log));
164 if (*infolog == NULL)
165 return;
166 slang_info_log_construct(*infolog);
167 }
168 }
169
170 static GLvoid
171 do_print_float(slang_info_log ** infolog, GLfloat x)
172 {
173 _mesa_printf("slang print: %f\n", x);
174 ensure_infolog_created(infolog);
175 slang_info_log_print(*infolog, "%f", x);
176 }
177
178 static GLvoid
179 do_print_int(slang_info_log ** infolog, GLfloat x)
180 {
181 _mesa_printf("slang print: %d\n", (GLint) (x));
182 ensure_infolog_created(infolog);
183 slang_info_log_print(*infolog, "%d", (GLint) (x));
184 }
185
186 static GLvoid
187 do_print_bool(slang_info_log ** infolog, GLfloat x)
188 {
189 _mesa_printf("slang print: %s\n", (GLint) (x) ? "true" : "false");
190 ensure_infolog_created(infolog);
191 slang_info_log_print(*infolog, "%s", (GLint) (x) ? "true" : "false");
192 }
193
/* IEEE-754 single precision bit patterns of 1.0f and 0.0f; the generated
 * code stores them as comparison results. */
#define FLOAT_ONE   0x3f800000
#define FLOAT_ZERO  0
196
197 static GLvoid
198 codegen_assem(codegen_ctx * G, slang_assembly * a, slang_info_log ** infolog)
199 {
200 GLint disp, i;
201
202 switch (a->type) {
203 case slang_asm_none:
204 break;
205 case slang_asm_float_copy:
206 case slang_asm_int_copy:
207 case slang_asm_bool_copy:
208 x86_mov(&G->f, G->r_eax, x86_make_disp(G->r_esp, a->param[0]));
209 x86_pop(&G->f, G->r_ecx);
210 x86_mov(&G->f, x86_make_disp(G->r_eax, a->param[1]), G->r_ecx);
211 break;
212 case slang_asm_float_move:
213 case slang_asm_int_move:
214 case slang_asm_bool_move:
215 x86_lea(&G->f, G->r_eax, x86_make_disp(G->r_esp, a->param[1]));
216 x86_add(&G->f, G->r_eax, x86_deref(G->r_esp));
217 x86_mov(&G->f, G->r_eax, x86_deref(G->r_eax));
218 x86_mov(&G->f, x86_make_disp(G->r_esp, a->param[0]), G->r_eax);
219 break;
220 case slang_asm_float_push:
221 case slang_asm_int_push:
222 case slang_asm_bool_push:
223 /* TODO: use push imm32 */
224 x86_mov_reg_imm(&G->f, G->r_eax, *((GLint *) & a->literal));
225 x86_push(&G->f, G->r_eax);
226 break;
227 case slang_asm_float_deref:
228 case slang_asm_int_deref:
229 case slang_asm_bool_deref:
230 case slang_asm_addr_deref:
231 x86_mov(&G->f, G->r_eax, x86_deref(G->r_esp));
232 x86_mov(&G->f, G->r_eax, x86_deref(G->r_eax));
233 x86_mov(&G->f, x86_deref(G->r_esp), G->r_eax);
234 break;
235 case slang_asm_float_add:
236 x87_fld(&G->f, x86_make_disp(G->r_esp, 4));
237 x87_fld(&G->f, x86_deref(G->r_esp));
238 x87_faddp(&G->f, G->r_st1);
239 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4));
240 x87_fstp(&G->f, x86_deref(G->r_esp));
241 break;
242 case slang_asm_float_multiply:
243 x87_fld(&G->f, x86_make_disp(G->r_esp, 4));
244 x87_fld(&G->f, x86_deref(G->r_esp));
245 x87_fmulp(&G->f, G->r_st1);
246 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4));
247 x87_fstp(&G->f, x86_deref(G->r_esp));
248 break;
249 case slang_asm_float_divide:
250 x87_fld(&G->f, x86_make_disp(G->r_esp, 4));
251 x87_fld(&G->f, x86_deref(G->r_esp));
252 x87_fdivp(&G->f, G->r_st1);
253 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4));
254 x87_fstp(&G->f, x86_deref(G->r_esp));
255 break;
256 case slang_asm_float_negate:
257 x87_fld(&G->f, x86_deref(G->r_esp));
258 x87_fchs(&G->f);
259 x87_fstp(&G->f, x86_deref(G->r_esp));
260 break;
261 case slang_asm_float_less:
262 x87_fld(&G->f, x86_make_disp(G->r_esp, 4));
263 x87_fcomp(&G->f, x86_deref(G->r_esp));
264 x87_fnstsw(&G->f, G->r_eax);
265 /* TODO: use test r8,imm8 */
266 x86_mov_reg_imm(&G->f, G->r_ecx, 0x100);
267 x86_test(&G->f, G->r_eax, G->r_ecx);
268 {
269 GLubyte *lab0, *lab1;
270 /* TODO: use jcc rel8 */
271 lab0 = x86_jcc_forward(&G->f, cc_E);
272 x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ONE);
273 /* TODO: use jmp rel8 */
274 lab1 = x86_jmp_forward(&G->f);
275 x86_fixup_fwd_jump(&G->f, lab0);
276 x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ZERO);
277 x86_fixup_fwd_jump(&G->f, lab1);
278 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4));
279 x86_mov(&G->f, x86_deref(G->r_esp), G->r_ecx);
280 }
281 break;
282 case slang_asm_float_equal_exp:
283 x87_fld(&G->f, x86_make_disp(G->r_esp, 4));
284 x87_fcomp(&G->f, x86_deref(G->r_esp));
285 x87_fnstsw(&G->f, G->r_eax);
286 /* TODO: use test r8,imm8 */
287 x86_mov_reg_imm(&G->f, G->r_ecx, 0x4000);
288 x86_test(&G->f, G->r_eax, G->r_ecx);
289 {
290 GLubyte *lab0, *lab1;
291 /* TODO: use jcc rel8 */
292 lab0 = x86_jcc_forward(&G->f, cc_E);
293 x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ONE);
294 /* TODO: use jmp rel8 */
295 lab1 = x86_jmp_forward(&G->f);
296 x86_fixup_fwd_jump(&G->f, lab0);
297 x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ZERO);
298 x86_fixup_fwd_jump(&G->f, lab1);
299 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4));
300 x86_mov(&G->f, x86_deref(G->r_esp), G->r_ecx);
301 }
302 break;
303 case slang_asm_float_equal_int:
304 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, -4));
305 x87_fld(&G->f, x86_make_disp(G->r_esp, a->param[0] + 4));
306 x87_fcomp(&G->f, x86_make_disp(G->r_esp, a->param[1] + 4));
307 x87_fnstsw(&G->f, G->r_eax);
308 /* TODO: use test r8,imm8 */
309 x86_mov_reg_imm(&G->f, G->r_ecx, 0x4000);
310 x86_test(&G->f, G->r_eax, G->r_ecx);
311 {
312 GLubyte *lab0, *lab1;
313 /* TODO: use jcc rel8 */
314 lab0 = x86_jcc_forward(&G->f, cc_E);
315 x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ONE);
316 /* TODO: use jmp rel8 */
317 lab1 = x86_jmp_forward(&G->f);
318 x86_fixup_fwd_jump(&G->f, lab0);
319 x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ZERO);
320 x86_fixup_fwd_jump(&G->f, lab1);
321 x86_mov(&G->f, x86_deref(G->r_esp), G->r_ecx);
322 }
323 break;
324 case slang_asm_float_to_int:
325 /* TODO: use fistp without rounding */
326 x86_call(&G->f, (GLubyte *) (do_ftoi));
327 x87_fstp(&G->f, x86_deref(G->r_esp));
328 break;
329 case slang_asm_float_sine:
330 /* TODO: use fsin */
331 x86_call(&G->f, (GLubyte *) _mesa_sinf);
332 x87_fstp(&G->f, x86_deref(G->r_esp));
333 break;
334 case slang_asm_float_arcsine:
335 /* TODO: use fpatan (?) */
336 x86_call(&G->f, (GLubyte *) _mesa_asinf);
337 x87_fstp(&G->f, x86_deref(G->r_esp));
338 break;
339 case slang_asm_float_arctan:
340 /* TODO: use fpatan */
341 x86_call(&G->f, (GLubyte *) _mesa_atanf);
342 x87_fstp(&G->f, x86_deref(G->r_esp));
343 break;
344 case slang_asm_float_power:
345 /* TODO: use emit_pow() */
346 x86_call(&G->f, (GLubyte *) do_powf);
347 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4));
348 x87_fstp(&G->f, x86_deref(G->r_esp));
349 break;
350 case slang_asm_float_log2:
351 x87_fld1(&G->f);
352 x87_fld(&G->f, x86_deref(G->r_esp));
353 x87_fyl2x(&G->f);
354 x87_fstp(&G->f, x86_deref(G->r_esp));
355 break;
356 case slang_asm_float_floor:
357 x86_call(&G->f, (GLubyte *) do_floorf);
358 x87_fstp(&G->f, x86_deref(G->r_esp));
359 break;
360 case slang_asm_float_ceil:
361 x86_call(&G->f, (GLubyte *) do_ceilf);
362 x87_fstp(&G->f, x86_deref(G->r_esp));
363 break;
364 case slang_asm_float_noise1:
365 x86_call(&G->f, (GLubyte *) _slang_library_noise1);
366 x87_fstp(&G->f, x86_deref(G->r_esp));
367 break;
368 case slang_asm_float_noise2:
369 x86_call(&G->f, (GLubyte *) _slang_library_noise2);
370 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4));
371 x87_fstp(&G->f, x86_deref(G->r_esp));
372 break;
373 case slang_asm_float_noise3:
374 x86_call(&G->f, (GLubyte *) _slang_library_noise4);
375 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 8));
376 x87_fstp(&G->f, x86_deref(G->r_esp));
377 break;
378 case slang_asm_float_noise4:
379 x86_call(&G->f, (GLubyte *) _slang_library_noise4);
380 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 12));
381 x87_fstp(&G->f, x86_deref(G->r_esp));
382 break;
383 case slang_asm_int_to_float:
384 break;
385 case slang_asm_int_to_addr:
386 x87_fld(&G->f, x86_deref(G->r_esp));
387 x87_fistp(&G->f, x86_deref(G->r_esp));
388 break;
389 case slang_asm_addr_copy:
390 x86_pop(&G->f, G->r_eax);
391 x86_mov(&G->f, G->r_ecx, x86_deref(G->r_esp));
392 x86_mov(&G->f, x86_deref(G->r_ecx), G->r_eax);
393 break;
394 case slang_asm_addr_push:
395 /* TODO: use push imm32 */
396 x86_mov_reg_imm(&G->f, G->r_eax, (GLint) a->param[0]);
397 x86_push(&G->f, G->r_eax);
398 break;
399 case slang_asm_addr_add:
400 x86_pop(&G->f, G->r_eax);
401 x86_add(&G->f, x86_deref(G->r_esp), G->r_eax);
402 break;
403 case slang_asm_addr_multiply:
404 x86_pop(&G->f, G->r_ecx);
405 x86_mov(&G->f, G->r_eax, x86_deref(G->r_esp));
406 x86_mul(&G->f, G->r_ecx);
407 x86_mov(&G->f, x86_deref(G->r_esp), G->r_eax);
408 break;
409 case slang_asm_vec4_tex1d:
410 x86_call(&G->f, (GLubyte *) _slang_library_tex1d);
411 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 12));
412 break;
413 case slang_asm_vec4_tex2d:
414 x86_call(&G->f, (GLubyte *) _slang_library_tex2d);
415 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 16));
416 break;
417 case slang_asm_vec4_tex3d:
418 x86_call(&G->f, (GLubyte *) _slang_library_tex3d);
419 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 20));
420 break;
421 case slang_asm_vec4_texcube:
422 x86_call(&G->f, (GLubyte *) _slang_library_texcube);
423 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 20));
424 break;
425 case slang_asm_vec4_shad1d:
426 x86_call(&G->f, (GLubyte *) _slang_library_shad1d);
427 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 20));
428 break;
429 case slang_asm_vec4_shad2d:
430 x86_call(&G->f, (GLubyte *) _slang_library_shad2d);
431 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 20));
432 break;
433 case slang_asm_jump:
434 add_fixup(G, a->param[0], x86_jmp_forward(&G->f));
435 break;
436 case slang_asm_jump_if_zero:
437 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4));
438 x86_xor(&G->f, G->r_eax, G->r_eax);
439 x86_cmp(&G->f, G->r_eax, x86_make_disp(G->r_esp, -4));
440 {
441 GLubyte *lab0;
442 /* TODO: use jcc rel8 */
443 lab0 = x86_jcc_forward(&G->f, cc_NE);
444 add_fixup(G, a->param[0], x86_jmp_forward(&G->f));
445 x86_fixup_fwd_jump(&G->f, lab0);
446 }
447 break;
448 case slang_asm_enter:
449 /* FIXME: x86_make_disp(esp, 0) + x86_lea() generates bogus code */
450 assert(a->param[0] != 0);
451 x86_push(&G->f, G->r_ebp);
452 x86_lea(&G->f, G->r_ebp, x86_make_disp(G->r_esp, (GLint) a->param[0]));
453 break;
454 case slang_asm_leave:
455 x86_pop(&G->f, G->r_ebp);
456 break;
457 case slang_asm_local_alloc:
458 /* FIXME: x86_make_disp(esp, 0) + x86_lea() generates bogus code */
459 assert(a->param[0] != 0);
460 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, -(GLint) a->param[0]));
461 break;
462 case slang_asm_local_free:
463 /* FIXME: x86_make_disp(esp, 0) + x86_lea() generates bogus code */
464 assert(a->param[0] != 0);
465 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, (GLint) a->param[0]));
466 break;
467 case slang_asm_local_addr:
468 disp = -(GLint) (a->param[0] + a->param[1]) + 4;
469 if (disp != 0) {
470 x86_lea(&G->f, G->r_eax, x86_make_disp(G->r_ebp, disp));
471 x86_push(&G->f, G->r_eax);
472 }
473 else
474 x86_push(&G->f, G->r_ebp);
475 break;
476 case slang_asm_global_addr:
477 /* TODO: use push imm32 */
478 x86_mov_reg_imm(&G->f, G->r_eax, (GLint) & G->mach->mem + a->param[0]);
479 x86_push(&G->f, G->r_eax);
480 break;
481 case slang_asm_call:
482 add_fixup(G, a->param[0], x86_call_forward(&G->f));
483 break;
484 case slang_asm_return:
485 x86_ret(&G->f);
486 break;
487 case slang_asm_discard:
488 x86_jmp(&G->f, G->l_discard);
489 break;
490 case slang_asm_exit:
491 x86_jmp(&G->f, G->l_exit);
492 break;
493 /* GL_MESA_shader_debug */
494 case slang_asm_float_print:
495 /* TODO: use push imm32 */
496 x86_mov_reg_imm(&G->f, G->r_eax, (GLint) (infolog));
497 x86_push(&G->f, G->r_eax);
498 x86_call(&G->f, (GLubyte *) (do_print_float));
499 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4));
500 break;
501 case slang_asm_int_print:
502 /* TODO: use push imm32 */
503 x86_mov_reg_imm(&G->f, G->r_eax, (GLint) (infolog));
504 x86_push(&G->f, G->r_eax);
505 x86_call(&G->f, (GLubyte *) do_print_int);
506 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4));
507 break;
508 case slang_asm_bool_print:
509 /* TODO: use push imm32 */
510 x86_mov_reg_imm(&G->f, G->r_eax, (GLint) (infolog));
511 x86_push(&G->f, G->r_eax);
512 x86_call(&G->f, (GLubyte *) do_print_bool);
513 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4));
514 break;
515 /* vec4 */
516 case slang_asm_float_to_vec4:
517 /* [vec4] | float > [vec4] */
518 x87_fld(&G->f, x86_deref(G->r_esp));
519 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 4));
520 x86_mov(&G->f, G->r_eax, x86_deref(G->r_esp));
521 x87_fst(&G->f, x86_make_disp(G->r_eax, 12));
522 x87_fst(&G->f, x86_make_disp(G->r_eax, 8));
523 x87_fst(&G->f, x86_make_disp(G->r_eax, 4));
524 x87_fstp(&G->f, x86_deref(G->r_eax));
525 break;
526 case slang_asm_vec4_add:
527 /* [vec4] | vec4 > [vec4] */
528 x86_mov(&G->f, G->r_eax, x86_make_disp(G->r_esp, 16));
529 for (i = 0; i < 4; i++)
530 x87_fld(&G->f, x86_make_disp(G->r_eax, i * 4));
531 for (i = 0; i < 4; i++)
532 x87_fld(&G->f, x86_make_disp(G->r_esp, i * 4));
533 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 16));
534 for (i = 0; i < 4; i++)
535 x87_faddp(&G->f, G->r_st4);
536 for (i = 0; i < 4; i++)
537 x87_fstp(&G->f, x86_make_disp(G->r_eax, 12 - i * 4));
538 break;
539 case slang_asm_vec4_subtract:
540 /* [vec4] | vec4 > [vec4] */
541 x86_mov(&G->f, G->r_eax, x86_make_disp(G->r_esp, 16));
542 for (i = 0; i < 4; i++)
543 x87_fld(&G->f, x86_make_disp(G->r_eax, i * 4));
544 for (i = 0; i < 4; i++)
545 x87_fld(&G->f, x86_make_disp(G->r_esp, i * 4));
546 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 16));
547 for (i = 0; i < 4; i++)
548 x87_fsubp(&G->f, G->r_st4);
549 for (i = 0; i < 4; i++)
550 x87_fstp(&G->f, x86_make_disp(G->r_eax, 12 - i * 4));
551 break;
552 case slang_asm_vec4_multiply:
553 /* [vec4] | vec4 > [vec4] */
554 x86_mov(&G->f, G->r_eax, x86_make_disp(G->r_esp, 16));
555 for (i = 0; i < 4; i++)
556 x87_fld(&G->f, x86_make_disp(G->r_eax, i * 4));
557 for (i = 0; i < 4; i++)
558 x87_fld(&G->f, x86_make_disp(G->r_esp, i * 4));
559 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 16));
560 for (i = 0; i < 4; i++)
561 x87_fmulp(&G->f, G->r_st4);
562 for (i = 0; i < 4; i++)
563 x87_fstp(&G->f, x86_make_disp(G->r_eax, 12 - i * 4));
564 break;
565 case slang_asm_vec4_divide:
566 /* [vec4] | vec4 > [vec4] */
567 x86_mov(&G->f, G->r_eax, x86_make_disp(G->r_esp, 16));
568 for (i = 0; i < 4; i++)
569 x87_fld(&G->f, x86_make_disp(G->r_eax, i * 4));
570 for (i = 0; i < 4; i++)
571 x87_fld(&G->f, x86_make_disp(G->r_esp, i * 4));
572 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 16));
573 for (i = 0; i < 4; i++)
574 x87_fdivp(&G->f, G->r_st4);
575 for (i = 0; i < 4; i++)
576 x87_fstp(&G->f, x86_make_disp(G->r_eax, 12 - i * 4));
577 break;
578 case slang_asm_vec4_negate:
579 /* [vec4] > [vec4] */
580 x86_mov(&G->f, G->r_eax, x86_deref(G->r_esp));
581 for (i = 0; i < 4; i++)
582 x87_fld(&G->f, x86_make_disp(G->r_eax, i * 4));
583 for (i = 0; i < 4; i++) {
584 x87_fchs(&G->f);
585 x87_fstp(&G->f, x86_make_disp(G->r_eax, 12 - i * 4));
586 }
587 break;
588 case slang_asm_vec4_dot:
589 /* [vec4] | vec4 > [float] */
590 for (i = 0; i < 4; i++)
591 x87_fld(&G->f, x86_make_disp(G->r_esp, i * 4));
592 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, 16));
593 x86_mov(&G->f, G->r_eax, x86_deref(G->r_esp));
594 for (i = 0; i < 4; i++)
595 x87_fld(&G->f, x86_make_disp(G->r_eax, i * 4));
596 for (i = 0; i < 4; i++)
597 x87_fmulp(&G->f, G->r_st4);
598 for (i = 0; i < 3; i++)
599 x87_faddp(&G->f, G->r_st1);
600 x87_fstp(&G->f, x86_deref(G->r_eax));
601 break;
602 case slang_asm_vec4_copy:
603 /* [vec4] | vec4 > [vec4] */
604 x86_mov(&G->f, G->r_eax, x86_make_disp(G->r_esp, a->param[0]));
605 x86_pop(&G->f, G->r_ecx);
606 x86_pop(&G->f, G->r_edx);
607 x86_mov(&G->f, x86_make_disp(G->r_eax, a->param[1]), G->r_ecx);
608 x86_pop(&G->f, G->r_ebx);
609 x86_mov(&G->f, x86_make_disp(G->r_eax, a->param[1] + 4), G->r_edx);
610 x86_pop(&G->f, G->r_ecx);
611 x86_mov(&G->f, x86_make_disp(G->r_eax, a->param[1] + 8), G->r_ebx);
612 x86_mov(&G->f, x86_make_disp(G->r_eax, a->param[1] + 12), G->r_ecx);
613 break;
614 case slang_asm_vec4_deref:
615 /* [vec4] > vec4 */
616 x86_mov(&G->f, G->r_eax, x86_deref(G->r_esp));
617 x86_mov(&G->f, G->r_ecx, x86_make_disp(G->r_eax, 12));
618 x86_mov(&G->f, G->r_edx, x86_make_disp(G->r_eax, 8));
619 x86_mov(&G->f, x86_deref(G->r_esp), G->r_ecx);
620 x86_mov(&G->f, G->r_ebx, x86_make_disp(G->r_eax, 4));
621 x86_push(&G->f, G->r_edx);
622 x86_mov(&G->f, G->r_ecx, x86_deref(G->r_eax));
623 x86_push(&G->f, G->r_ebx);
624 x86_push(&G->f, G->r_ecx);
625 break;
626 case slang_asm_vec4_equal_int:
627 x86_lea(&G->f, G->r_esp, x86_make_disp(G->r_esp, -4));
628 x86_mov_reg_imm(&G->f, G->r_edx, 0x4000);
629 for (i = 0; i < 4; i++) {
630 x87_fld(&G->f, x86_make_disp(G->r_esp, a->param[0] + 4 + i * 4));
631 x87_fcomp(&G->f, x86_make_disp(G->r_esp, a->param[1] + 4 + i * 4));
632 x87_fnstsw(&G->f, G->r_eax);
633 x86_and(&G->f, G->r_edx, G->r_eax);
634 }
635 /* TODO: use test r8,imm8 */
636 x86_mov_reg_imm(&G->f, G->r_ecx, 0x4000);
637 x86_test(&G->f, G->r_edx, G->r_ecx);
638 {
639 GLubyte *lab0, *lab1;
640
641 /* TODO: use jcc rel8 */
642 lab0 = x86_jcc_forward(&G->f, cc_E);
643 x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ONE);
644 /* TODO: use jmp rel8 */
645 lab1 = x86_jmp_forward(&G->f);
646 x86_fixup_fwd_jump(&G->f, lab0);
647 x86_mov_reg_imm(&G->f, G->r_ecx, FLOAT_ZERO);
648 x86_fixup_fwd_jump(&G->f, lab1);
649 x86_mov(&G->f, x86_deref(G->r_esp), G->r_ecx);
650 }
651 break;
652 default:
653 _mesa_problem(NULL, "Unexpected switch case in codegen_assem");
654 }
655 }
656
657 GLboolean
658 _slang_x86_codegen(slang_machine * mach, slang_assembly_file * file,
659 GLuint start)
660 {
661 codegen_ctx G;
662 GLubyte *j_body, *j_exit;
663 GLuint i;
664
665 /* Free the old code - if any.
666 */
667 if (mach->x86.compiled_func != NULL) {
668 _mesa_exec_free(mach->x86.compiled_func);
669 mach->x86.compiled_func = NULL;
670 }
671
672 /*
673 * We need as much as 1M because *all* assembly, including built-in library, is
674 * being translated to x86.
675 * The built-in library occupies 450K, so we can be safe for now.
676 * It is going to change in the future, when we get assembly analysis running.
677 */
678 x86_init_func_size(&G.f, 1048576);
679 G.r_eax = x86_make_reg(file_REG32, reg_AX);
680 G.r_ecx = x86_make_reg(file_REG32, reg_CX);
681 G.r_edx = x86_make_reg(file_REG32, reg_DX);
682 G.r_ebx = x86_make_reg(file_REG32, reg_BX);
683 G.r_esp = x86_make_reg(file_REG32, reg_SP);
684 G.r_ebp = x86_make_reg(file_REG32, reg_BP);
685 G.r_st0 = x86_make_reg(file_x87, 0);
686 G.r_st1 = x86_make_reg(file_x87, 1);
687 G.r_st2 = x86_make_reg(file_x87, 2);
688 G.r_st3 = x86_make_reg(file_x87, 3);
689 G.r_st4 = x86_make_reg(file_x87, 4);
690 G.fixups = NULL;
691 G.fixup_count = 0;
692 G.labels =
693 (GLubyte **) slang_alloc_malloc(file->count * sizeof(GLubyte *));
694 G.mach = mach;
695 G.fpucntl = RESTORE_FPU;
696
697 mach->x86.fpucntl_rnd_neg = RND_NEG_FPU;
698 mach->x86.fpucntl_restore = RESTORE_FPU;
699
700 /* prepare stack and jump to start */
701 x86_push(&G.f, G.r_ebp);
702 x86_mov_reg_imm(&G.f, G.r_eax, (GLint) & mach->x86.esp_restore);
703 x86_push(&G.f, G.r_esp);
704 x86_pop(&G.f, G.r_ecx);
705 x86_mov(&G.f, x86_deref(G.r_eax), G.r_ecx);
706 j_body = x86_jmp_forward(&G.f);
707
708 /* "discard" instructions jump to this label */
709 G.l_discard = x86_get_label(&G.f);
710 x86_mov_reg_imm(&G.f, G.r_eax, (GLint) & G.mach->kill);
711 x86_mov_reg_imm(&G.f, G.r_ecx, 1);
712 x86_mov(&G.f, x86_deref(G.r_eax), G.r_ecx);
713 G.l_exit = x86_get_label(&G.f);
714 j_exit = x86_jmp_forward(&G.f);
715
716 for (i = 0; i < file->count; i++) {
717 G.labels[i] = x86_get_label(&G.f);
718 if (i == start)
719 x86_fixup_fwd_jump(&G.f, j_body);
720 codegen_assem(&G, &file->code[i], &mach->infolog);
721 }
722
723 /*
724 * Restore stack and return.
725 * This must be handled this way, because "discard" can be invoked from any
726 * place in the code.
727 */
728 x86_fixup_fwd_jump(&G.f, j_exit);
729 x86_mov_reg_imm(&G.f, G.r_eax, (GLint) & mach->x86.esp_restore);
730 x86_mov(&G.f, G.r_esp, x86_deref(G.r_eax));
731 x86_pop(&G.f, G.r_ebp);
732 if (G.fpucntl != RESTORE_FPU) {
733 x87_fnclex(&G.f);
734 x86_mov_reg_imm(&G.f, G.r_eax, (GLint) & G.mach->x86.fpucntl_restore);
735 x87_fldcw(&G.f, x86_deref(G.r_eax));
736 }
737 x86_ret(&G.f);
738
739 /* fixup forward labels */
740 for (i = 0; i < G.fixup_count; i++) {
741 G.f.csr = G.labels[G.fixups[i].index];
742 x86_fixup_fwd_jump(&G.f, G.fixups[i].csr);
743 }
744
745 slang_alloc_free(G.fixups);
746 slang_alloc_free(G.labels);
747
748 /* install new code */
749 mach->x86.compiled_func = (GLvoid(*)(slang_machine *)) x86_get_func(&G.f);
750
751 return GL_TRUE;
752 }
753
754 #endif