2 * Mesa 3-D graphics library
5 * Copyright (C) 2006 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 * \file slang_execute_x86.c
27 * x86 back end compiler
28 * \author Michal Krol, Keith Whitwell
32 #include "slang_compile.h"
33 #include "slang_execute.h"
34 #include "slang_library_noise.h"
35 #include "slang_library_texsample.h"
37 #if defined(USE_X86_ASM) || defined(SLANG_X86)
39 #include "x86/rtasm/x86sse.h"
49 struct x86_function f
;
71 add_fixup(codegen_ctx
* G
, GLuint index
, GLubyte
* csr
)
74 (fixup
*) slang_alloc_realloc(G
->fixups
, G
->fixup_count
* sizeof(fixup
),
75 (G
->fixup_count
+ 1) * sizeof(fixup
));
76 G
->fixups
[G
->fixup_count
].index
= index
;
77 G
->fixups
[G
->fixup_count
].csr
= csr
;
82 #define RESTORE_FPU (DEFAULT_X86_FPU)
83 #define RND_NEG_FPU (DEFAULT_X86_FPU | 0x400)
85 #define RESTORE_FPU (FAST_X86_FPU)
86 #define RND_NEG_FPU (FAST_X86_FPU | 0x400)
 * These routines are meant to emit valid code that computes powers.
 * Unfortunately, the code they emit does not.
97 set_fpu_round_neg_inf(codegen_ctx
* G
)
99 if (G
->fpucntl
!= RND_NEG_FPU
) {
100 G
->fpucntl
= RND_NEG_FPU
;
102 x86_mov_reg_imm(&G
->f
, G
->r_eax
,
103 (GLint
) & G
->mach
->x86
.fpucntl_rnd_neg
);
104 x87_fldcw(&G
->f
, x86_deref(G
->r_eax
));
109 emit_x87_ex2(codegen_ctx
* G
)
111 set_fpu_round_neg_inf(G
);
113 x87_fld(&G
->f
, G
->r_st0
); /* a a */
114 x87_fprndint(&G
->f
); /* int(a) a */
115 x87_fld(&G
->f
, G
->r_st0
); /* int(a) int(a) a */
116 x87_fstp(&G
->f
, G
->r_st3
); /* int(a) a int(a) */
117 x87_fsubp(&G
->f
, G
->r_st1
); /* frac(a) int(a) */
118 x87_f2xm1(&G
->f
); /* (2^frac(a))-1 int(a) */
119 x87_fld1(&G
->f
); /* 1 (2^frac(a))-1 int(a) */
120 x87_faddp(&G
->f
, G
->r_st1
); /* 2^frac(a) int(a) */
121 x87_fscale(&G
->f
); /* 2^a */
125 emit_pow(codegen_ctx
* G
)
127 x87_fld(&G
->f
, x86_deref(G
->r_esp
));
128 x87_fld(&G
->f
, x86_make_disp(G
->r_esp
, 4));
150 return (GLfloat
) ((GLint
) (x
));
154 do_powf(GLfloat y
, GLfloat x
)
156 return (GLfloat
) _mesa_pow((GLdouble
) x
, (GLdouble
) y
);
160 ensure_infolog_created(slang_info_log
** infolog
)
162 if (*infolog
== NULL
) {
163 *infolog
= slang_alloc_malloc(sizeof(slang_info_log
));
164 if (*infolog
== NULL
)
166 slang_info_log_construct(*infolog
);
171 do_print_float(slang_info_log
** infolog
, GLfloat x
)
173 _mesa_printf("slang print: %f\n", x
);
174 ensure_infolog_created(infolog
);
175 slang_info_log_print(*infolog
, "%f", x
);
179 do_print_int(slang_info_log
** infolog
, GLfloat x
)
181 _mesa_printf("slang print: %d\n", (GLint
) (x
));
182 ensure_infolog_created(infolog
);
183 slang_info_log_print(*infolog
, "%d", (GLint
) (x
));
187 do_print_bool(slang_info_log
** infolog
, GLfloat x
)
189 _mesa_printf("slang print: %s\n", (GLint
) (x
) ? "true" : "false");
190 ensure_infolog_created(infolog
);
191 slang_info_log_print(*infolog
, "%s", (GLint
) (x
) ? "true" : "false");
194 #define FLOAT_ONE 0x3f800000
198 codegen_assem(codegen_ctx
* G
, slang_assembly
* a
, slang_info_log
** infolog
)
205 case slang_asm_float_copy
:
206 case slang_asm_int_copy
:
207 case slang_asm_bool_copy
:
208 x86_mov(&G
->f
, G
->r_eax
, x86_make_disp(G
->r_esp
, a
->param
[0]));
209 x86_pop(&G
->f
, G
->r_ecx
);
210 x86_mov(&G
->f
, x86_make_disp(G
->r_eax
, a
->param
[1]), G
->r_ecx
);
212 case slang_asm_float_move
:
213 case slang_asm_int_move
:
214 case slang_asm_bool_move
:
215 x86_lea(&G
->f
, G
->r_eax
, x86_make_disp(G
->r_esp
, a
->param
[1]));
216 x86_add(&G
->f
, G
->r_eax
, x86_deref(G
->r_esp
));
217 x86_mov(&G
->f
, G
->r_eax
, x86_deref(G
->r_eax
));
218 x86_mov(&G
->f
, x86_make_disp(G
->r_esp
, a
->param
[0]), G
->r_eax
);
220 case slang_asm_float_push
:
221 case slang_asm_int_push
:
222 case slang_asm_bool_push
:
223 /* TODO: use push imm32 */
224 x86_mov_reg_imm(&G
->f
, G
->r_eax
, *((GLint
*) & a
->literal
));
225 x86_push(&G
->f
, G
->r_eax
);
227 case slang_asm_float_deref
:
228 case slang_asm_int_deref
:
229 case slang_asm_bool_deref
:
230 case slang_asm_addr_deref
:
231 x86_mov(&G
->f
, G
->r_eax
, x86_deref(G
->r_esp
));
232 x86_mov(&G
->f
, G
->r_eax
, x86_deref(G
->r_eax
));
233 x86_mov(&G
->f
, x86_deref(G
->r_esp
), G
->r_eax
);
235 case slang_asm_float_add
:
236 x87_fld(&G
->f
, x86_make_disp(G
->r_esp
, 4));
237 x87_fld(&G
->f
, x86_deref(G
->r_esp
));
238 x87_faddp(&G
->f
, G
->r_st1
);
239 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 4));
240 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
242 case slang_asm_float_multiply
:
243 x87_fld(&G
->f
, x86_make_disp(G
->r_esp
, 4));
244 x87_fld(&G
->f
, x86_deref(G
->r_esp
));
245 x87_fmulp(&G
->f
, G
->r_st1
);
246 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 4));
247 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
249 case slang_asm_float_divide
:
250 x87_fld(&G
->f
, x86_make_disp(G
->r_esp
, 4));
251 x87_fld(&G
->f
, x86_deref(G
->r_esp
));
252 x87_fdivp(&G
->f
, G
->r_st1
);
253 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 4));
254 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
256 case slang_asm_float_negate
:
257 x87_fld(&G
->f
, x86_deref(G
->r_esp
));
259 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
261 case slang_asm_float_less
:
262 x87_fld(&G
->f
, x86_make_disp(G
->r_esp
, 4));
263 x87_fcomp(&G
->f
, x86_deref(G
->r_esp
));
264 x87_fnstsw(&G
->f
, G
->r_eax
);
265 /* TODO: use test r8,imm8 */
266 x86_mov_reg_imm(&G
->f
, G
->r_ecx
, 0x100);
267 x86_test(&G
->f
, G
->r_eax
, G
->r_ecx
);
269 GLubyte
*lab0
, *lab1
;
270 /* TODO: use jcc rel8 */
271 lab0
= x86_jcc_forward(&G
->f
, cc_E
);
272 x86_mov_reg_imm(&G
->f
, G
->r_ecx
, FLOAT_ONE
);
273 /* TODO: use jmp rel8 */
274 lab1
= x86_jmp_forward(&G
->f
);
275 x86_fixup_fwd_jump(&G
->f
, lab0
);
276 x86_mov_reg_imm(&G
->f
, G
->r_ecx
, FLOAT_ZERO
);
277 x86_fixup_fwd_jump(&G
->f
, lab1
);
278 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 4));
279 x86_mov(&G
->f
, x86_deref(G
->r_esp
), G
->r_ecx
);
282 case slang_asm_float_equal_exp
:
283 x87_fld(&G
->f
, x86_make_disp(G
->r_esp
, 4));
284 x87_fcomp(&G
->f
, x86_deref(G
->r_esp
));
285 x87_fnstsw(&G
->f
, G
->r_eax
);
286 /* TODO: use test r8,imm8 */
287 x86_mov_reg_imm(&G
->f
, G
->r_ecx
, 0x4000);
288 x86_test(&G
->f
, G
->r_eax
, G
->r_ecx
);
290 GLubyte
*lab0
, *lab1
;
291 /* TODO: use jcc rel8 */
292 lab0
= x86_jcc_forward(&G
->f
, cc_E
);
293 x86_mov_reg_imm(&G
->f
, G
->r_ecx
, FLOAT_ONE
);
294 /* TODO: use jmp rel8 */
295 lab1
= x86_jmp_forward(&G
->f
);
296 x86_fixup_fwd_jump(&G
->f
, lab0
);
297 x86_mov_reg_imm(&G
->f
, G
->r_ecx
, FLOAT_ZERO
);
298 x86_fixup_fwd_jump(&G
->f
, lab1
);
299 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 4));
300 x86_mov(&G
->f
, x86_deref(G
->r_esp
), G
->r_ecx
);
303 case slang_asm_float_equal_int
:
304 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, -4));
305 x87_fld(&G
->f
, x86_make_disp(G
->r_esp
, a
->param
[0] + 4));
306 x87_fcomp(&G
->f
, x86_make_disp(G
->r_esp
, a
->param
[1] + 4));
307 x87_fnstsw(&G
->f
, G
->r_eax
);
308 /* TODO: use test r8,imm8 */
309 x86_mov_reg_imm(&G
->f
, G
->r_ecx
, 0x4000);
310 x86_test(&G
->f
, G
->r_eax
, G
->r_ecx
);
312 GLubyte
*lab0
, *lab1
;
313 /* TODO: use jcc rel8 */
314 lab0
= x86_jcc_forward(&G
->f
, cc_E
);
315 x86_mov_reg_imm(&G
->f
, G
->r_ecx
, FLOAT_ONE
);
316 /* TODO: use jmp rel8 */
317 lab1
= x86_jmp_forward(&G
->f
);
318 x86_fixup_fwd_jump(&G
->f
, lab0
);
319 x86_mov_reg_imm(&G
->f
, G
->r_ecx
, FLOAT_ZERO
);
320 x86_fixup_fwd_jump(&G
->f
, lab1
);
321 x86_mov(&G
->f
, x86_deref(G
->r_esp
), G
->r_ecx
);
324 case slang_asm_float_to_int
:
325 /* TODO: use fistp without rounding */
326 x86_call(&G
->f
, (GLubyte
*) (do_ftoi
));
327 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
329 case slang_asm_float_sine
:
331 x86_call(&G
->f
, (GLubyte
*) _mesa_sinf
);
332 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
334 case slang_asm_float_arcsine
:
335 /* TODO: use fpatan (?) */
336 x86_call(&G
->f
, (GLubyte
*) _mesa_asinf
);
337 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
339 case slang_asm_float_arctan
:
340 /* TODO: use fpatan */
341 x86_call(&G
->f
, (GLubyte
*) _mesa_atanf
);
342 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
344 case slang_asm_float_power
:
345 /* TODO: use emit_pow() */
346 x86_call(&G
->f
, (GLubyte
*) do_powf
);
347 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 4));
348 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
350 case slang_asm_float_log2
:
352 x87_fld(&G
->f
, x86_deref(G
->r_esp
));
354 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
356 case slang_asm_float_floor
:
357 x86_call(&G
->f
, (GLubyte
*) do_floorf
);
358 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
360 case slang_asm_float_ceil
:
361 x86_call(&G
->f
, (GLubyte
*) do_ceilf
);
362 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
364 case slang_asm_float_noise1
:
365 x86_call(&G
->f
, (GLubyte
*) _slang_library_noise1
);
366 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
368 case slang_asm_float_noise2
:
369 x86_call(&G
->f
, (GLubyte
*) _slang_library_noise2
);
370 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 4));
371 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
373 case slang_asm_float_noise3
:
374 x86_call(&G
->f
, (GLubyte
*) _slang_library_noise4
);
375 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 8));
376 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
378 case slang_asm_float_noise4
:
379 x86_call(&G
->f
, (GLubyte
*) _slang_library_noise4
);
380 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 12));
381 x87_fstp(&G
->f
, x86_deref(G
->r_esp
));
383 case slang_asm_int_to_float
:
385 case slang_asm_int_to_addr
:
386 x87_fld(&G
->f
, x86_deref(G
->r_esp
));
387 x87_fistp(&G
->f
, x86_deref(G
->r_esp
));
389 case slang_asm_addr_copy
:
390 x86_pop(&G
->f
, G
->r_eax
);
391 x86_mov(&G
->f
, G
->r_ecx
, x86_deref(G
->r_esp
));
392 x86_mov(&G
->f
, x86_deref(G
->r_ecx
), G
->r_eax
);
394 case slang_asm_addr_push
:
395 /* TODO: use push imm32 */
396 x86_mov_reg_imm(&G
->f
, G
->r_eax
, (GLint
) a
->param
[0]);
397 x86_push(&G
->f
, G
->r_eax
);
399 case slang_asm_addr_add
:
400 x86_pop(&G
->f
, G
->r_eax
);
401 x86_add(&G
->f
, x86_deref(G
->r_esp
), G
->r_eax
);
403 case slang_asm_addr_multiply
:
404 x86_pop(&G
->f
, G
->r_ecx
);
405 x86_mov(&G
->f
, G
->r_eax
, x86_deref(G
->r_esp
));
406 x86_mul(&G
->f
, G
->r_ecx
);
407 x86_mov(&G
->f
, x86_deref(G
->r_esp
), G
->r_eax
);
409 case slang_asm_vec4_tex1d
:
410 x86_call(&G
->f
, (GLubyte
*) _slang_library_tex1d
);
411 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 12));
413 case slang_asm_vec4_tex2d
:
414 x86_call(&G
->f
, (GLubyte
*) _slang_library_tex2d
);
415 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 16));
417 case slang_asm_vec4_tex3d
:
418 x86_call(&G
->f
, (GLubyte
*) _slang_library_tex3d
);
419 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 20));
421 case slang_asm_vec4_texcube
:
422 x86_call(&G
->f
, (GLubyte
*) _slang_library_texcube
);
423 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 20));
425 case slang_asm_vec4_shad1d
:
426 x86_call(&G
->f
, (GLubyte
*) _slang_library_shad1d
);
427 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 20));
429 case slang_asm_vec4_shad2d
:
430 x86_call(&G
->f
, (GLubyte
*) _slang_library_shad2d
);
431 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 20));
434 add_fixup(G
, a
->param
[0], x86_jmp_forward(&G
->f
));
436 case slang_asm_jump_if_zero
:
437 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 4));
438 x86_xor(&G
->f
, G
->r_eax
, G
->r_eax
);
439 x86_cmp(&G
->f
, G
->r_eax
, x86_make_disp(G
->r_esp
, -4));
442 /* TODO: use jcc rel8 */
443 lab0
= x86_jcc_forward(&G
->f
, cc_NE
);
444 add_fixup(G
, a
->param
[0], x86_jmp_forward(&G
->f
));
445 x86_fixup_fwd_jump(&G
->f
, lab0
);
448 case slang_asm_enter
:
449 /* FIXME: x86_make_disp(esp, 0) + x86_lea() generates bogus code */
450 assert(a
->param
[0] != 0);
451 x86_push(&G
->f
, G
->r_ebp
);
452 x86_lea(&G
->f
, G
->r_ebp
, x86_make_disp(G
->r_esp
, (GLint
) a
->param
[0]));
454 case slang_asm_leave
:
455 x86_pop(&G
->f
, G
->r_ebp
);
457 case slang_asm_local_alloc
:
458 /* FIXME: x86_make_disp(esp, 0) + x86_lea() generates bogus code */
459 assert(a
->param
[0] != 0);
460 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, -(GLint
) a
->param
[0]));
462 case slang_asm_local_free
:
463 /* FIXME: x86_make_disp(esp, 0) + x86_lea() generates bogus code */
464 assert(a
->param
[0] != 0);
465 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, (GLint
) a
->param
[0]));
467 case slang_asm_local_addr
:
468 disp
= -(GLint
) (a
->param
[0] + a
->param
[1]) + 4;
470 x86_lea(&G
->f
, G
->r_eax
, x86_make_disp(G
->r_ebp
, disp
));
471 x86_push(&G
->f
, G
->r_eax
);
474 x86_push(&G
->f
, G
->r_ebp
);
476 case slang_asm_global_addr
:
477 /* TODO: use push imm32 */
478 x86_mov_reg_imm(&G
->f
, G
->r_eax
, (GLint
) & G
->mach
->mem
+ a
->param
[0]);
479 x86_push(&G
->f
, G
->r_eax
);
482 add_fixup(G
, a
->param
[0], x86_call_forward(&G
->f
));
484 case slang_asm_return
:
487 case slang_asm_discard
:
488 x86_jmp(&G
->f
, G
->l_discard
);
491 x86_jmp(&G
->f
, G
->l_exit
);
493 /* GL_MESA_shader_debug */
494 case slang_asm_float_print
:
495 /* TODO: use push imm32 */
496 x86_mov_reg_imm(&G
->f
, G
->r_eax
, (GLint
) (infolog
));
497 x86_push(&G
->f
, G
->r_eax
);
498 x86_call(&G
->f
, (GLubyte
*) (do_print_float
));
499 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 4));
501 case slang_asm_int_print
:
502 /* TODO: use push imm32 */
503 x86_mov_reg_imm(&G
->f
, G
->r_eax
, (GLint
) (infolog
));
504 x86_push(&G
->f
, G
->r_eax
);
505 x86_call(&G
->f
, (GLubyte
*) do_print_int
);
506 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 4));
508 case slang_asm_bool_print
:
509 /* TODO: use push imm32 */
510 x86_mov_reg_imm(&G
->f
, G
->r_eax
, (GLint
) (infolog
));
511 x86_push(&G
->f
, G
->r_eax
);
512 x86_call(&G
->f
, (GLubyte
*) do_print_bool
);
513 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 4));
516 case slang_asm_float_to_vec4
:
517 /* [vec4] | float > [vec4] */
518 x87_fld(&G
->f
, x86_deref(G
->r_esp
));
519 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 4));
520 x86_mov(&G
->f
, G
->r_eax
, x86_deref(G
->r_esp
));
521 x87_fst(&G
->f
, x86_make_disp(G
->r_eax
, 12));
522 x87_fst(&G
->f
, x86_make_disp(G
->r_eax
, 8));
523 x87_fst(&G
->f
, x86_make_disp(G
->r_eax
, 4));
524 x87_fstp(&G
->f
, x86_deref(G
->r_eax
));
526 case slang_asm_vec4_add
:
527 /* [vec4] | vec4 > [vec4] */
528 x86_mov(&G
->f
, G
->r_eax
, x86_make_disp(G
->r_esp
, 16));
529 for (i
= 0; i
< 4; i
++)
530 x87_fld(&G
->f
, x86_make_disp(G
->r_eax
, i
* 4));
531 for (i
= 0; i
< 4; i
++)
532 x87_fld(&G
->f
, x86_make_disp(G
->r_esp
, i
* 4));
533 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 16));
534 for (i
= 0; i
< 4; i
++)
535 x87_faddp(&G
->f
, G
->r_st4
);
536 for (i
= 0; i
< 4; i
++)
537 x87_fstp(&G
->f
, x86_make_disp(G
->r_eax
, 12 - i
* 4));
539 case slang_asm_vec4_subtract
:
540 /* [vec4] | vec4 > [vec4] */
541 x86_mov(&G
->f
, G
->r_eax
, x86_make_disp(G
->r_esp
, 16));
542 for (i
= 0; i
< 4; i
++)
543 x87_fld(&G
->f
, x86_make_disp(G
->r_eax
, i
* 4));
544 for (i
= 0; i
< 4; i
++)
545 x87_fld(&G
->f
, x86_make_disp(G
->r_esp
, i
* 4));
546 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 16));
547 for (i
= 0; i
< 4; i
++)
548 x87_fsubp(&G
->f
, G
->r_st4
);
549 for (i
= 0; i
< 4; i
++)
550 x87_fstp(&G
->f
, x86_make_disp(G
->r_eax
, 12 - i
* 4));
552 case slang_asm_vec4_multiply
:
553 /* [vec4] | vec4 > [vec4] */
554 x86_mov(&G
->f
, G
->r_eax
, x86_make_disp(G
->r_esp
, 16));
555 for (i
= 0; i
< 4; i
++)
556 x87_fld(&G
->f
, x86_make_disp(G
->r_eax
, i
* 4));
557 for (i
= 0; i
< 4; i
++)
558 x87_fld(&G
->f
, x86_make_disp(G
->r_esp
, i
* 4));
559 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 16));
560 for (i
= 0; i
< 4; i
++)
561 x87_fmulp(&G
->f
, G
->r_st4
);
562 for (i
= 0; i
< 4; i
++)
563 x87_fstp(&G
->f
, x86_make_disp(G
->r_eax
, 12 - i
* 4));
565 case slang_asm_vec4_divide
:
566 /* [vec4] | vec4 > [vec4] */
567 x86_mov(&G
->f
, G
->r_eax
, x86_make_disp(G
->r_esp
, 16));
568 for (i
= 0; i
< 4; i
++)
569 x87_fld(&G
->f
, x86_make_disp(G
->r_eax
, i
* 4));
570 for (i
= 0; i
< 4; i
++)
571 x87_fld(&G
->f
, x86_make_disp(G
->r_esp
, i
* 4));
572 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 16));
573 for (i
= 0; i
< 4; i
++)
574 x87_fdivp(&G
->f
, G
->r_st4
);
575 for (i
= 0; i
< 4; i
++)
576 x87_fstp(&G
->f
, x86_make_disp(G
->r_eax
, 12 - i
* 4));
578 case slang_asm_vec4_negate
:
579 /* [vec4] > [vec4] */
580 x86_mov(&G
->f
, G
->r_eax
, x86_deref(G
->r_esp
));
581 for (i
= 0; i
< 4; i
++)
582 x87_fld(&G
->f
, x86_make_disp(G
->r_eax
, i
* 4));
583 for (i
= 0; i
< 4; i
++) {
585 x87_fstp(&G
->f
, x86_make_disp(G
->r_eax
, 12 - i
* 4));
588 case slang_asm_vec4_dot
:
589 /* [vec4] | vec4 > [float] */
590 for (i
= 0; i
< 4; i
++)
591 x87_fld(&G
->f
, x86_make_disp(G
->r_esp
, i
* 4));
592 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, 16));
593 x86_mov(&G
->f
, G
->r_eax
, x86_deref(G
->r_esp
));
594 for (i
= 0; i
< 4; i
++)
595 x87_fld(&G
->f
, x86_make_disp(G
->r_eax
, i
* 4));
596 for (i
= 0; i
< 4; i
++)
597 x87_fmulp(&G
->f
, G
->r_st4
);
598 for (i
= 0; i
< 3; i
++)
599 x87_faddp(&G
->f
, G
->r_st1
);
600 x87_fstp(&G
->f
, x86_deref(G
->r_eax
));
602 case slang_asm_vec4_copy
:
603 /* [vec4] | vec4 > [vec4] */
604 x86_mov(&G
->f
, G
->r_eax
, x86_make_disp(G
->r_esp
, a
->param
[0]));
605 x86_pop(&G
->f
, G
->r_ecx
);
606 x86_pop(&G
->f
, G
->r_edx
);
607 x86_mov(&G
->f
, x86_make_disp(G
->r_eax
, a
->param
[1]), G
->r_ecx
);
608 x86_pop(&G
->f
, G
->r_ebx
);
609 x86_mov(&G
->f
, x86_make_disp(G
->r_eax
, a
->param
[1] + 4), G
->r_edx
);
610 x86_pop(&G
->f
, G
->r_ecx
);
611 x86_mov(&G
->f
, x86_make_disp(G
->r_eax
, a
->param
[1] + 8), G
->r_ebx
);
612 x86_mov(&G
->f
, x86_make_disp(G
->r_eax
, a
->param
[1] + 12), G
->r_ecx
);
614 case slang_asm_vec4_deref
:
616 x86_mov(&G
->f
, G
->r_eax
, x86_deref(G
->r_esp
));
617 x86_mov(&G
->f
, G
->r_ecx
, x86_make_disp(G
->r_eax
, 12));
618 x86_mov(&G
->f
, G
->r_edx
, x86_make_disp(G
->r_eax
, 8));
619 x86_mov(&G
->f
, x86_deref(G
->r_esp
), G
->r_ecx
);
620 x86_mov(&G
->f
, G
->r_ebx
, x86_make_disp(G
->r_eax
, 4));
621 x86_push(&G
->f
, G
->r_edx
);
622 x86_mov(&G
->f
, G
->r_ecx
, x86_deref(G
->r_eax
));
623 x86_push(&G
->f
, G
->r_ebx
);
624 x86_push(&G
->f
, G
->r_ecx
);
626 case slang_asm_vec4_equal_int
:
627 x86_lea(&G
->f
, G
->r_esp
, x86_make_disp(G
->r_esp
, -4));
628 x86_mov_reg_imm(&G
->f
, G
->r_edx
, 0x4000);
629 for (i
= 0; i
< 4; i
++) {
630 x87_fld(&G
->f
, x86_make_disp(G
->r_esp
, a
->param
[0] + 4 + i
* 4));
631 x87_fcomp(&G
->f
, x86_make_disp(G
->r_esp
, a
->param
[1] + 4 + i
* 4));
632 x87_fnstsw(&G
->f
, G
->r_eax
);
633 x86_and(&G
->f
, G
->r_edx
, G
->r_eax
);
635 /* TODO: use test r8,imm8 */
636 x86_mov_reg_imm(&G
->f
, G
->r_ecx
, 0x4000);
637 x86_test(&G
->f
, G
->r_edx
, G
->r_ecx
);
639 GLubyte
*lab0
, *lab1
;
641 /* TODO: use jcc rel8 */
642 lab0
= x86_jcc_forward(&G
->f
, cc_E
);
643 x86_mov_reg_imm(&G
->f
, G
->r_ecx
, FLOAT_ONE
);
644 /* TODO: use jmp rel8 */
645 lab1
= x86_jmp_forward(&G
->f
);
646 x86_fixup_fwd_jump(&G
->f
, lab0
);
647 x86_mov_reg_imm(&G
->f
, G
->r_ecx
, FLOAT_ZERO
);
648 x86_fixup_fwd_jump(&G
->f
, lab1
);
649 x86_mov(&G
->f
, x86_deref(G
->r_esp
), G
->r_ecx
);
653 _mesa_problem(NULL
, "Unexpected switch case in codegen_assem");
658 _slang_x86_codegen(slang_machine
* mach
, slang_assembly_file
* file
,
662 GLubyte
*j_body
, *j_exit
;
665 /* Free the old code - if any.
667 if (mach
->x86
.compiled_func
!= NULL
) {
668 _mesa_exec_free(mach
->x86
.compiled_func
);
669 mach
->x86
.compiled_func
= NULL
;
673 * We need as much as 1M because *all* assembly, including built-in library, is
674 * being translated to x86.
675 * The built-in library occupies 450K, so we can be safe for now.
676 * It is going to change in the future, when we get assembly analysis running.
678 x86_init_func_size(&G
.f
, 1048576);
679 G
.r_eax
= x86_make_reg(file_REG32
, reg_AX
);
680 G
.r_ecx
= x86_make_reg(file_REG32
, reg_CX
);
681 G
.r_edx
= x86_make_reg(file_REG32
, reg_DX
);
682 G
.r_ebx
= x86_make_reg(file_REG32
, reg_BX
);
683 G
.r_esp
= x86_make_reg(file_REG32
, reg_SP
);
684 G
.r_ebp
= x86_make_reg(file_REG32
, reg_BP
);
685 G
.r_st0
= x86_make_reg(file_x87
, 0);
686 G
.r_st1
= x86_make_reg(file_x87
, 1);
687 G
.r_st2
= x86_make_reg(file_x87
, 2);
688 G
.r_st3
= x86_make_reg(file_x87
, 3);
689 G
.r_st4
= x86_make_reg(file_x87
, 4);
693 (GLubyte
**) slang_alloc_malloc(file
->count
* sizeof(GLubyte
*));
695 G
.fpucntl
= RESTORE_FPU
;
697 mach
->x86
.fpucntl_rnd_neg
= RND_NEG_FPU
;
698 mach
->x86
.fpucntl_restore
= RESTORE_FPU
;
700 /* prepare stack and jump to start */
701 x86_push(&G
.f
, G
.r_ebp
);
702 x86_mov_reg_imm(&G
.f
, G
.r_eax
, (GLint
) & mach
->x86
.esp_restore
);
703 x86_push(&G
.f
, G
.r_esp
);
704 x86_pop(&G
.f
, G
.r_ecx
);
705 x86_mov(&G
.f
, x86_deref(G
.r_eax
), G
.r_ecx
);
706 j_body
= x86_jmp_forward(&G
.f
);
708 /* "discard" instructions jump to this label */
709 G
.l_discard
= x86_get_label(&G
.f
);
710 x86_mov_reg_imm(&G
.f
, G
.r_eax
, (GLint
) & G
.mach
->kill
);
711 x86_mov_reg_imm(&G
.f
, G
.r_ecx
, 1);
712 x86_mov(&G
.f
, x86_deref(G
.r_eax
), G
.r_ecx
);
713 G
.l_exit
= x86_get_label(&G
.f
);
714 j_exit
= x86_jmp_forward(&G
.f
);
716 for (i
= 0; i
< file
->count
; i
++) {
717 G
.labels
[i
] = x86_get_label(&G
.f
);
719 x86_fixup_fwd_jump(&G
.f
, j_body
);
720 codegen_assem(&G
, &file
->code
[i
], &mach
->infolog
);
724 * Restore stack and return.
725 * This must be handled this way, because "discard" can be invoked from any
728 x86_fixup_fwd_jump(&G
.f
, j_exit
);
729 x86_mov_reg_imm(&G
.f
, G
.r_eax
, (GLint
) & mach
->x86
.esp_restore
);
730 x86_mov(&G
.f
, G
.r_esp
, x86_deref(G
.r_eax
));
731 x86_pop(&G
.f
, G
.r_ebp
);
732 if (G
.fpucntl
!= RESTORE_FPU
) {
734 x86_mov_reg_imm(&G
.f
, G
.r_eax
, (GLint
) & G
.mach
->x86
.fpucntl_restore
);
735 x87_fldcw(&G
.f
, x86_deref(G
.r_eax
));
739 /* fixup forward labels */
740 for (i
= 0; i
< G
.fixup_count
; i
++) {
741 G
.f
.csr
= G
.labels
[G
.fixups
[i
].index
];
742 x86_fixup_fwd_jump(&G
.f
, G
.fixups
[i
].csr
);
745 slang_alloc_free(G
.fixups
);
746 slang_alloc_free(G
.labels
);
748 /* install new code */
749 mach
->x86
.compiled_func
= (GLvoid(*)(slang_machine
*)) x86_get_func(&G
.f
);