From b60a3da43f9a5fecf05058aa04bff12f710851f1 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 2 Aug 2006 14:51:24 +0000 Subject: [PATCH] Enable vec4 extension for x86 back-end. --- src/mesa/shader/slang/slang_assemble.c | 11 +- src/mesa/shader/slang/slang_assemble.h | 11 +- src/mesa/shader/slang/slang_compile.c | 77 +++++++----- src/mesa/shader/slang/slang_compile.h | 13 +- src/mesa/shader/slang/slang_execute_x86.c | 145 +++++++++++++++++++++- 5 files changed, 220 insertions(+), 37 deletions(-) diff --git a/src/mesa/shader/slang/slang_assemble.c b/src/mesa/shader/slang/slang_assemble.c index 4e0395226db..9e5851383c9 100644 --- a/src/mesa/shader/slang/slang_assemble.c +++ b/src/mesa/shader/slang/slang_assemble.c @@ -652,10 +652,19 @@ static const struct { "vec4_texcube", slang_asm_vec4_texcube, slang_asm_none }, { "vec4_shad1d", slang_asm_vec4_shad1d, slang_asm_none }, { "vec4_shad2d", slang_asm_vec4_shad2d, slang_asm_none }, - /* mesa-specific extensions */ + /* GL_MESA_shader_debug */ { "float_print", slang_asm_float_deref, slang_asm_float_print }, { "int_print", slang_asm_int_deref, slang_asm_int_print }, { "bool_print", slang_asm_bool_deref, slang_asm_bool_print }, + /* vec4 */ + { "float_to_vec4", slang_asm_float_to_vec4, slang_asm_none }, + { "vec4_add", slang_asm_vec4_add, slang_asm_none }, + { "vec4_subtract", slang_asm_vec4_subtract, slang_asm_none }, + { "vec4_multiply", slang_asm_vec4_multiply, slang_asm_none }, + { "vec4_divide", slang_asm_vec4_divide, slang_asm_none }, + { "vec4_negate", slang_asm_vec4_negate, slang_asm_none }, + { "vec4_dot", slang_asm_vec4_dot, slang_asm_none }, + { NULL, slang_asm_none, slang_asm_none } }; diff --git a/src/mesa/shader/slang/slang_assemble.h b/src/mesa/shader/slang/slang_assemble.h index 5c3afcaa5d1..fbf88bd6d18 100644 --- a/src/mesa/shader/slang/slang_assemble.h +++ b/src/mesa/shader/slang/slang_assemble.h @@ -93,10 +93,19 @@ typedef enum slang_assembly_type_ slang_asm_return, slang_asm_discard, slang_asm_exit, - /* mesa-specific extensions */ + /* GL_MESA_shader_debug */ slang_asm_float_print, slang_asm_int_print, slang_asm_bool_print, + /* vec4 */ + slang_asm_float_to_vec4, + slang_asm_vec4_add, + slang_asm_vec4_subtract, + slang_asm_vec4_multiply, + slang_asm_vec4_divide, + slang_asm_vec4_negate, + slang_asm_vec4_dot, + /* not a real assembly instruction */ slang_asm__last } slang_assembly_type; diff --git a/src/mesa/shader/slang/slang_compile.c b/src/mesa/shader/slang/slang_compile.c index 28c4120111c..bcbb7561827 100644 --- a/src/mesa/shader/slang/slang_compile.c +++ b/src/mesa/shader/slang/slang_compile.c @@ -80,9 +80,10 @@ _slang_code_unit_dtr (slang_code_unit *self) GLvoid _slang_code_object_ctr (slang_code_object *self) { - _slang_code_unit_ctr (&self->builtin[0], self); - _slang_code_unit_ctr (&self->builtin[1], self); - _slang_code_unit_ctr (&self->builtin[2], self); + GLuint i; + + for (i = 0; i < SLANG_BUILTIN_TOTAL; i++) + _slang_code_unit_ctr (&self->builtin[i], self); _slang_code_unit_ctr (&self->unit, self); _slang_assembly_file_ctr (&self->assembly); slang_machine_ctr (&self->machine); @@ -97,9 +98,10 @@ _slang_code_object_ctr (slang_code_object *self) GLvoid _slang_code_object_dtr (slang_code_object *self) { - _slang_code_unit_dtr (&self->builtin[0]); - _slang_code_unit_dtr (&self->builtin[1]); - _slang_code_unit_dtr (&self->builtin[2]); + GLuint i; + + for (i = 0; i < SLANG_BUILTIN_TOTAL; i++) + _slang_code_unit_dtr (&self->builtin[i]); _slang_code_unit_dtr (&self->unit); slang_assembly_file_destruct (&self->assembly); slang_machine_dtr (&self->machine); @@ -1675,18 +1677,22 @@ static int parse_init_declarator (slang_parse_ctx *C, slang_output_ctx *O, } /* initialize global variable */ - if (C->global_scope && var->initializer != NULL) - { - slang_assemble_ctx A; - - A.file = O->assembly; - A.mach = O->machine; - A.atoms = C->atoms; - A.space.funcs = O->funs; - A.space.structs = O->structs; - A.space.vars = O->vars; - if (!initialize_global (&A, var)) - return 0; + if (C->global_scope) { + if (var->initializer != NULL) { + slang_assemble_ctx A; + + A.file = O->assembly; + A.mach = O->machine; + A.atoms = C->atoms; + A.space.funcs = O->funs; + A.space.structs = O->structs; + A.space.vars = O->vars; + if (!initialize_global (&A, var)) + return 0; + } + else { + _mesa_memset ((GLubyte *) (O->machine->mem) + var->address, 0, var->size); + } } return 1; } @@ -1880,11 +1886,6 @@ parse_code_unit (slang_parse_ctx *C, slang_code_unit *unit) return 1; } -#define BUILTIN_CORE 0 -#define BUILTIN_COMMON 1 -#define BUILTIN_TARGET 2 -#define BUILTIN_TOTAL 3 - static GLboolean compile_binary (const byte *prod, slang_code_unit *unit, slang_unit_type type, slang_info_log *infolog, slang_code_unit *builtin, slang_code_unit *downlink) @@ -1935,7 +1936,7 @@ compile_with_grammar (grammar id, const char *source, slang_code_unit *unit, sla } /* syntax is okay - translate it to internal representation */ - if (!compile_binary (prod, unit, type, infolog, builtin, &builtin[BUILTIN_TARGET])) { + if (!compile_binary (prod, unit, type, infolog, builtin, &builtin[SLANG_BUILTIN_TOTAL - 1])) { grammar_alloc_free (prod); return GL_FALSE; } @@ -1964,6 +1965,12 @@ static const byte slang_vertex_builtin_gc[] = { #include "library/slang_vertex_builtin_gc.h" }; +#if defined(USE_X86_ASM) || defined(SLANG_X86) +static const byte slang_builtin_vec4_gc[] = { +#include "library/slang_builtin_vec4_gc.h" +}; +#endif + static GLboolean compile_object (grammar *id, const char *source, slang_code_object *object, slang_unit_type type, slang_info_log *infolog) @@ -1995,32 +2002,40 @@ compile_object (grammar *id, const char *source, slang_code_object *object, slan if (type == slang_unit_fragment_shader || type == slang_unit_vertex_shader) { /* compile core functionality first */ - if (!compile_binary (slang_core_gc, &object->builtin[BUILTIN_CORE], + if (!compile_binary (slang_core_gc, &object->builtin[SLANG_BUILTIN_CORE], slang_unit_fragment_builtin, infolog, NULL, NULL)) return GL_FALSE; /* compile common functions and variables, link to core */ - if (!compile_binary (slang_common_builtin_gc, &object->builtin[BUILTIN_COMMON], + if (!compile_binary (slang_common_builtin_gc, &object->builtin[SLANG_BUILTIN_COMMON], slang_unit_fragment_builtin, infolog, NULL, - &object->builtin[BUILTIN_CORE])) + &object->builtin[SLANG_BUILTIN_CORE])) return GL_FALSE; /* compile target-specific functions and variables, link to common */ if (type == slang_unit_fragment_shader) { - if (!compile_binary (slang_fragment_builtin_gc, &object->builtin[BUILTIN_TARGET], + if (!compile_binary (slang_fragment_builtin_gc, &object->builtin[SLANG_BUILTIN_TARGET], slang_unit_fragment_builtin, infolog, NULL, - &object->builtin[BUILTIN_COMMON])) + &object->builtin[SLANG_BUILTIN_COMMON])) return GL_FALSE; } else if (type == slang_unit_vertex_shader) { - if (!compile_binary (slang_vertex_builtin_gc, &object->builtin[BUILTIN_TARGET], + if (!compile_binary (slang_vertex_builtin_gc, &object->builtin[SLANG_BUILTIN_TARGET], slang_unit_vertex_builtin, infolog, NULL, - &object->builtin[BUILTIN_COMMON])) + &object->builtin[SLANG_BUILTIN_COMMON])) return GL_FALSE; } +#if defined(USE_X86_ASM) || defined(SLANG_X86) + /* compile x86 4-component vector overrides, link to target */ + if (!compile_binary (slang_builtin_vec4_gc, &object->builtin[SLANG_BUILTIN_VEC4], + slang_unit_fragment_builtin, infolog, NULL, + &object->builtin[SLANG_BUILTIN_TARGET])) + return GL_FALSE; +#endif + /* disable language extensions */ grammar_set_reg8 (*id, (const byte *) "parsing_builtin", 0); builtins = object->builtin; diff --git a/src/mesa/shader/slang/slang_compile.h b/src/mesa/shader/slang/slang_compile.h index 89c3dcfdba2..02987f4e1bc 100644 --- a/src/mesa/shader/slang/slang_compile.h +++ b/src/mesa/shader/slang/slang_compile.h @@ -64,9 +64,20 @@ _slang_code_unit_ctr (slang_code_unit *, struct slang_code_object_ *); extern GLvoid _slang_code_unit_dtr (slang_code_unit *); +#define SLANG_BUILTIN_CORE 0 +#define SLANG_BUILTIN_COMMON 1 +#define SLANG_BUILTIN_TARGET 2 + +#if defined(USE_X86_ASM) || defined(SLANG_X86) +#define SLANG_BUILTIN_VEC4 3 +#define SLANG_BUILTIN_TOTAL 4 +#else +#define SLANG_BUILTIN_TOTAL 3 +#endif + typedef struct slang_code_object_ { - slang_code_unit builtin[3]; + slang_code_unit builtin[SLANG_BUILTIN_TOTAL]; slang_code_unit unit; slang_assembly_file assembly; slang_machine machine; diff --git a/src/mesa/shader/slang/slang_execute_x86.c b/src/mesa/shader/slang/slang_execute_x86.c index 9d2967f2b73..59c3aadcaac 100644 --- a/src/mesa/shader/slang/slang_execute_x86.c +++ b/src/mesa/shader/slang/slang_execute_x86.c @@ -56,6 +56,7 @@ typedef struct struct x86_reg r_st1; struct x86_reg r_st2; struct x86_reg r_st3; + struct x86_reg r_st4; fixup *fixups; GLuint fixup_count; GLubyte **labels; @@ -481,7 +482,7 @@ static GLvoid codegen_assem (codegen_ctx *G, slang_assembly *a, slang_info_log * case slang_asm_exit: x86_jmp (&G->f, G->l_exit); break; - /* mesa-specific extensions */ + /* GL_MESA_shader_debug */ case slang_asm_float_print: /* TODO: use push imm32 */ x86_mov_reg_imm (&G->f, G->r_eax, (GLint) (infolog)); @@ -503,6 +504,138 @@ static GLvoid codegen_assem (codegen_ctx *G, slang_assembly *a, slang_info_log * x86_call (&G->f, (GLubyte *) do_print_bool); x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 4)); break; + /* vec4 */ + case slang_asm_float_to_vec4: + /* [vec4] | float */ + x87_fld (&G->f, x86_deref (G->r_esp)); + x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 4)); + x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); + x87_fst (&G->f, x86_make_disp (G->r_eax, 12)); + x87_fst (&G->f, x86_make_disp (G->r_eax, 8)); + x87_fst (&G->f, x86_make_disp (G->r_eax, 4)); + x87_fstp (&G->f, x86_deref (G->r_eax)); + break; + case slang_asm_vec4_add: + /* [vec4] | vec4 */ + x87_fld (&G->f, x86_deref (G->r_esp)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 8)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 12)); + x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); + x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); + x87_fld (&G->f, x86_deref (G->r_eax)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 4)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 8)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 12)); + x87_faddp (&G->f, G->r_st4); + x87_faddp (&G->f, G->r_st4); + x87_faddp (&G->f, G->r_st4); + x87_faddp (&G->f, G->r_st4); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 12)); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 8)); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 4)); + x87_fstp (&G->f, x86_deref (G->r_eax)); + break; + case slang_asm_vec4_subtract: + /* [vec4] | vec4 */ + x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, 16)); + x87_fld (&G->f, x86_deref (G->r_eax)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 4)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 8)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 12)); + x87_fld (&G->f, x86_deref (G->r_esp)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 8)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 12)); + x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); + x87_fsubp (&G->f, G->r_st4); + x87_fsubp (&G->f, G->r_st4); + x87_fsubp (&G->f, G->r_st4); + x87_fsubp (&G->f, G->r_st4); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 12)); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 8)); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 4)); + x87_fstp (&G->f, x86_deref (G->r_eax)); + break; + case slang_asm_vec4_multiply: + /* [vec4] | vec4 */ + x87_fld (&G->f, x86_deref (G->r_esp)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 8)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 12)); + x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); + x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); + x87_fld (&G->f, x86_deref (G->r_eax)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 4)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 8)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 12)); + x87_fmulp (&G->f, G->r_st4); + x87_fmulp (&G->f, G->r_st4); + x87_fmulp (&G->f, G->r_st4); + x87_fmulp (&G->f, G->r_st4); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 12)); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 8)); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 4)); + x87_fstp (&G->f, x86_deref (G->r_eax)); + break; + case slang_asm_vec4_divide: + /* [vec4] | vec4 */ + x86_mov (&G->f, G->r_eax, x86_make_disp (G->r_esp, 16)); + x87_fld (&G->f, x86_deref (G->r_eax)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 4)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 8)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 12)); + x87_fld (&G->f, x86_deref (G->r_esp)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 8)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 12)); + x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); + x87_fdivp (&G->f, G->r_st4); + x87_fdivp (&G->f, G->r_st4); + x87_fdivp (&G->f, G->r_st4); + x87_fdivp (&G->f, G->r_st4); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 12)); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 8)); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 4)); + x87_fstp (&G->f, x86_deref (G->r_eax)); + break; + case slang_asm_vec4_negate: + /* [vec4] */ + x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); + x87_fld (&G->f, x86_deref (G->r_eax)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 4)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 8)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 12)); + x87_fchs (&G->f); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 12)); + x87_fchs (&G->f); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 8)); + x87_fchs (&G->f); + x87_fstp (&G->f, x86_make_disp (G->r_eax, 4)); + x87_fchs (&G->f); + x87_fstp (&G->f, x86_deref (G->r_eax)); + break; + case slang_asm_vec4_dot: + /* [vec4] | vec4 */ + x87_fld (&G->f, x86_deref (G->r_esp)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 4)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 8)); + x87_fld (&G->f, x86_make_disp (G->r_esp, 12)); + x86_lea (&G->f, G->r_esp, x86_make_disp (G->r_esp, 16)); + x86_mov (&G->f, G->r_eax, x86_deref (G->r_esp)); + x87_fld (&G->f, x86_deref (G->r_eax)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 4)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 8)); + x87_fld (&G->f, x86_make_disp (G->r_eax, 12)); + x87_fmulp (&G->f, G->r_st4); + x87_fmulp (&G->f, G->r_st4); + x87_fmulp (&G->f, G->r_st4); + x87_fmulp (&G->f, G->r_st4); + x87_faddp (&G->f, G->r_st1); + x87_faddp (&G->f, G->r_st1); + x87_faddp (&G->f, G->r_st1); + x87_fstp (&G->f, x86_deref (G->r_eax)); + break; default: assert (0); } @@ -514,6 +647,13 @@ GLboolean _slang_x86_codegen (slang_machine *mach, slang_assembly_file *file, GL GLubyte *j_body, *j_exit; GLuint i; + /* Free the old code - if any. + */ + if (mach->x86.compiled_func != NULL) { + _mesa_exec_free (mach->x86.compiled_func); + mach->x86.compiled_func = NULL; + } + /* * We need as much as 1M because *all* assembly, including built-in library, is * being translated to x86. @@ -530,6 +670,7 @@ GLboolean _slang_x86_codegen (slang_machine *mach, slang_assembly_file *file, GL G.r_st1 = x86_make_reg (file_x87, 1); G.r_st2 = x86_make_reg (file_x87, 2); G.r_st3 = x86_make_reg (file_x87, 3); + G.r_st4 = x86_make_reg (file_x87, 4); G.fixups = NULL; G.fixup_count = 0; G.labels = (GLubyte **) slang_alloc_malloc (file->count * sizeof (GLubyte *)); @@ -591,8 +732,6 @@ GLboolean _slang_x86_codegen (slang_machine *mach, slang_assembly_file *file, GL slang_alloc_free (G.labels); /* install new code */ - if (mach->x86.compiled_func != NULL) - _mesa_exec_free (mach->x86.compiled_func); mach->x86.compiled_func = (GLvoid (*) (slang_machine *)) x86_get_func (&G.f); return GL_TRUE; -- 2.30.2