draw: Use register names more consistently.
[mesa.git] / src / gallium / auxiliary / draw / draw_vs_sse.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 * Brian Paul
32 */
33
34 #include "draw_vs.h"
35
36 #if defined(__i386__) || defined(__386__)
37
38 #include "pipe/p_util.h"
39 #include "pipe/p_shader_tokens.h"
40
41 #include "draw_private.h"
42 #include "draw_context.h"
43
44 #include "rtasm/rtasm_cpu.h"
45 #include "rtasm/rtasm_x86sse.h"
46 #include "tgsi/exec/tgsi_sse2.h"
47 #include "tgsi/util/tgsi_parse.h"
48
49 #define SSE_MAX_VERTICES 4
50
51 typedef void (XSTDCALL *codegen_function) (
52 const struct tgsi_exec_vector *input, /* 1 */
53 struct tgsi_exec_vector *output, /* 2 */
54 float (*constant)[4], /* 3 */
55 struct tgsi_exec_vector *temporary, /* 4 */
56 float (*immediates)[4], /* 5 */
57 const float (*aos_input)[4], /* 6 */
58 uint num_inputs, /* 7 */
59 uint input_stride, /* 8 */
60 float (*aos_output)[4], /* 9 */
61 uint num_outputs, /* 10 */
62 uint output_stride ); /* 11 */
63
64 struct draw_sse_vertex_shader {
65 struct draw_vertex_shader base;
66 struct x86_function sse2_program;
67
68 codegen_function func;
69
70 struct tgsi_exec_machine *machine;
71 };
72
73
/**
 * 'prepare' hook of the SSE vertex shader.
 *
 * Intentionally does nothing; it exists only so that the hook slot in
 * struct draw_vertex_shader is populated.
 */
static void
vs_sse_prepare( struct draw_vertex_shader *base,
                struct draw_context *draw )
{
   /* Nothing to set up; mark both arguments as deliberately unused. */
   (void) base;
   (void) draw;
}
79
80
81
82 /* Simplified vertex shader interface for the pt paths. Given the
83 * complexity of code-generating all the above operations together,
84 * it's time to try doing all the other stuff separately.
85 */
86 static void
87 vs_sse_run_linear( struct draw_vertex_shader *base,
88 const float (*input)[4],
89 float (*output)[4],
90 const float (*constants)[4],
91 unsigned count,
92 unsigned input_stride,
93 unsigned output_stride )
94 {
95 struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
96 struct tgsi_exec_machine *machine = shader->machine;
97 unsigned int i;
98
99 for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
100 unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
101
102 /* run compiled shader
103 */
104 shader->func(machine->Inputs,
105 machine->Outputs,
106 (float (*)[4])constants,
107 machine->Temps,
108 (float (*)[4])shader->base.immediates,
109 input,
110 base->info.num_inputs,
111 input_stride,
112 output,
113 base->info.num_outputs,
114 output_stride );
115
116 input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
117 output = (float (*)[4])((char *)output + output_stride * max_vertices);
118 }
119 }
120
121
122
123
124 static void
125 vs_sse_delete( struct draw_vertex_shader *base )
126 {
127 struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
128
129 x86_release_func( &shader->sse2_program );
130
131 align_free(shader->base.immediates);
132
133 FREE( (void*) shader->base.state.tokens );
134 FREE( shader );
135 }
136
137
138 struct draw_vertex_shader *
139 draw_create_vs_sse(struct draw_context *draw,
140 const struct pipe_shader_state *templ)
141 {
142 struct draw_sse_vertex_shader *vs;
143
144 if (!rtasm_cpu_has_sse2())
145 return NULL;
146
147 vs = CALLOC_STRUCT( draw_sse_vertex_shader );
148 if (vs == NULL)
149 return NULL;
150
151 /* we make a private copy of the tokens */
152 vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
153 if (!vs->base.state.tokens)
154 goto fail;
155
156 tgsi_scan_shader(templ->tokens, &vs->base.info);
157
158 vs->base.draw = draw;
159 vs->base.create_varient = draw_vs_varient_aos_sse;
160 // vs->base.create_varient = draw_vs_varient_generic;
161 vs->base.prepare = vs_sse_prepare;
162 vs->base.run_linear = vs_sse_run_linear;
163 vs->base.delete = vs_sse_delete;
164
165 vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
166 sizeof(float), 16);
167
168 vs->machine = &draw->vs.machine;
169
170 x86_init_func( &vs->sse2_program );
171
172 if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
173 &vs->sse2_program,
174 (float (*)[4])vs->base.immediates,
175 TRUE ))
176 goto fail;
177
178 vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
179 if (!vs->func) {
180 goto fail;
181 }
182
183 return &vs->base;
184
185 fail:
186 fprintf(stderr, "tgsi_emit_sse2() failed, falling back to interpreter\n");
187
188 x86_release_func( &vs->sse2_program );
189
190 FREE(vs);
191 return NULL;
192 }
193
194
195
196 #else
197
/**
 * Stand-in used when the SSE code generator is not compiled in (the
 * target is not 32-bit x86): always reports that no SSE shader could be
 * created by returning a null pointer.
 */
struct draw_vertex_shader *
draw_create_vs_sse( struct draw_context *draw,
                    const struct pipe_shader_state *templ )
{
   /* Neither argument is inspected in this configuration. */
   (void) draw;
   (void) templ;

   return (struct draw_vertex_shader *) 0;
}
204
205
206 #endif
207