draw: Eliminate stdio usage.
[mesa.git] / src / gallium / auxiliary / draw / draw_vs_sse.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 * Brian Paul
32 */
33
34 #include "pipe/p_config.h"
35
36 #include "draw_vs.h"
37
38 #if defined(PIPE_ARCH_X86)
39
40 #include "pipe/p_util.h"
41 #include "pipe/p_shader_tokens.h"
42
43 #include "draw_private.h"
44 #include "draw_context.h"
45
46 #include "rtasm/rtasm_cpu.h"
47 #include "rtasm/rtasm_x86sse.h"
48 #include "tgsi/exec/tgsi_sse2.h"
49 #include "tgsi/util/tgsi_parse.h"
50
51 #define SSE_MAX_VERTICES 4
52
53 typedef void (XSTDCALL *codegen_function) (
54 const struct tgsi_exec_vector *input, /* 1 */
55 struct tgsi_exec_vector *output, /* 2 */
56 float (*constant)[4], /* 3 */
57 struct tgsi_exec_vector *temporary, /* 4 */
58 float (*immediates)[4], /* 5 */
59 const float (*aos_input)[4], /* 6 */
60 uint num_inputs, /* 7 */
61 uint input_stride, /* 8 */
62 float (*aos_output)[4], /* 9 */
63 uint num_outputs, /* 10 */
64 uint output_stride ); /* 11 */
65
66 struct draw_sse_vertex_shader {
67 struct draw_vertex_shader base;
68 struct x86_function sse2_program;
69
70 codegen_function func;
71
72 struct tgsi_exec_machine *machine;
73 };
74
75
/**
 * `prepare` hook installed by draw_create_vs_sse().
 *
 * Intentionally does nothing: this shader type performs no work in its
 * prepare step.
 */
static void
vs_sse_prepare( struct draw_vertex_shader *base,
                struct draw_context *draw )
{
   /* Both arguments are deliberately unused. */
   (void) base;
   (void) draw;
}
81
82
83
84 /* Simplified vertex shader interface for the pt paths. Given the
85 * complexity of code-generating all the above operations together,
86 * it's time to try doing all the other stuff separately.
87 */
88 static void
89 vs_sse_run_linear( struct draw_vertex_shader *base,
90 const float (*input)[4],
91 float (*output)[4],
92 const float (*constants)[4],
93 unsigned count,
94 unsigned input_stride,
95 unsigned output_stride )
96 {
97 struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
98 struct tgsi_exec_machine *machine = shader->machine;
99 unsigned int i;
100
101 for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
102 unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
103
104 /* run compiled shader
105 */
106 shader->func(machine->Inputs,
107 machine->Outputs,
108 (float (*)[4])constants,
109 machine->Temps,
110 (float (*)[4])shader->base.immediates,
111 input,
112 base->info.num_inputs,
113 input_stride,
114 output,
115 base->info.num_outputs,
116 output_stride );
117
118 input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
119 output = (float (*)[4])((char *)output + output_stride * max_vertices);
120 }
121 }
122
123
124
125
126 static void
127 vs_sse_delete( struct draw_vertex_shader *base )
128 {
129 struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
130
131 x86_release_func( &shader->sse2_program );
132
133 align_free(shader->base.immediates);
134
135 FREE( (void*) shader->base.state.tokens );
136 FREE( shader );
137 }
138
139
140 struct draw_vertex_shader *
141 draw_create_vs_sse(struct draw_context *draw,
142 const struct pipe_shader_state *templ)
143 {
144 struct draw_sse_vertex_shader *vs;
145
146 if (!rtasm_cpu_has_sse2())
147 return NULL;
148
149 vs = CALLOC_STRUCT( draw_sse_vertex_shader );
150 if (vs == NULL)
151 return NULL;
152
153 /* we make a private copy of the tokens */
154 vs->base.state.tokens = tgsi_dup_tokens(templ->tokens);
155 if (!vs->base.state.tokens)
156 goto fail;
157
158 tgsi_scan_shader(templ->tokens, &vs->base.info);
159
160 vs->base.draw = draw;
161 vs->base.create_varient = draw_vs_varient_aos_sse;
162 // vs->base.create_varient = draw_vs_varient_generic;
163 vs->base.prepare = vs_sse_prepare;
164 vs->base.run_linear = vs_sse_run_linear;
165 vs->base.delete = vs_sse_delete;
166
167 vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
168 sizeof(float), 16);
169
170 vs->machine = &draw->vs.machine;
171
172 x86_init_func( &vs->sse2_program );
173
174 if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
175 &vs->sse2_program,
176 (float (*)[4])vs->base.immediates,
177 TRUE ))
178 goto fail;
179
180 vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
181 if (!vs->func) {
182 goto fail;
183 }
184
185 return &vs->base;
186
187 fail:
188 debug_error("tgsi_emit_sse2() failed, falling back to interpreter\n");
189
190 x86_release_func( &vs->sse2_program );
191
192 FREE(vs);
193 return NULL;
194 }
195
196
197
198 #else
199
/**
 * Stub used when PIPE_ARCH_X86 is not defined: no SSE shader can be
 * built, so a null pointer is always returned.
 */
struct draw_vertex_shader *
draw_create_vs_sse( struct draw_context *draw,
                    const struct pipe_shader_state *templ )
{
   /* Neither argument is needed in this configuration. */
   (void) draw;
   (void) templ;

   return (void *) 0;
}
206
207
208 #endif
209