gallium: fix typo in LINE() macro (replace i+1 with i1 var)
[mesa.git] / src / gallium / auxiliary / draw / draw_vs_varient.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 */
32
33 #include "pipe/p_util.h"
34 #include "draw/draw_context.h"
35 #include "draw/draw_private.h"
36 #include "draw/draw_vbuf.h"
37 #include "draw/draw_vertex.h"
38 #include "draw/draw_vs.h"
39 #include "translate/translate.h"
40 #include "translate/translate_cache.h"
41
42 /* A first pass at incorporating vertex fetch/emit functionality into
43 */
44 struct draw_vs_varient_generic {
45 struct draw_vs_varient base;
46
47 struct draw_vertex_shader *shader;
48 struct draw_context *draw;
49
50 /* Basic plan is to run these two translate functions before/after
51 * the vertex shader's existing run_linear() routine to simulate
52 * the inclusion of this functionality into the shader...
53 *
54 * Next will look at actually including it.
55 */
56 struct translate *fetch;
57 struct translate *emit;
58
59 unsigned temp_vertex_stride;
60 };
61
62
63
64
65
66 static void vsvg_set_input( struct draw_vs_varient *varient,
67 unsigned buffer,
68 const void *ptr,
69 unsigned stride )
70 {
71 struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
72
73 vsvg->fetch->set_buffer(vsvg->fetch,
74 buffer,
75 ptr,
76 stride);
77 }
78
79
80 /* Mainly for debug at this stage:
81 */
82 static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg,
83 unsigned count,
84 void *output_buffer )
85 {
86 char *ptr = (char *)output_buffer;
87 const float *scale = vsvg->base.vs->draw->viewport.scale;
88 const float *trans = vsvg->base.vs->draw->viewport.translate;
89 unsigned stride = vsvg->temp_vertex_stride;
90 unsigned j;
91
92 ptr += vsvg->base.vs->position_output * 4 * sizeof(float);
93
94 for (j = 0; j < count; j++, ptr += stride) {
95 float *data = (float *)ptr;
96 float w = 1.0f / data[3];
97
98 data[0] = data[0] * w * scale[0] + trans[0];
99 data[1] = data[1] * w * scale[1] + trans[1];
100 data[2] = data[2] * w * scale[2] + trans[2];
101 data[3] = w;
102 }
103 }
104
105 static void do_viewport( struct draw_vs_varient_generic *vsvg,
106 unsigned count,
107 void *output_buffer )
108 {
109 char *ptr = (char *)output_buffer;
110 const float *scale = vsvg->base.vs->draw->viewport.scale;
111 const float *trans = vsvg->base.vs->draw->viewport.translate;
112 unsigned stride = vsvg->temp_vertex_stride;
113 unsigned j;
114
115 ptr += vsvg->base.vs->position_output * 4 * sizeof(float);
116
117 for (j = 0; j < count; j++, ptr += stride) {
118 float *data = (float *)ptr;
119
120 data[0] = data[0] * scale[0] + trans[0];
121 data[1] = data[1] * scale[1] + trans[1];
122 data[2] = data[2] * scale[2] + trans[2];
123 }
124 }
125
126
127 static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
128 const unsigned *elts,
129 unsigned count,
130 void *output_buffer)
131 {
132 struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
133 unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
134 void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride );
135
136 if (0) debug_printf("%s %d \n", __FUNCTION__, count);
137
138 /* Want to do this in small batches for cache locality?
139 */
140
141 vsvg->fetch->run_elts( vsvg->fetch,
142 elts,
143 count,
144 temp_buffer );
145
146 vsvg->base.vs->run_linear( vsvg->base.vs,
147 temp_buffer,
148 temp_buffer,
149 (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
150 count,
151 temp_vertex_stride,
152 temp_vertex_stride);
153
154
155 if (vsvg->base.key.clip) {
156 /* not really handling clipping, just do the rhw so we can
157 * see the results...
158 */
159 do_rhw_viewport( vsvg,
160 count,
161 temp_buffer );
162 }
163 else if (vsvg->base.key.viewport) {
164 do_viewport( vsvg,
165 count,
166 temp_buffer );
167 }
168
169
170 vsvg->emit->set_buffer( vsvg->emit,
171 0,
172 temp_buffer,
173 temp_vertex_stride );
174
175 vsvg->emit->set_buffer( vsvg->emit,
176 1,
177 &vsvg->draw->rasterizer->point_size,
178 0);
179
180 vsvg->emit->run( vsvg->emit,
181 0, count,
182 output_buffer );
183
184 FREE(temp_buffer);
185 }
186
187
188 static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
189 unsigned start,
190 unsigned count,
191 void *output_buffer )
192 {
193 struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
194 unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
195 void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride );
196
197 if (0) debug_printf("%s %d %d (sz %d, %d)\n", __FUNCTION__, start, count,
198 vsvg->base.key.output_stride,
199 temp_vertex_stride);
200
201 vsvg->fetch->run( vsvg->fetch,
202 start,
203 count,
204 temp_buffer );
205
206 vsvg->base.vs->run_linear( vsvg->base.vs,
207 temp_buffer,
208 temp_buffer,
209 (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
210 count,
211 temp_vertex_stride,
212 temp_vertex_stride);
213
214 if (vsvg->base.key.clip) {
215 /* not really handling clipping, just do the rhw so we can
216 * see the results...
217 */
218 do_rhw_viewport( vsvg,
219 count,
220 temp_buffer );
221 }
222 else if (vsvg->base.key.viewport) {
223 do_viewport( vsvg,
224 count,
225 temp_buffer );
226 }
227
228 vsvg->emit->set_buffer( vsvg->emit,
229 0,
230 temp_buffer,
231 temp_vertex_stride );
232
233 vsvg->emit->set_buffer( vsvg->emit,
234 1,
235 &vsvg->draw->rasterizer->point_size,
236 0);
237
238 vsvg->emit->run( vsvg->emit,
239 0, count,
240 output_buffer );
241
242 FREE(temp_buffer);
243 }
244
245
246
247
248
/**
 * Release a varient created by draw_vs_varient_generic().
 *
 * Only the varient structure itself is freed; the fetch and emit
 * translate objects it points at are not released here.
 */
static void
vsvg_destroy(struct draw_vs_varient *varient)
{
   struct draw_vs_varient_generic *self =
      (struct draw_vs_varient_generic *) varient;

   FREE(self);
}
253
254
255 struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
256 const struct draw_vs_varient_key *key )
257 {
258 unsigned i;
259 struct translate_key fetch, emit;
260
261 struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic );
262 if (vsvg == NULL)
263 return NULL;
264
265 vsvg->base.key = *key;
266 vsvg->base.vs = vs;
267 vsvg->base.set_input = vsvg_set_input;
268 vsvg->base.run_elts = vsvg_run_elts;
269 vsvg->base.run_linear = vsvg_run_linear;
270 vsvg->base.destroy = vsvg_destroy;
271
272 vsvg->draw = vs->draw;
273
274 vsvg->temp_vertex_stride = MAX2(key->nr_inputs,
275 vsvg->base.vs->info.num_outputs) * 4 * sizeof(float);
276
277 /* Build free-standing fetch and emit functions:
278 */
279 fetch.nr_elements = key->nr_inputs;
280 fetch.output_stride = vsvg->temp_vertex_stride;
281 for (i = 0; i < key->nr_inputs; i++) {
282 fetch.element[i].input_format = key->element[i].in.format;
283 fetch.element[i].input_buffer = key->element[i].in.buffer;
284 fetch.element[i].input_offset = key->element[i].in.offset;
285 fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
286 fetch.element[i].output_offset = i * 4 * sizeof(float);
287 assert(fetch.element[i].output_offset < fetch.output_stride);
288 }
289
290
291 emit.nr_elements = key->nr_outputs;
292 emit.output_stride = key->output_stride;
293 for (i = 0; i < key->nr_outputs; i++) {
294 if (key->element[i].out.format != EMIT_1F_PSIZE)
295 {
296 emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
297 emit.element[i].input_buffer = 0;
298 emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
299 emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
300 emit.element[i].output_offset = key->element[i].out.offset;
301 assert(emit.element[i].input_offset <= fetch.output_stride);
302 }
303 else {
304 emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
305 emit.element[i].input_buffer = 1;
306 emit.element[i].input_offset = 0;
307 emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
308 emit.element[i].output_offset = key->element[i].out.offset;
309 }
310 }
311
312 vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch );
313 vsvg->emit = draw_vs_get_emit( vs->draw, &emit );
314
315 return &vsvg->base;
316 }
317
318
319
320
321