1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "pipe/p_util.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "draw_private.h"
36 #include "draw_context.h"
/**
 * Fetch a float[4] vertex attribute from memory, doing format/type
 * conversion as needed.
 *
 * FETCH_ATTRIB(NAME, SZ, CVT) defines
 *
 *    static void fetch_NAME(const void *ptr, float *attrib);
 *
 * which converts the first SZ components found at ptr with the CVT
 * expression and fills the remaining components (up to 4) from the
 * defaults (0, 0, 0, 1).  attrib must have room for 4 floats.
 *
 * This is probably needed/duplicated elsewhere, eg format
 * conversion, texture sampling etc.
 */
#define FETCH_ATTRIB( NAME, SZ, CVT )                   \
static void                                             \
fetch_##NAME(const void *ptr, float *attrib)            \
{                                                       \
   static const float defaults[4] = { 0,0,0,1 };        \
   unsigned i;                                          \
                                                        \
   for (i = 0; i < SZ; i++) {                           \
      attrib[i] = CVT;                                  \
   }                                                    \
                                                        \
   for (; i < 4; i++) {                                 \
      attrib[i] = defaults[i];                          \
   }                                                    \
}

/*
 * Per-component conversion expressions.  They are only meaningful inside
 * a FETCH_ATTRIB body, where 'ptr' (source data) and 'i' (component
 * index) are in scope.
 *
 * The signed 8-bit variants read through 'signed char': plain 'char' has
 * implementation-defined signedness, so on targets where it is unsigned
 * a byte such as 0x80 would be converted to +128 instead of -128.
 */
#define CVT_64_FLOAT   ((float) ((const double *) ptr)[i])
#define CVT_32_FLOAT   (((const float *) ptr)[i])

#define CVT_8_USCALED  ((float) ((const unsigned char *) ptr)[i])
#define CVT_16_USCALED ((float) ((const unsigned short *) ptr)[i])
#define CVT_32_USCALED ((float) ((const unsigned int *) ptr)[i])

#define CVT_8_SSCALED  ((float) ((const signed char *) ptr)[i])
#define CVT_16_SSCALED ((float) ((const short *) ptr)[i])
#define CVT_32_SSCALED ((float) ((const int *) ptr)[i])

#define CVT_8_UNORM    ((float) ((const unsigned char *) ptr)[i] / 255.0f)
#define CVT_16_UNORM   ((float) ((const unsigned short *) ptr)[i] / 65535.0f)
#define CVT_32_UNORM   ((float) ((const unsigned int *) ptr)[i] / 4294967295.0f)

#define CVT_8_SNORM    ((float) ((const signed char *) ptr)[i] / 127.0f)
#define CVT_16_SNORM   ((float) ((const short *) ptr)[i] / 32767.0f)
#define CVT_32_SNORM   ((float) ((const int *) ptr)[i] / 2147483647.0f)

FETCH_ATTRIB( R64G64B64A64_FLOAT,   4, CVT_64_FLOAT )
FETCH_ATTRIB( R64G64B64_FLOAT,      3, CVT_64_FLOAT )
FETCH_ATTRIB( R64G64_FLOAT,         2, CVT_64_FLOAT )
FETCH_ATTRIB( R64_FLOAT,            1, CVT_64_FLOAT )

FETCH_ATTRIB( R32G32B32A32_FLOAT,   4, CVT_32_FLOAT )
FETCH_ATTRIB( R32G32B32_FLOAT,      3, CVT_32_FLOAT )
FETCH_ATTRIB( R32G32_FLOAT,         2, CVT_32_FLOAT )
FETCH_ATTRIB( R32_FLOAT,            1, CVT_32_FLOAT )

FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED )
FETCH_ATTRIB( R32G32B32_USCALED,    3, CVT_32_USCALED )
FETCH_ATTRIB( R32G32_USCALED,       2, CVT_32_USCALED )
FETCH_ATTRIB( R32_USCALED,          1, CVT_32_USCALED )

FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED )
FETCH_ATTRIB( R32G32B32_SSCALED,    3, CVT_32_SSCALED )
FETCH_ATTRIB( R32G32_SSCALED,       2, CVT_32_SSCALED )
FETCH_ATTRIB( R32_SSCALED,          1, CVT_32_SSCALED )

FETCH_ATTRIB( R32G32B32A32_UNORM,   4, CVT_32_UNORM )
FETCH_ATTRIB( R32G32B32_UNORM,      3, CVT_32_UNORM )
FETCH_ATTRIB( R32G32_UNORM,         2, CVT_32_UNORM )
FETCH_ATTRIB( R32_UNORM,            1, CVT_32_UNORM )

FETCH_ATTRIB( R32G32B32A32_SNORM,   4, CVT_32_SNORM )
FETCH_ATTRIB( R32G32B32_SNORM,      3, CVT_32_SNORM )
FETCH_ATTRIB( R32G32_SNORM,         2, CVT_32_SNORM )
FETCH_ATTRIB( R32_SNORM,            1, CVT_32_SNORM )

FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED )
FETCH_ATTRIB( R16G16B16_USCALED,    3, CVT_16_USCALED )
FETCH_ATTRIB( R16G16_USCALED,       2, CVT_16_USCALED )
FETCH_ATTRIB( R16_USCALED,          1, CVT_16_USCALED )

FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED )
FETCH_ATTRIB( R16G16B16_SSCALED,    3, CVT_16_SSCALED )
FETCH_ATTRIB( R16G16_SSCALED,       2, CVT_16_SSCALED )
FETCH_ATTRIB( R16_SSCALED,          1, CVT_16_SSCALED )

FETCH_ATTRIB( R16G16B16A16_UNORM,   4, CVT_16_UNORM )
FETCH_ATTRIB( R16G16B16_UNORM,      3, CVT_16_UNORM )
FETCH_ATTRIB( R16G16_UNORM,         2, CVT_16_UNORM )
FETCH_ATTRIB( R16_UNORM,            1, CVT_16_UNORM )

FETCH_ATTRIB( R16G16B16A16_SNORM,   4, CVT_16_SNORM )
FETCH_ATTRIB( R16G16B16_SNORM,      3, CVT_16_SNORM )
FETCH_ATTRIB( R16G16_SNORM,         2, CVT_16_SNORM )
FETCH_ATTRIB( R16_SNORM,            1, CVT_16_SNORM )

FETCH_ATTRIB( R8G8B8A8_USCALED,     4, CVT_8_USCALED )
FETCH_ATTRIB( R8G8B8_USCALED,       3, CVT_8_USCALED )
FETCH_ATTRIB( R8G8_USCALED,         2, CVT_8_USCALED )
FETCH_ATTRIB( R8_USCALED,           1, CVT_8_USCALED )

FETCH_ATTRIB( R8G8B8A8_SSCALED,     4, CVT_8_SSCALED )
FETCH_ATTRIB( R8G8B8_SSCALED,       3, CVT_8_SSCALED )
FETCH_ATTRIB( R8G8_SSCALED,         2, CVT_8_SSCALED )
FETCH_ATTRIB( R8_SSCALED,           1, CVT_8_SSCALED )

FETCH_ATTRIB( R8G8B8A8_UNORM,       4, CVT_8_UNORM )
FETCH_ATTRIB( R8G8B8_UNORM,         3, CVT_8_UNORM )
FETCH_ATTRIB( R8G8_UNORM,           2, CVT_8_UNORM )
FETCH_ATTRIB( R8_UNORM,             1, CVT_8_UNORM )

FETCH_ATTRIB( R8G8B8A8_SNORM,       4, CVT_8_SNORM )
FETCH_ATTRIB( R8G8B8_SNORM,         3, CVT_8_SNORM )
FETCH_ATTRIB( R8G8_SNORM,           2, CVT_8_SNORM )
FETCH_ATTRIB( R8_SNORM,             1, CVT_8_SNORM )

/* Components are read in memory order; no channel reordering is applied. */
FETCH_ATTRIB( A8R8G8B8_UNORM,       4, CVT_8_UNORM )
155 //FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
// get_fetch_func: look up the attribute-fetch routine for a vertex format.
// Every visible case pairs PIPE_FORMAT_<X> with the fetch_<X> function that
// FETCH_ATTRIB( <X>, ... ) generates earlier in this file.
// Formats covered by the visible cases: 64-bit and 32-bit FLOAT; 32-, 16-
// and 8-bit UNORM, USCALED, SNORM and SSCALED; each with 1 to 4 channels;
// plus A8R8G8B8_UNORM.
// NOTE(review): the switch header, the declaration of tmp and any default
// label are not present in this listing - confirm against the full file.
159 static fetch_func
get_fetch_func( enum pipe_format format
)
// Debug trace: pf_sprint_name() is handed tmp and the format, then tmp is
// printed as a string after the function name.
// NOTE(review): whether this trace is compiled in cannot be seen from here.
164 pf_sprint_name(tmp
, format
);
165 debug_printf("%s: %s\n", __FUNCTION__
, tmp
);
// 64-bit float formats.
170 case PIPE_FORMAT_R64_FLOAT
:
171 return fetch_R64_FLOAT
;
172 case PIPE_FORMAT_R64G64_FLOAT
:
173 return fetch_R64G64_FLOAT
;
174 case PIPE_FORMAT_R64G64B64_FLOAT
:
175 return fetch_R64G64B64_FLOAT
;
176 case PIPE_FORMAT_R64G64B64A64_FLOAT
:
177 return fetch_R64G64B64A64_FLOAT
;
// 32-bit float formats.
179 case PIPE_FORMAT_R32_FLOAT
:
180 return fetch_R32_FLOAT
;
181 case PIPE_FORMAT_R32G32_FLOAT
:
182 return fetch_R32G32_FLOAT
;
183 case PIPE_FORMAT_R32G32B32_FLOAT
:
184 return fetch_R32G32B32_FLOAT
;
185 case PIPE_FORMAT_R32G32B32A32_FLOAT
:
186 return fetch_R32G32B32A32_FLOAT
;
// 32-bit integer formats: UNORM, USCALED, SNORM, SSCALED.
188 case PIPE_FORMAT_R32_UNORM
:
189 return fetch_R32_UNORM
;
190 case PIPE_FORMAT_R32G32_UNORM
:
191 return fetch_R32G32_UNORM
;
192 case PIPE_FORMAT_R32G32B32_UNORM
:
193 return fetch_R32G32B32_UNORM
;
194 case PIPE_FORMAT_R32G32B32A32_UNORM
:
195 return fetch_R32G32B32A32_UNORM
;
197 case PIPE_FORMAT_R32_USCALED
:
198 return fetch_R32_USCALED
;
199 case PIPE_FORMAT_R32G32_USCALED
:
200 return fetch_R32G32_USCALED
;
201 case PIPE_FORMAT_R32G32B32_USCALED
:
202 return fetch_R32G32B32_USCALED
;
203 case PIPE_FORMAT_R32G32B32A32_USCALED
:
204 return fetch_R32G32B32A32_USCALED
;
206 case PIPE_FORMAT_R32_SNORM
:
207 return fetch_R32_SNORM
;
208 case PIPE_FORMAT_R32G32_SNORM
:
209 return fetch_R32G32_SNORM
;
210 case PIPE_FORMAT_R32G32B32_SNORM
:
211 return fetch_R32G32B32_SNORM
;
212 case PIPE_FORMAT_R32G32B32A32_SNORM
:
213 return fetch_R32G32B32A32_SNORM
;
215 case PIPE_FORMAT_R32_SSCALED
:
216 return fetch_R32_SSCALED
;
217 case PIPE_FORMAT_R32G32_SSCALED
:
218 return fetch_R32G32_SSCALED
;
219 case PIPE_FORMAT_R32G32B32_SSCALED
:
220 return fetch_R32G32B32_SSCALED
;
221 case PIPE_FORMAT_R32G32B32A32_SSCALED
:
222 return fetch_R32G32B32A32_SSCALED
;
// 16-bit integer formats: UNORM, USCALED, SNORM, SSCALED.
224 case PIPE_FORMAT_R16_UNORM
:
225 return fetch_R16_UNORM
;
226 case PIPE_FORMAT_R16G16_UNORM
:
227 return fetch_R16G16_UNORM
;
228 case PIPE_FORMAT_R16G16B16_UNORM
:
229 return fetch_R16G16B16_UNORM
;
230 case PIPE_FORMAT_R16G16B16A16_UNORM
:
231 return fetch_R16G16B16A16_UNORM
;
233 case PIPE_FORMAT_R16_USCALED
:
234 return fetch_R16_USCALED
;
235 case PIPE_FORMAT_R16G16_USCALED
:
236 return fetch_R16G16_USCALED
;
237 case PIPE_FORMAT_R16G16B16_USCALED
:
238 return fetch_R16G16B16_USCALED
;
239 case PIPE_FORMAT_R16G16B16A16_USCALED
:
240 return fetch_R16G16B16A16_USCALED
;
242 case PIPE_FORMAT_R16_SNORM
:
243 return fetch_R16_SNORM
;
244 case PIPE_FORMAT_R16G16_SNORM
:
245 return fetch_R16G16_SNORM
;
246 case PIPE_FORMAT_R16G16B16_SNORM
:
247 return fetch_R16G16B16_SNORM
;
248 case PIPE_FORMAT_R16G16B16A16_SNORM
:
249 return fetch_R16G16B16A16_SNORM
;
251 case PIPE_FORMAT_R16_SSCALED
:
252 return fetch_R16_SSCALED
;
253 case PIPE_FORMAT_R16G16_SSCALED
:
254 return fetch_R16G16_SSCALED
;
255 case PIPE_FORMAT_R16G16B16_SSCALED
:
256 return fetch_R16G16B16_SSCALED
;
257 case PIPE_FORMAT_R16G16B16A16_SSCALED
:
258 return fetch_R16G16B16A16_SSCALED
;
// 8-bit integer formats: UNORM, USCALED, SNORM, SSCALED.
260 case PIPE_FORMAT_R8_UNORM
:
261 return fetch_R8_UNORM
;
262 case PIPE_FORMAT_R8G8_UNORM
:
263 return fetch_R8G8_UNORM
;
264 case PIPE_FORMAT_R8G8B8_UNORM
:
265 return fetch_R8G8B8_UNORM
;
266 case PIPE_FORMAT_R8G8B8A8_UNORM
:
267 return fetch_R8G8B8A8_UNORM
;
269 case PIPE_FORMAT_R8_USCALED
:
270 return fetch_R8_USCALED
;
271 case PIPE_FORMAT_R8G8_USCALED
:
272 return fetch_R8G8_USCALED
;
273 case PIPE_FORMAT_R8G8B8_USCALED
:
274 return fetch_R8G8B8_USCALED
;
275 case PIPE_FORMAT_R8G8B8A8_USCALED
:
276 return fetch_R8G8B8A8_USCALED
;
278 case PIPE_FORMAT_R8_SNORM
:
279 return fetch_R8_SNORM
;
280 case PIPE_FORMAT_R8G8_SNORM
:
281 return fetch_R8G8_SNORM
;
282 case PIPE_FORMAT_R8G8B8_SNORM
:
283 return fetch_R8G8B8_SNORM
;
284 case PIPE_FORMAT_R8G8B8A8_SNORM
:
285 return fetch_R8G8B8A8_SNORM
;
287 case PIPE_FORMAT_R8_SSCALED
:
288 return fetch_R8_SSCALED
;
289 case PIPE_FORMAT_R8G8_SSCALED
:
290 return fetch_R8G8_SSCALED
;
291 case PIPE_FORMAT_R8G8B8_SSCALED
:
292 return fetch_R8G8B8_SSCALED
;
293 case PIPE_FORMAT_R8G8B8A8_SSCALED
:
294 return fetch_R8G8B8A8_SSCALED
;
// Packed 8-bit ARGB; its fetch routine uses the same per-byte UNORM
// conversion as R8G8B8A8_UNORM.
296 case PIPE_FORMAT_A8R8G8B8_UNORM
:
297 return fetch_A8R8G8B8_UNORM
;
// The only visible path that yields NULL instead of a fetch routine.
// NOTE(review): the label guarding it is not present in this listing.
300 return NULL
; /* not sure why this is needed */
/**
 * Transpose a 4x4 block of floats held as 16 consecutive values:
 * out[4 * row + col] = in[4 * col + row].
 *
 * out and in must each refer to 16 floats and must not overlap: early
 * stores to out would otherwise clobber values that are still to be
 * read from in.
 *
 * An SSE version should manage this in about 12 instructions plus the
 * final stores; this scalar form probably costs somewhat more.
 */
static void
transpose_4x4( float *out, const float *in )
{
   unsigned row, col;

   for (row = 0; row < 4; row++) {
      for (col = 0; col < 4; col++) {
         out[4 * row + col] = in[4 * col + row];
      }
   }
}
// fetch_xyz_rgb: fetch path that handles two vertex attributes, written to
// machine->Inputs[0] and machine->Inputs[1].
// draw_update_vertex_fetch() can install it when vertex elements 0 and 1
// both use PIPE_FORMAT_R32G32B32_FLOAT.
// NOTE(review): the remaining parameter(s) after elts and the statements
// that copy in[] to out[] are not present in this listing.
324 static void fetch_xyz_rgb( struct draw_context
*draw
,
325 struct tgsi_exec_machine
*machine
,
326 const unsigned *elts
,
// Per-attribute pitch and base pointer, as filled in by
// draw_update_vertex_fetch(). pitch[a] is multiplied by a vertex index and
// added to the ubyte pointer src[a] below.
329 const unsigned *pitch
= draw
->vertex_fetch
.pitch
;
330 const ubyte
**src
= draw
->vertex_fetch
.src_ptr
;
335 // debug_printf("%s\n", __FUNCTION__);
337 /* loop over vertex attributes (vertex shader inputs)
// i walks four vertex slots; elts[i] is the index of the vertex read for
// slot i.
// NOTE(review): the bound is the constant 4, so elts is indexed up to [3]
// regardless of how many vertices were requested - confirm callers always
// provide four entries.
340 for (i
= 0; i
< 4; i
++) {
// Attribute 0: in addresses vertex elts[i] in source 0, out addresses
// element i of xyzw[0].f in Inputs[0].
342 const float *in
= (const float *)(src
[0] + elts
[i
] * pitch
[0]);
343 float *out
= &machine
->Inputs
[0].xyzw
[0].f
[i
];
// Attribute 1: same addressing against source 1 and Inputs[1].
351 const float *in
= (const float *)(src
[1] + elts
[i
] * pitch
[1]);
352 float *out
= &machine
->Inputs
[1].xyzw
[0].f
[i
];
// fetch_xyz_rgb_st: fetch path that handles three vertex attributes,
// written to machine->Inputs[0], Inputs[1] and Inputs[2].
// draw_update_vertex_fetch() can install it when vertex elements 0 and 1
// use PIPE_FORMAT_R32G32B32_FLOAT and element 2 uses
// PIPE_FORMAT_R32G32_FLOAT.
// NOTE(review): the remaining parameter(s) after elts and the statements
// that copy in[] to out[] are not present in this listing.
364 static void fetch_xyz_rgb_st( struct draw_context
*draw
,
365 struct tgsi_exec_machine
*machine
,
366 const unsigned *elts
,
// Per-attribute pitch and base pointer, as filled in by
// draw_update_vertex_fetch().
369 const unsigned *pitch
= draw
->vertex_fetch
.pitch
;
370 const ubyte
**src
= draw
->vertex_fetch
.src_ptr
;
375 /* loop over vertex attributes (vertex shader inputs)
// i walks four vertex slots; elts[i] is the index of the vertex read for
// slot i.
// NOTE(review): the bound is the constant 4, so elts is indexed up to [3]
// regardless of how many vertices were requested - confirm callers always
// provide four entries.
378 for (i
= 0; i
< 4; i
++) {
// Attribute 0: in addresses vertex elts[i] in source 0, out addresses
// element i of xyzw[0].f in Inputs[0].
380 const float *in
= (const float *)(src
[0] + elts
[i
] * pitch
[0]);
381 float *out
= &machine
->Inputs
[0].xyzw
[0].f
[i
];
// Attribute 1: same addressing against source 1 and Inputs[1].
389 const float *in
= (const float *)(src
[1] + elts
[i
] * pitch
[1]);
390 float *out
= &machine
->Inputs
[1].xyzw
[0].f
[i
];
// Attribute 2: same addressing against source 2 and Inputs[2].
398 const float *in
= (const float *)(src
[2] + elts
[i
] * pitch
[2]);
399 float *out
= &machine
->Inputs
[2].xyzw
[0].f
[i
];
// generic_vertex_fetch: format-independent fetch path. For every attribute
// it runs that attribute's fetch routine once per requested vertex into the
// scratch array p, zeroes the rows of p that received no vertex, and then
// transposes p into machine->Inputs[attr].
// draw_update_vertex_fetch() installs this as the default fetch_func.
// NOTE(review): the count parameter and the declarations of p, i and attr
// are not present in this listing.
412 * Fetch vertex attributes for 'count' vertices.
414 static void generic_vertex_fetch( struct draw_context
*draw
,
415 struct tgsi_exec_machine
*machine
,
416 const unsigned *elts
,
// Number of attributes to process, as stored by draw_update_vertex_fetch().
419 unsigned nr_attrs
= draw
->vertex_fetch
.nr_attrs
;
424 // debug_printf("%s %d\n", __FUNCTION__, count);
426 /* loop over vertex attributes (vertex shader inputs)
428 for (attr
= 0; attr
< nr_attrs
; attr
++) {
// Per-attribute pitch, base pointer and conversion routine prepared by
// draw_update_vertex_fetch().
430 const unsigned pitch
= draw
->vertex_fetch
.pitch
[attr
];
431 const ubyte
*src
= draw
->vertex_fetch
.src_ptr
[attr
];
432 const fetch_func fetch
= draw
->vertex_fetch
.fetch
[attr
];
437 /* Fetch four attributes for four vertices.
439 * Could fetch directly into AOS format, but this is meant to be
440 * a prototype for an sse implementation, which would have
441 * difficulties doing that.
// Row i of p receives this attribute for vertex elts[i]; the source address
// is src advanced by elts[i] * pitch.
443 for (i
= 0; i
< count
; i
++)
444 fetch( src
+ elts
[i
] * pitch
, p
[i
] );
446 /* Be nice and zero out any missing vertices:
// All four components of row i are cleared.
// NOTE(review): the loop header that selects the rows past count is not
// present in this listing.
449 p
[i
][0] = p
[i
][1] = p
[i
][2] = p
[i
][3] = 0;
451 /* Transpose/swizzle into sse-friendly format. Currently
452 * assuming that all vertex shader inputs are float[4], but this
453 * isn't true -- if the vertex shader only wants tex0.xy, we
454 * could optimize for that.
456 * To do so fully without codegen would probably require an
457 * excessive number of fetch functions, but we could at least
458 * minimize the transpose step:
// p is passed as the 16-float source; the destination starts at
// Inputs[attr].xyzw[0].f[0].
460 transpose_4x4( (float *)&machine
->Inputs
[attr
].xyzw
[0].f
[0], (float *)p
);
// draw_update_vertex_fetch: rebuild draw->vertex_fetch from the current
// vertex shader, vertex elements and vertex buffers. For every shader input
// it records a source pointer, a pitch and a fetch routine, then chooses
// the function used to fetch whole vertices.
466 void draw_update_vertex_fetch( struct draw_context
*draw
)
468 unsigned nr_attrs
, i
;
470 // debug_printf("%s\n", __FUNCTION__);
// NOTE(review): the statement controlled by this check is not present in
// this listing; presumably it returns early - confirm.
472 /* this may happen during context init */
473 if (!draw
->vertex_shader
)
// One fetch slot per vertex shader input.
476 nr_attrs
= draw
->vertex_shader
->state
->num_inputs
;
478 for (i
= 0; i
< nr_attrs
; i
++) {
479 unsigned buf
= draw
->vertex_element
[i
].vertex_buffer_index
;
480 enum pipe_format format
= draw
->vertex_element
[i
].src_format
;
// Source pointer for element i: the user buffer for buf, plus the offset of
// the vertex buffer, plus the offset of the element.
482 draw
->vertex_fetch
.src_ptr
[i
] = (const ubyte
*) draw
->user
.vbuffer
[buf
] +
483 draw
->vertex_buffer
[buf
].buffer_offset
+
484 draw
->vertex_element
[i
].src_offset
;
// The pitch comes from the vertex buffer; the conversion routine comes from
// the element format.
// NOTE(review): get_fetch_func() has a path that returns NULL and the result
// is stored here unchecked - confirm callers never hit that path.
486 draw
->vertex_fetch
.pitch
[i
] = draw
->vertex_buffer
[buf
].pitch
;
487 draw
->vertex_fetch
.fetch
[i
] = get_fetch_func( format
);
490 draw
->vertex_fetch
.nr_attrs
= nr_attrs
;
// Default to the generic path; the checks below may replace it with a
// specialized one.
492 draw
->vertex_fetch
.fetch_func
= generic_vertex_fetch
;
// Two R32G32B32_FLOAT elements select fetch_xyz_rgb.
// NOTE(review): the selector that decides which of the two checks runs
// (presumably on nr_attrs) is not present in this listing.
496 if (draw
->vertex_element
[0].src_format
== PIPE_FORMAT_R32G32B32_FLOAT
&&
497 draw
->vertex_element
[1].src_format
== PIPE_FORMAT_R32G32B32_FLOAT
)
498 draw
->vertex_fetch
.fetch_func
= fetch_xyz_rgb
;
// Two R32G32B32_FLOAT elements followed by one R32G32_FLOAT element select
// fetch_xyz_rgb_st.
501 if (draw
->vertex_element
[0].src_format
== PIPE_FORMAT_R32G32B32_FLOAT
&&
502 draw
->vertex_element
[1].src_format
== PIPE_FORMAT_R32G32B32_FLOAT
&&
503 draw
->vertex_element
[2].src_format
== PIPE_FORMAT_R32G32_FLOAT
)
504 draw
->vertex_fetch
.fetch_func
= fetch_xyz_rgb_st
;