#include "tgsi/util/tgsi_parse.h"
#define SSE_MAX_VERTICES 4
-#define SSE_SWIZZLES 1
-#if SSE_SWIZZLES
typedef void (XSTDCALL *codegen_function) (
const struct tgsi_exec_vector *input, /* 1 */
struct tgsi_exec_vector *output, /* 2 */
float (*aos_output)[4], /* 9 */
uint num_outputs, /* 10 */
uint output_stride ); /* 11 */
-#else
-typedef void (XSTDCALL *codegen_function) (
- const struct tgsi_exec_vector *input,
- struct tgsi_exec_vector *output,
- float (*constant)[4],
- struct tgsi_exec_vector *temporary,
- float (*immediates)[4] );
-#endif
struct draw_sse_vertex_shader {
struct draw_vertex_shader base;
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
-#if SSE_SWIZZLES
/* run compiled shader
*/
shader->func(machine->Inputs,
input = (const float (*)[4])((const char *)input + input_stride * max_vertices);
output = (float (*)[4])((char *)output + output_stride * max_vertices);
-#else
- unsigned int j, slot;
-
- /* Swizzle inputs.
- */
- for (j = 0; j < max_vertices; j++) {
- for (slot = 0; slot < base->info.num_inputs; slot++) {
- machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
- machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
- machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
- machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
- }
-
- input = (const float (*)[4])((const char *)input + input_stride);
- }
-
- /* run compiled shader
- */
- shader->func(machine->Inputs,
- machine->Outputs,
- (float (*)[4])constants,
- machine->Temps,
- shader->immediates);
-
- /* Unswizzle all output results.
- */
- for (j = 0; j < max_vertices; j++) {
- for (slot = 0; slot < base->info.num_outputs; slot++) {
- output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
- }
-
- output = (float (*)[4])((char *)output + output_stride);
- }
-#endif
}
}
x86_init_func( &vs->sse2_program );
if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens,
- &vs->sse2_program, vs->immediates, SSE_SWIZZLES ))
+ &vs->sse2_program, vs->immediates, TRUE ))
goto fail;
vs->func = (codegen_function) x86_get_func( &vs->sse2_program );