}
-#define CVT_32_FLOAT(q) (*q)
+#define CVT_32_FLOAT(q) (*(q))
static INLINE qword
CVT_64_FLOAT(const qword *qw)
 * This is probably needed/duplicated elsewhere, eg format
* conversion, texture sampling etc.
*/
-#define FETCH_ATTRIB( NAME, SZ, CVT ) \
-static qword \
-fetch_##NAME(const qword *qw) \
-{ \
- qword expanded = CVT(qw); \
- return si_selb(expanded, (qword) defaults, SZ); \
+#define FETCH_ATTRIB( NAME, SZ, CVT, N ) \
+static void \
+fetch_##NAME(qword *out, const qword *in) \
+{ \
+ qword tmp[4]; \
+ \
+ tmp[0] = si_selb(CVT(in + (0 * N)), (qword) defaults, SZ); \
+ tmp[1] = si_selb(CVT(in + (1 * N)), (qword) defaults, SZ); \
+ tmp[2] = si_selb(CVT(in + (2 * N)), (qword) defaults, SZ); \
+ tmp[3] = si_selb(CVT(in + (3 * N)), (qword) defaults, SZ); \
+ _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) tmp); \
}
-FETCH_ATTRIB( R64G64B64A64_FLOAT, SZ_4, CVT_64_FLOAT )
-FETCH_ATTRIB( R64G64B64_FLOAT, SZ_3, CVT_64_FLOAT )
-FETCH_ATTRIB( R64G64_FLOAT, SZ_2, CVT_64_FLOAT )
-FETCH_ATTRIB( R64_FLOAT, SZ_1, CVT_64_FLOAT )
+FETCH_ATTRIB( R64G64B64A64_FLOAT, SZ_4, CVT_64_FLOAT, 2 )
+FETCH_ATTRIB( R64G64B64_FLOAT, SZ_3, CVT_64_FLOAT, 2 )
+FETCH_ATTRIB( R64G64_FLOAT, SZ_2, CVT_64_FLOAT, 2 )
+FETCH_ATTRIB( R64_FLOAT, SZ_1, CVT_64_FLOAT, 2 )
-FETCH_ATTRIB( R32G32B32A32_FLOAT, SZ_4, CVT_32_FLOAT )
-FETCH_ATTRIB( R32G32B32_FLOAT, SZ_3, CVT_32_FLOAT )
-FETCH_ATTRIB( R32G32_FLOAT, SZ_2, CVT_32_FLOAT )
-FETCH_ATTRIB( R32_FLOAT, SZ_1, CVT_32_FLOAT )
+FETCH_ATTRIB( R32G32B32A32_FLOAT, SZ_4, CVT_32_FLOAT, 1 )
+FETCH_ATTRIB( R32G32B32_FLOAT, SZ_3, CVT_32_FLOAT, 1 )
+FETCH_ATTRIB( R32G32_FLOAT, SZ_2, CVT_32_FLOAT, 1 )
+FETCH_ATTRIB( R32_FLOAT, SZ_1, CVT_32_FLOAT, 1 )
-FETCH_ATTRIB( R32G32B32A32_USCALED, SZ_4, CVT_32_USCALED )
-FETCH_ATTRIB( R32G32B32_USCALED, SZ_3, CVT_32_USCALED )
-FETCH_ATTRIB( R32G32_USCALED, SZ_2, CVT_32_USCALED )
-FETCH_ATTRIB( R32_USCALED, SZ_1, CVT_32_USCALED )
+FETCH_ATTRIB( R32G32B32A32_USCALED, SZ_4, CVT_32_USCALED, 1 )
+FETCH_ATTRIB( R32G32B32_USCALED, SZ_3, CVT_32_USCALED, 1 )
+FETCH_ATTRIB( R32G32_USCALED, SZ_2, CVT_32_USCALED, 1 )
+FETCH_ATTRIB( R32_USCALED, SZ_1, CVT_32_USCALED, 1 )
-FETCH_ATTRIB( R32G32B32A32_SSCALED, SZ_4, CVT_32_SSCALED )
-FETCH_ATTRIB( R32G32B32_SSCALED, SZ_3, CVT_32_SSCALED )
-FETCH_ATTRIB( R32G32_SSCALED, SZ_2, CVT_32_SSCALED )
-FETCH_ATTRIB( R32_SSCALED, SZ_1, CVT_32_SSCALED )
+FETCH_ATTRIB( R32G32B32A32_SSCALED, SZ_4, CVT_32_SSCALED, 1 )
+FETCH_ATTRIB( R32G32B32_SSCALED, SZ_3, CVT_32_SSCALED, 1 )
+FETCH_ATTRIB( R32G32_SSCALED, SZ_2, CVT_32_SSCALED, 1 )
+FETCH_ATTRIB( R32_SSCALED, SZ_1, CVT_32_SSCALED, 1 )
-FETCH_ATTRIB( R32G32B32A32_UNORM, SZ_4, CVT_32_UNORM )
-FETCH_ATTRIB( R32G32B32_UNORM, SZ_3, CVT_32_UNORM )
-FETCH_ATTRIB( R32G32_UNORM, SZ_2, CVT_32_UNORM )
-FETCH_ATTRIB( R32_UNORM, SZ_1, CVT_32_UNORM )
+FETCH_ATTRIB( R32G32B32A32_UNORM, SZ_4, CVT_32_UNORM, 1 )
+FETCH_ATTRIB( R32G32B32_UNORM, SZ_3, CVT_32_UNORM, 1 )
+FETCH_ATTRIB( R32G32_UNORM, SZ_2, CVT_32_UNORM, 1 )
+FETCH_ATTRIB( R32_UNORM, SZ_1, CVT_32_UNORM, 1 )
-FETCH_ATTRIB( R32G32B32A32_SNORM, SZ_4, CVT_32_SNORM )
-FETCH_ATTRIB( R32G32B32_SNORM, SZ_3, CVT_32_SNORM )
-FETCH_ATTRIB( R32G32_SNORM, SZ_2, CVT_32_SNORM )
-FETCH_ATTRIB( R32_SNORM, SZ_1, CVT_32_SNORM )
+FETCH_ATTRIB( R32G32B32A32_SNORM, SZ_4, CVT_32_SNORM, 1 )
+FETCH_ATTRIB( R32G32B32_SNORM, SZ_3, CVT_32_SNORM, 1 )
+FETCH_ATTRIB( R32G32_SNORM, SZ_2, CVT_32_SNORM, 1 )
+FETCH_ATTRIB( R32_SNORM, SZ_1, CVT_32_SNORM, 1 )
-FETCH_ATTRIB( R16G16B16A16_USCALED, SZ_4, CVT_16_USCALED )
-FETCH_ATTRIB( R16G16B16_USCALED, SZ_3, CVT_16_USCALED )
-FETCH_ATTRIB( R16G16_USCALED, SZ_2, CVT_16_USCALED )
-FETCH_ATTRIB( R16_USCALED, SZ_1, CVT_16_USCALED )
+FETCH_ATTRIB( R16G16B16A16_USCALED, SZ_4, CVT_16_USCALED, 1 )
+FETCH_ATTRIB( R16G16B16_USCALED, SZ_3, CVT_16_USCALED, 1 )
+FETCH_ATTRIB( R16G16_USCALED, SZ_2, CVT_16_USCALED, 1 )
+FETCH_ATTRIB( R16_USCALED, SZ_1, CVT_16_USCALED, 1 )
-FETCH_ATTRIB( R16G16B16A16_SSCALED, SZ_4, CVT_16_SSCALED )
-FETCH_ATTRIB( R16G16B16_SSCALED, SZ_3, CVT_16_SSCALED )
-FETCH_ATTRIB( R16G16_SSCALED, SZ_2, CVT_16_SSCALED )
-FETCH_ATTRIB( R16_SSCALED, SZ_1, CVT_16_SSCALED )
+FETCH_ATTRIB( R16G16B16A16_SSCALED, SZ_4, CVT_16_SSCALED, 1 )
+FETCH_ATTRIB( R16G16B16_SSCALED, SZ_3, CVT_16_SSCALED, 1 )
+FETCH_ATTRIB( R16G16_SSCALED, SZ_2, CVT_16_SSCALED, 1 )
+FETCH_ATTRIB( R16_SSCALED, SZ_1, CVT_16_SSCALED, 1 )
-FETCH_ATTRIB( R16G16B16A16_UNORM, SZ_4, CVT_16_UNORM )
-FETCH_ATTRIB( R16G16B16_UNORM, SZ_3, CVT_16_UNORM )
-FETCH_ATTRIB( R16G16_UNORM, SZ_2, CVT_16_UNORM )
-FETCH_ATTRIB( R16_UNORM, SZ_1, CVT_16_UNORM )
+FETCH_ATTRIB( R16G16B16A16_UNORM, SZ_4, CVT_16_UNORM, 1 )
+FETCH_ATTRIB( R16G16B16_UNORM, SZ_3, CVT_16_UNORM, 1 )
+FETCH_ATTRIB( R16G16_UNORM, SZ_2, CVT_16_UNORM, 1 )
+FETCH_ATTRIB( R16_UNORM, SZ_1, CVT_16_UNORM, 1 )
-FETCH_ATTRIB( R16G16B16A16_SNORM, SZ_4, CVT_16_SNORM )
-FETCH_ATTRIB( R16G16B16_SNORM, SZ_3, CVT_16_SNORM )
-FETCH_ATTRIB( R16G16_SNORM, SZ_2, CVT_16_SNORM )
-FETCH_ATTRIB( R16_SNORM, SZ_1, CVT_16_SNORM )
+FETCH_ATTRIB( R16G16B16A16_SNORM, SZ_4, CVT_16_SNORM, 1 )
+FETCH_ATTRIB( R16G16B16_SNORM, SZ_3, CVT_16_SNORM, 1 )
+FETCH_ATTRIB( R16G16_SNORM, SZ_2, CVT_16_SNORM, 1 )
+FETCH_ATTRIB( R16_SNORM, SZ_1, CVT_16_SNORM, 1 )
-FETCH_ATTRIB( R8G8B8A8_USCALED, SZ_4, CVT_8_USCALED )
-FETCH_ATTRIB( R8G8B8_USCALED, SZ_3, CVT_8_USCALED )
-FETCH_ATTRIB( R8G8_USCALED, SZ_2, CVT_8_USCALED )
-FETCH_ATTRIB( R8_USCALED, SZ_1, CVT_8_USCALED )
+FETCH_ATTRIB( R8G8B8A8_USCALED, SZ_4, CVT_8_USCALED, 1 )
+FETCH_ATTRIB( R8G8B8_USCALED, SZ_3, CVT_8_USCALED, 1 )
+FETCH_ATTRIB( R8G8_USCALED, SZ_2, CVT_8_USCALED, 1 )
+FETCH_ATTRIB( R8_USCALED, SZ_1, CVT_8_USCALED, 1 )
-FETCH_ATTRIB( R8G8B8A8_SSCALED, SZ_4, CVT_8_SSCALED )
-FETCH_ATTRIB( R8G8B8_SSCALED, SZ_3, CVT_8_SSCALED )
-FETCH_ATTRIB( R8G8_SSCALED, SZ_2, CVT_8_SSCALED )
-FETCH_ATTRIB( R8_SSCALED, SZ_1, CVT_8_SSCALED )
+FETCH_ATTRIB( R8G8B8A8_SSCALED, SZ_4, CVT_8_SSCALED, 1 )
+FETCH_ATTRIB( R8G8B8_SSCALED, SZ_3, CVT_8_SSCALED, 1 )
+FETCH_ATTRIB( R8G8_SSCALED, SZ_2, CVT_8_SSCALED, 1 )
+FETCH_ATTRIB( R8_SSCALED, SZ_1, CVT_8_SSCALED, 1 )
-FETCH_ATTRIB( R8G8B8A8_UNORM, SZ_4, CVT_8_UNORM )
-FETCH_ATTRIB( R8G8B8_UNORM, SZ_3, CVT_8_UNORM )
-FETCH_ATTRIB( R8G8_UNORM, SZ_2, CVT_8_UNORM )
-FETCH_ATTRIB( R8_UNORM, SZ_1, CVT_8_UNORM )
+FETCH_ATTRIB( R8G8B8A8_UNORM, SZ_4, CVT_8_UNORM, 1 )
+FETCH_ATTRIB( R8G8B8_UNORM, SZ_3, CVT_8_UNORM, 1 )
+FETCH_ATTRIB( R8G8_UNORM, SZ_2, CVT_8_UNORM, 1 )
+FETCH_ATTRIB( R8_UNORM, SZ_1, CVT_8_UNORM, 1 )
-FETCH_ATTRIB( R8G8B8A8_SNORM, SZ_4, CVT_8_SNORM )
-FETCH_ATTRIB( R8G8B8_SNORM, SZ_3, CVT_8_SNORM )
-FETCH_ATTRIB( R8G8_SNORM, SZ_2, CVT_8_SNORM )
-FETCH_ATTRIB( R8_SNORM, SZ_1, CVT_8_SNORM )
+FETCH_ATTRIB( R8G8B8A8_SNORM, SZ_4, CVT_8_SNORM, 1 )
+FETCH_ATTRIB( R8G8B8_SNORM, SZ_3, CVT_8_SNORM, 1 )
+FETCH_ATTRIB( R8G8_SNORM, SZ_2, CVT_8_SNORM, 1 )
+FETCH_ATTRIB( R8_SNORM, SZ_1, CVT_8_SNORM, 1 )
-FETCH_ATTRIB( A8R8G8B8_UNORM, SZ_4, CVT_8_UNORM )
+FETCH_ATTRIB( A8R8G8B8_UNORM, SZ_4, CVT_8_UNORM, 1 )
unsigned idx;
const unsigned bytes_per_entry = draw->vertex_fetch.size[attr];
const unsigned quads_per_entry = (bytes_per_entry + 15) / 16;
- qword p[4];
qword in[2 * 4];
/* Convert all 4 vertices to vectors of float.
*/
- idx = 0;
- for (i = 0; i < 4; i++) {
- p[i] = (*fetch)(in + idx);
- idx += quads_per_entry;
- }
-
-
- /* Transpose/swizzle into vector-friendly format. Currently
- * assuming that all vertex shader inputs are float[4], but this
- * isn't true -- if the vertex shader only wants tex0.xy, we
- * could optimize for that.
- *
- * To do so fully without codegen would probably require an
- * excessive number of fetch functions, but we could at least
- * minimize the transpose step:
- */
- _transpose_matrix4x4(&machine->Inputs[attr].xyzw[0].q, p);
+ (*fetch)(&machine->Inputs[attr].xyzw[0].q, in);
}
}