/**************************************************************************
 *
 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
 * (C) Copyright IBM Corporation 2008
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/*
 * Authors:
 *   Keith Whitwell <keith@tungstengraphics.com>
 *   Ian Romanick <idr@us.ibm.com>
 */
#include <spu_mfcio.h>

#include "pipe/p_util.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"

#include "spu_vertex_shader.h"

/* Configuration macros set up before <cache-api.h> is included.  Going by
 * their names they describe a read-only cache called "attribute" that
 * holds qwords and uses TAG_VERTEX_BUFFER as its tag id; fetch_unaligned()
 * reads it through cache_rd(attribute, ...).
 *
 * NOTE(review): CACHE_LOG2NNWAY is spelled with a double 'N'.  Confirm
 * that this is the name cache-api.h actually reads; if it is not, the
 * header's default associativity is what takes effect.
 */
#define CACHE_NAME            attribute
#define CACHED_TYPE           qword
#define CACHE_TYPE            CACHE_TYPE_RO
#define CACHE_SET_TAGID(set)  TAG_VERTEX_BUFFER
#define CACHE_LOG2NNWAY       2
#define CACHE_LOG2NSETS       6
#include <cache-api.h>

/* Yes folks, this is ugly.
 *
 * CACHE_NAME and CACHE_NSETS are defined again after the include so that
 * spu_update_vertex_fetch() can walk the cache lines.  CACHE_NSETS repeats
 * the 1 << CACHE_LOG2NSETS value configured above.
 *
 * NOTE(review): presumably cache-api.h undefines its configuration macros
 * once it has consumed them - confirm, and confirm that CACHE_NWAY (also
 * used by spu_update_vertex_fetch()) is still defined at that point.
 */
#define CACHE_NAME            attribute
#define CACHE_NSETS           (1U << 6)
/**
 * Signature of an attribute conversion routine.
 *
 * generic_vertex_fetch() calls one of these per vertex attribute, passing
 * the shader input register as \c out, the staged raw attribute data as
 * \c in and the fetch_shuffle_data table as \c shuffle_data.
 */
typedef void (*spu_fetch_func)(qword *out, const qword *in,
                               const qword *shuffle_data);
67 static const qword fetch_shuffle_data
[] = {
68 /* Shuffle used by CVT_64_FLOAT
71 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
72 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
75 /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED
78 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80,
79 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80,
82 /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED
85 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80,
86 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80,
89 /* High value shuffle used by trans4x4.
92 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
93 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17
96 /* Low value shuffle used by trans4x4.
99 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
100 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F
106 * Fetch between 1 and 32 bytes from an unaligned address
109 fetch_unaligned(qword
*dst
, unsigned ea
, unsigned size
)
112 const int shift
= ea
& 0x0f;
113 const unsigned aligned_start_ea
= ea
& ~0x0f;
114 const unsigned aligned_end_ea
= (ea
+ size
) & ~0x0f;
115 const unsigned num_entries
= ((aligned_end_ea
- aligned_start_ea
) / 16) + 1;
120 /* Data is already aligned. Fetch directly into the destination buffer.
122 for (i
= 0; i
< num_entries
; i
++) {
123 dst
[i
] = cache_rd(attribute
, (ea
& ~0x0f) + (i
* 16));
126 /* Fetch data from the cache to the local buffer.
128 for (i
= 0; i
< num_entries
; i
++) {
129 tmp
[i
] = cache_rd(attribute
, (ea
& ~0x0f) + (i
* 16));
133 /* Fix the alignment of the data and write to the destination buffer.
135 for (i
= 0; i
< ((size
+ 15) / 16); i
++) {
136 dst
[i
] = si_or((qword
) spu_slqwbyte(tmp
[i
], shift
),
137 (qword
) spu_rlmaskqwbyte(tmp
[i
+ 1], shift
- 16));
144 * Fetch vertex attributes for 'count' vertices.
146 static void generic_vertex_fetch(struct spu_vs_context
*draw
,
147 struct spu_exec_machine
*machine
,
148 const unsigned *elts
,
151 unsigned nr_attrs
= draw
->vertex_fetch
.nr_attrs
;
157 printf("SPU: %s count = %u, nr_attrs = %u\n",
158 __FUNCTION__
, count
, nr_attrs
);
161 /* loop over vertex attributes (vertex shader inputs)
163 for (attr
= 0; attr
< nr_attrs
; attr
++) {
164 const unsigned pitch
= draw
->vertex_fetch
.pitch
[attr
];
165 const uint64_t src
= draw
->vertex_fetch
.src_ptr
[attr
];
166 const spu_fetch_func fetch
= (spu_fetch_func
)
167 (draw
->vertex_fetch
.code
+ draw
->vertex_fetch
.code_offset
[attr
]);
170 const unsigned bytes_per_entry
= draw
->vertex_fetch
.size
[attr
];
171 const unsigned quads_per_entry
= (bytes_per_entry
+ 15) / 16;
175 /* Fetch four attributes for four vertices.
178 for (i
= 0; i
< count
; i
++) {
179 const uint64_t addr
= src
+ (elts
[i
] * pitch
);
182 printf("SPU: fetching = 0x%llx\n", addr
);
185 fetch_unaligned(& in
[idx
], addr
, bytes_per_entry
);
186 idx
+= quads_per_entry
;
189 /* Be nice and zero out any missing vertices.
191 (void) memset(& in
[idx
], 0, (8 - idx
) * sizeof(qword
));
194 /* Convert all 4 vertices to vectors of float.
196 (*fetch
)(&machine
->Inputs
[attr
].xyzw
[0].q
, in
, fetch_shuffle_data
);
201 void spu_update_vertex_fetch( struct spu_vs_context
*draw
)
206 /* Invalidate the vertex cache.
208 for (i
= 0; i
< (CACHE_NWAY
* CACHE_NSETS
); i
++) {
209 CACHELINE_CLEARVALID(i
);
213 draw
->vertex_fetch
.fetch_func
= generic_vertex_fetch
;