Cell: trivial clean-ups
[mesa.git] / src / gallium / drivers / cell / spu / spu_vertex_fetch.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * (C) Copyright IBM Corporation 2008
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /*
30 * Authors:
31 * Keith Whitwell <keith@tungstengraphics.com>
32 * Ian Romanick <idr@us.ibm.com>
33 */
34
35 #include <spu_mfcio.h>
36
37 #include "pipe/p_util.h"
38 #include "pipe/p_state.h"
39 #include "pipe/p_shader_tokens.h"
40 #include "spu_exec.h"
41 #include "spu_vertex_shader.h"
42 #include "spu_main.h"
43
44 #define CACHE_NAME attribute
45 #define CACHED_TYPE qword
46 #define CACHE_TYPE CACHE_TYPE_RO
47 #define CACHE_SET_TAGID(set) TAG_VERTEX_BUFFER
48 #define CACHE_LOG2NNWAY 2
49 #define CACHE_LOG2NSETS 6
50 #include <cache-api.h>
51
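/* Yes folks, this is ugly.  The cache geometry is redefined by hand here
 * (4 ways, 1 << 6 sets, matching the log2 values given above) so that
 * spu_update_vertex_fetch() can walk every cache line when invalidating.
 */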
54 #undef CACHE_NWAY
55 #undef CACHE_NSETS
56 #define CACHE_NAME attribute
57 #define CACHE_NWAY 4
58 #define CACHE_NSETS (1U << 6)
59
60
61 #define DRAW_DBG 0
62
63 typedef void (*spu_fetch_func)(qword *out, const qword *in,
64 const qword *shuffle_data);
65
66
67 static const qword fetch_shuffle_data[] = {
68 /* Shuffle used by CVT_64_FLOAT
69 */
70 {
71 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
72 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
73 },
74
75 /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED
76 */
77 {
78 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80,
79 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80,
80 },
81
82 /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED
83 */
84 {
85 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80,
86 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80,
87 },
88
89 /* High value shuffle used by trans4x4.
90 */
91 {
92 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
93 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17
94 },
95
96 /* Low value shuffle used by trans4x4.
97 */
98 {
99 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
100 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F
101 }
102 };
103
104
105 /**
106 * Fetch between 1 and 32 bytes from an unaligned address
107 */
108 static INLINE void
109 fetch_unaligned(qword *dst, unsigned ea, unsigned size)
110 {
111 qword tmp[4];
112 const int shift = ea & 0x0f;
113 const unsigned aligned_start_ea = ea & ~0x0f;
114 const unsigned aligned_end_ea = (ea + size) & ~0x0f;
115 const unsigned num_entries = ((aligned_end_ea - aligned_start_ea) / 16) + 1;
116 unsigned i;
117
118
119 if (shift == 0) {
120 /* Data is already aligned. Fetch directly into the destination buffer.
121 */
122 for (i = 0; i < num_entries; i++) {
123 dst[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16));
124 }
125 } else {
126 /* Fetch data from the cache to the local buffer.
127 */
128 for (i = 0; i < num_entries; i++) {
129 tmp[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16));
130 }
131
132
133 /* Fix the alignment of the data and write to the destination buffer.
134 */
135 for (i = 0; i < ((size + 15) / 16); i++) {
136 dst[i] = si_or((qword) spu_slqwbyte(tmp[i], shift),
137 (qword) spu_rlmaskqwbyte(tmp[i + 1], shift - 16));
138 }
139 }
140 }
141
142
143 /**
144 * Fetch vertex attributes for 'count' vertices.
145 */
146 static void generic_vertex_fetch(struct spu_vs_context *draw,
147 struct spu_exec_machine *machine,
148 const unsigned *elts,
149 unsigned count)
150 {
151 unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
152 unsigned attr;
153
154 assert(count <= 4);
155
156 #if DRAW_DBG
157 printf("SPU: %s count = %u, nr_attrs = %u\n",
158 __FUNCTION__, count, nr_attrs);
159 #endif
160
161 /* loop over vertex attributes (vertex shader inputs)
162 */
163 for (attr = 0; attr < nr_attrs; attr++) {
164 const unsigned pitch = draw->vertex_fetch.pitch[attr];
165 const uint64_t src = draw->vertex_fetch.src_ptr[attr];
166 const spu_fetch_func fetch = (spu_fetch_func)
167 (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]);
168 unsigned i;
169 unsigned idx;
170 const unsigned bytes_per_entry = draw->vertex_fetch.size[attr];
171 const unsigned quads_per_entry = (bytes_per_entry + 15) / 16;
172 qword in[2 * 4];
173
174
175 /* Fetch four attributes for four vertices.
176 */
177 idx = 0;
178 for (i = 0; i < count; i++) {
179 const uint64_t addr = src + (elts[i] * pitch);
180
181 #if DRAW_DBG
182 printf("SPU: fetching = 0x%llx\n", addr);
183 #endif
184
185 fetch_unaligned(& in[idx], addr, bytes_per_entry);
186 idx += quads_per_entry;
187 }
188
189 /* Be nice and zero out any missing vertices.
190 */
191 (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword));
192
193
194 /* Convert all 4 vertices to vectors of float.
195 */
196 (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data);
197 }
198 }
199
200
201 void spu_update_vertex_fetch( struct spu_vs_context *draw )
202 {
203 unsigned i;
204
205
206 /* Invalidate the vertex cache.
207 */
208 for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) {
209 CACHELINE_CLEARVALID(i);
210 }
211
212
213 draw->vertex_fetch.fetch_func = generic_vertex_fetch;
214 }