Code reorganization: move files into their places.
[mesa.git] / src / gallium / drivers / cell / spu / spu_texture.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_compiler.h"
30 #include "spu_main.h"
31 #include "spu_texture.h"
32 #include "spu_tile.h"
33 #include "spu_colorpack.h"
34
35
36 /**
37 * Number of texture tiles to cache.
38 * Note that this will probably be the largest consumer of SPU local store/
39 * memory for this driver!
40 */
41 #define CACHE_SIZE 16
42
43 static tile_t tex_tiles[CACHE_SIZE] ALIGN16_ATTRIB;
44
45 static vector unsigned int tex_tile_xy[CACHE_SIZE];
46
47
48
49 /**
50 * Mark all tex cache entries as invalid.
51 */
52 void
53 invalidate_tex_cache(void)
54 {
55 /* XXX memset? */
56 uint i;
57 for (i = 0; i < CACHE_SIZE; i++) {
58 tex_tile_xy[i] = ((vector unsigned int) { ~0U, ~0U, ~0U, ~0U });
59 }
60 }
61
62
63 /**
64 * Return the cache pos/index which corresponds to tile (tx,ty)
65 */
66 static INLINE uint
67 cache_pos(vector unsigned int txty)
68 {
69 uint pos = (spu_extract(txty,0) + spu_extract(txty,1) * 4) % CACHE_SIZE;
70 return pos;
71 }
72
73
74 /**
75 * Make sure the tile for texel (i,j) is present, return its position/index
76 * in the cache.
77 */
78 static uint
79 get_tex_tile(vector unsigned int ij)
80 {
81 /* tile address: tx,ty */
82 const vector unsigned int txty = spu_rlmask(ij, -5); /* divide by 32 */
83 const uint pos = cache_pos(txty);
84
85 if ((spu_extract(tex_tile_xy[pos], 0) != spu_extract(txty, 0)) ||
86 (spu_extract(tex_tile_xy[pos], 1) != spu_extract(txty, 1))) {
87
88 /* texture cache miss, fetch tile from main memory */
89 const uint tiles_per_row = spu.texture.width / TILE_SIZE;
90 const uint bytes_per_tile = sizeof(tile_t);
91 const void *src = (const ubyte *) spu.texture.start
92 + (spu_extract(txty,1) * tiles_per_row + spu_extract(txty,0)) * bytes_per_tile;
93
94 printf("SPU %u: tex cache miss at %d, %d pos=%u old=%d,%d\n",
95 spu.init.id,
96 spu_extract(txty,0),
97 spu_extract(txty,1),
98 pos,
99 spu_extract(tex_tile_xy[pos],0),
100 spu_extract(tex_tile_xy[pos],1));
101
102 ASSERT_ALIGN16(tex_tiles[pos].ui);
103 ASSERT_ALIGN16(src);
104
105 mfc_get(tex_tiles[pos].ui, /* dest */
106 (unsigned int) src,
107 bytes_per_tile, /* size */
108 TAG_TEXTURE_TILE,
109 0, /* tid */
110 0 /* rid */);
111
112 wait_on_mask(1 << TAG_TEXTURE_TILE);
113
114 tex_tile_xy[pos] = txty;
115 }
116 else {
117 #if 0
118 printf("SPU %u: tex cache HIT at %d, %d\n",
119 spu.init.id, tx, ty);
120 #endif
121 }
122
123 return pos;
124 }
125
126
127 /**
128 * Get texture sample at texcoord.
129 * XXX this is extremely primitive for now.
130 */
131 vector float
132 sample_texture_nearest(vector float texcoord)
133 {
134 vector float tc = spu_mul(texcoord, spu.tex_size);
135 vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */
136 itc = spu_and(itc, spu.tex_size_mask); /* mask (GL_REPEAT) */
137 vector unsigned int ij = spu_and(itc, TILE_SIZE-1); /* intra tile addr */
138 uint pos = get_tex_tile(itc);
139 uint texel = tex_tiles[pos].ui[spu_extract(ij, 1)][spu_extract(ij, 0)];
140 return spu_unpack_A8R8G8B8(texel);
141 }
142
143
144 vector float
145 sample_texture_bilinear(vector float texcoord)
146 {
147 static const vector unsigned int offset10 = {1, 0, 0, 0};
148 static const vector unsigned int offset01 = {0, 1, 0, 0};
149
150 vector float tc = spu_mul(texcoord, spu.tex_size);
151 tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */
152
153 /* integer texcoords S,T: */
154 vector unsigned int itc00 = spu_convtu(tc, 0); /* convert to int */
155 vector unsigned int itc01 = spu_add(itc00, offset01);
156 vector unsigned int itc10 = spu_add(itc00, offset10);
157 vector unsigned int itc11 = spu_add(itc10, offset01);
158
159 /* mask (GL_REPEAT) */
160 itc00 = spu_and(itc00, spu.tex_size_mask);
161 itc01 = spu_and(itc01, spu.tex_size_mask);
162 itc10 = spu_and(itc10, spu.tex_size_mask);
163 itc11 = spu_and(itc11, spu.tex_size_mask);
164
165 /* intra tile addr */
166 vector unsigned int ij00 = spu_and(itc00, TILE_SIZE-1);
167 vector unsigned int ij01 = spu_and(itc01, TILE_SIZE-1);
168 vector unsigned int ij10 = spu_and(itc10, TILE_SIZE-1);
169 vector unsigned int ij11 = spu_and(itc11, TILE_SIZE-1);
170
171 /* get tile cache positions */
172 uint pos00 = get_tex_tile(itc00);
173 uint pos01, pos10, pos11;
174 if ((spu_extract(ij00, 0) < TILE_SIZE-1) &&
175 (spu_extract(ij00, 1) < TILE_SIZE-1)) {
176 /* all texels are in the same tile */
177 pos01 = pos10 = pos11 = pos00;
178 }
179 else {
180 pos01 = get_tex_tile(itc01);
181 pos10 = get_tex_tile(itc10);
182 pos11 = get_tex_tile(itc11);
183 }
184
185 /* get texels from tiles and convert to float[4] */
186 vector float texel00 = spu_unpack_A8R8G8B8(tex_tiles[pos00].ui[spu_extract(ij00, 1)][spu_extract(ij00, 0)]);
187 vector float texel01 = spu_unpack_A8R8G8B8(tex_tiles[pos01].ui[spu_extract(ij01, 1)][spu_extract(ij01, 0)]);
188 vector float texel10 = spu_unpack_A8R8G8B8(tex_tiles[pos10].ui[spu_extract(ij10, 1)][spu_extract(ij10, 0)]);
189 vector float texel11 = spu_unpack_A8R8G8B8(tex_tiles[pos11].ui[spu_extract(ij11, 1)][spu_extract(ij11, 0)]);
190
191 /* Compute weighting factors in [0,1]
192 * Multiply texcoord by 1024, AND with 1023, convert back to float.
193 */
194 vector float tc1024 = spu_mul(tc, spu_splats(1024.0f));
195 vector signed int itc1024 = spu_convts(tc1024, 0);
196 itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1));
197 vector float weight = spu_convtf(itc1024, 10);
198
199 /* smeared frac and 1-frac */
200 vector float sfrac = spu_splats(spu_extract(weight, 0));
201 vector float tfrac = spu_splats(spu_extract(weight, 1));
202 vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac);
203 vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac);
204
205 /* multiply the samples (colors) by the S/T weights */
206 texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1);
207 texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1);
208 texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac );
209 texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac );
210
211 /* compute sum of weighted samples */
212 vector float texel_sum = spu_add(texel00, texel01);
213 texel_sum = spu_add(texel_sum, texel10);
214 texel_sum = spu_add(texel_sum, texel11);
215
216 return texel_sum;
217 }