1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "pipe/p_compiler.h"
31 #include "spu_texture.h"
33 #include "spu_colorpack.h"
37 * Number of texture tiles to cache.
38 * Note that this will probably be the largest consumer of SPU local store/
39 * memory for this driver!
/* Texel storage for the texture cache: one tile_t per cache slot.
 * The samplers index this array with the slot number they obtain from
 * get_tex_tile() and read texels through the .ui member as
 * .ui[row][column].
 * NOTE(review): ALIGN16_ATTRIB presumably requests 16-byte alignment;
 * get_tex_tile() applies ASSERT_ALIGN16 to the slot's .ui member before
 * starting the transfer into it -- confirm against the macro definition.
 */
43 static tile_t tex_tiles
[CACHE_SIZE
] ALIGN16_ATTRIB
;
/* Per-slot tag, parallel to tex_tiles[]: elements 0 and 1 hold the tile
 * address (tx, ty) of the tile currently stored in the same-numbered slot.
 * get_tex_tile() compares these two elements against the requested tile
 * address to detect a hit, and invalidate_tex_cache() overwrites all four
 * elements with ~0U.
 */
45 static vector
unsigned int tex_tile_xy
[CACHE_SIZE
];
50 * Mark all tex cache entries as invalid.
/* Reset the texture cache by overwriting the tag of every slot
 * (tex_tile_xy[0 .. CACHE_SIZE-1]) with ~0U in all four elements.
 * get_tex_tile() treats a slot as a hit only when tag elements 0 and 1
 * equal the requested tile address, so after this call a lookup hits only
 * if the requested address is itself (~0U, ~0U).
 * The tile data in tex_tiles[] is not touched here.
 * NOTE(review): the return type, the declaration of `i` and the braces
 * are not visible in this excerpt.
 */
53 invalidate_tex_cache(void)
57 for (i
= 0; i
< CACHE_SIZE
; i
++) {
58 tex_tile_xy
[i
] = ((vector
unsigned int) { ~0U, ~0U, ~0U, ~0U });
64 * Return the cache pos/index which corresponds to tile (tx,ty)
/* Map a tile address to a cache slot number.
 * txty: element 0 is used as the tile's x index and element 1 as its
 *       y index; the remaining elements are not read.
 * The slot is (x + y * 4) % CACHE_SIZE, so distinct tiles can map to the
 * same slot; get_tex_tile() checks the stored tag to detect that case.
 * NOTE(review): the return type and the return statement are not visible
 * in this excerpt; get_tex_tile() uses the call's result as the slot
 * index, so presumably `pos` is what is returned.
 */
67 cache_pos(vector
unsigned int txty
)
69 uint pos
= (spu_extract(txty
,0) + spu_extract(txty
,1) * 4) % CACHE_SIZE
;
75 * Make sure the tile for texel (i,j) is present, return its position/index
/* Look up the cache slot for the tile containing texel `ij`, fetching the
 * tile into that slot first when the slot's tag does not match.
 * ij: texel coordinates; element 0 and element 1 are the two coordinates
 *     that end up as the tile's x and y index after the shift below.
 * On a miss the tile is transferred into tex_tiles[pos].ui with mfc_get,
 * the code waits on the TAG_TEXTURE_TILE mask, and the slot's tag is
 * updated to the new tile address.
 * NOTE(review): the return type, the return statement, several printf
 * and mfc_get arguments, and the closing braces are not visible in this
 * excerpt.  The samplers use the call's result as an index into
 * tex_tiles[], so presumably `pos` is returned.
 */
79 get_tex_tile(vector
unsigned int ij
)
81 /* tile address: tx,ty */
82 const vector
unsigned int txty
= spu_rlmask(ij
, -5); /* divide by 32 */
83 const uint pos
= cache_pos(txty
);
/* Miss test: the slot is valid for this request only if both the x and
 * the y component of the stored tag equal the requested tile address.
 */
85 if ((spu_extract(tex_tile_xy
[pos
], 0) != spu_extract(txty
, 0)) ||
86 (spu_extract(tex_tile_xy
[pos
], 1) != spu_extract(txty
, 1))) {
88 /* texture cache miss, fetch tile from main memory */
89 const uint tiles_per_row
= spu
.texture
.width
/ TILE_SIZE
;
90 const uint bytes_per_tile
= sizeof(tile_t
);
/* Source address: tiles are addressed row by row starting at
 * spu.texture.start, each tile occupying sizeof(tile_t) bytes, so the
 * byte offset is (ty * tiles_per_row + tx) * bytes_per_tile.
 */
91 const void *src
= (const ubyte
*) spu
.texture
.start
92 + (spu_extract(txty
,1) * tiles_per_row
+ spu_extract(txty
,0)) * bytes_per_tile
;
/* NOTE(review): only the last two arguments of this printf are visible
 * here.  They are extracted from a vector of unsigned int but are
 * formatted with %d; the message may also be compiled out in the full
 * file -- confirm.
 */
94 printf("SPU %u: tex cache miss at %d, %d pos=%u old=%d,%d\n",
99 spu_extract(tex_tile_xy
[pos
],0),
100 spu_extract(tex_tile_xy
[pos
],1));
102 ASSERT_ALIGN16(tex_tiles
[pos
].ui
);
/* Start the transfer of one whole tile into the slot.
 * NOTE(review): the source-address, tag and remaining arguments of
 * mfc_get are not visible in this excerpt.
 */
105 mfc_get(tex_tiles
[pos
].ui
, /* dest */
107 bytes_per_tile
, /* size */
/* Wait on the TAG_TEXTURE_TILE mask before the slot is marked as holding
 * the new tile.
 */
112 wait_on_mask(1 << TAG_TEXTURE_TILE
);
114 tex_tile_xy
[pos
] = txty
;
/* NOTE(review): `tx` and `ty` are not declared by any visible line of
 * this function, so this message is presumably compiled out (for example
 * inside a disabled preprocessor block) in the full file -- confirm.
 */
118 printf("SPU %u: tex cache HIT at %d, %d\n",
119 spu
.init
.id
, tx
, ty
);
128 * Get texture sample at texcoord.
129 * XXX this is extremely primitive for now.
/* Nearest-texel lookup.
 * texcoord: texture coordinate; elements 0 and 1 are the ones that end up
 *           selecting the column and row of the texel.
 * Steps: scale by spu.tex_size, convert to unsigned integer texel
 * coordinates, wrap with spu.tex_size_mask, make sure the containing tile
 * is cached, then read one packed texel from the tile and unpack it with
 * spu_unpack_A8R8G8B8.
 * NOTE(review): the return type and braces are not visible in this
 * excerpt; sample_texture_bilinear() stores the result of
 * spu_unpack_A8R8G8B8 in a `vector float`, so presumably that is the
 * return type here.
 */
132 sample_texture_nearest(vector
float texcoord
)
134 vector
float tc
= spu_mul(texcoord
, spu
.tex_size
);
135 vector
unsigned int itc
= spu_convtu(tc
, 0); /* convert to int */
136 itc
= spu_and(itc
, spu
.tex_size_mask
); /* mask (GL_REPEAT) */
/* Keep only the low bits selected by TILE_SIZE-1: the texel's position
 * inside its tile.  NOTE(review): this assumes TILE_SIZE is a power of
 * two (get_tex_tile() divides by 32 to obtain the tile address).
 */
137 vector
unsigned int ij
= spu_and(itc
, TILE_SIZE
-1); /* intra tile addr */
138 uint pos
= get_tex_tile(itc
);
/* Tiles are indexed [row][column]: element 1 of ij first, then element 0. */
139 uint texel
= tex_tiles
[pos
].ui
[spu_extract(ij
, 1)][spu_extract(ij
, 0)];
140 return spu_unpack_A8R8G8B8(texel
);
/* Bilinear texture lookup: fetch the 2x2 block of texels around the
 * (half-texel biased) sample position, weight each by the fractional
 * S/T position and add the four weighted colours into texel_sum.
 * texcoord: texture coordinate; element 0 is treated as S and element 1
 *           as T.
 * Naming: the suffix digits are (S offset, T offset), so *10 is the
 * S+1 neighbour and *01 the T+1 neighbour of the base texel *00.
 * NOTE(review): the return type, the else-branch header, the closing
 * braces and the return statement are not visible in this excerpt.
 */
145 sample_texture_bilinear(vector
float texcoord
)
147 static const vector
unsigned int offset10
= {1, 0, 0, 0};
148 static const vector
unsigned int offset01
= {0, 1, 0, 0};
150 vector
float tc
= spu_mul(texcoord
, spu
.tex_size
);
151 tc
= spu_add(tc
, spu_splats(-0.5f
)); /* half texel bias */
/* NOTE(review): after the -0.5 bias tc can be negative for texcoords
 * close to 0.  Confirm how the unsigned conversion below treats negative
 * input and whether the GL_REPEAT wrap-around still selects the intended
 * texel in that case.
 */
153 /* integer texcoords S,T: */
154 vector
unsigned int itc00
= spu_convtu(tc
, 0); /* convert to int */
155 vector
unsigned int itc01
= spu_add(itc00
, offset01
);
156 vector
unsigned int itc10
= spu_add(itc00
, offset10
);
157 vector
unsigned int itc11
= spu_add(itc10
, offset01
);
159 /* mask (GL_REPEAT) */
160 itc00
= spu_and(itc00
, spu
.tex_size_mask
);
161 itc01
= spu_and(itc01
, spu
.tex_size_mask
);
162 itc10
= spu_and(itc10
, spu
.tex_size_mask
);
163 itc11
= spu_and(itc11
, spu
.tex_size_mask
);
165 /* intra tile addr */
166 vector
unsigned int ij00
= spu_and(itc00
, TILE_SIZE
-1);
167 vector
unsigned int ij01
= spu_and(itc01
, TILE_SIZE
-1);
168 vector
unsigned int ij10
= spu_and(itc10
, TILE_SIZE
-1);
169 vector
unsigned int ij11
= spu_and(itc11
, TILE_SIZE
-1);
171 /* get tile cache positions */
172 uint pos00
= get_tex_tile(itc00
);
173 uint pos01
, pos10
, pos11
;
/* Fast path: if the base texel is neither in the last column nor in the
 * last row of its tile, the +1 neighbours cannot leave that tile, so one
 * tile lookup serves all four texels.
 */
174 if ((spu_extract(ij00
, 0) < TILE_SIZE
-1) &&
175 (spu_extract(ij00
, 1) < TILE_SIZE
-1)) {
176 /* all texels are in the same tile */
177 pos01
= pos10
= pos11
= pos00
;
/* Slow path: look up the tile of each neighbour separately.
 * NOTE(review): the line introducing this branch is not visible here;
 * presumably these lookups run only when the test above fails.  Because
 * cache_pos() maps tiles to slots with a modulo, also confirm that a
 * later lookup cannot evict a tile fetched by an earlier one before the
 * texels are read below.
 */
180 pos01
= get_tex_tile(itc01
);
181 pos10
= get_tex_tile(itc10
);
182 pos11
= get_tex_tile(itc11
);
185 /* get texels from tiles and convert to float[4] */
186 vector
float texel00
= spu_unpack_A8R8G8B8(tex_tiles
[pos00
].ui
[spu_extract(ij00
, 1)][spu_extract(ij00
, 0)]);
187 vector
float texel01
= spu_unpack_A8R8G8B8(tex_tiles
[pos01
].ui
[spu_extract(ij01
, 1)][spu_extract(ij01
, 0)]);
188 vector
float texel10
= spu_unpack_A8R8G8B8(tex_tiles
[pos10
].ui
[spu_extract(ij10
, 1)][spu_extract(ij10
, 0)]);
189 vector
float texel11
= spu_unpack_A8R8G8B8(tex_tiles
[pos11
].ui
[spu_extract(ij11
, 1)][spu_extract(ij11
, 0)]);
/* The weights are derived from the biased coordinate `tc`: scale by
 * 1024, convert to a signed integer, keep the low 10 bits, convert back
 * to float.  NOTE(review): spu_convtf(..., 10) presumably divides by
 * 2^10 so the result lies in [0,1) -- confirm against the intrinsic's
 * documentation.  The terminator of the comment that follows is not
 * visible in this excerpt.
 */
191 /* Compute weighting factors in [0,1]
192 * Multiply texcoord by 1024, AND with 1023, convert back to float.
194 vector
float tc1024
= spu_mul(tc
, spu_splats(1024.0f
));
195 vector
signed int itc1024
= spu_convts(tc1024
, 0);
196 itc1024
= spu_and(itc1024
, spu_splats((1 << 10) - 1));
197 vector
float weight
= spu_convtf(itc1024
, 10);
199 /* smeared frac and 1-frac */
200 vector
float sfrac
= spu_splats(spu_extract(weight
, 0));
201 vector
float tfrac
= spu_splats(spu_extract(weight
, 1));
202 vector
float sfrac1
= spu_sub(spu_splats(1.0f
), sfrac
);
203 vector
float tfrac1
= spu_sub(spu_splats(1.0f
), tfrac
);
/* Weight pairing: samples at S+1 (texel10, texel11) use sfrac and the
 * others use 1-sfrac; samples at T+1 (texel01, texel11) use tfrac and the
 * others use 1-tfrac.
 */
205 /* multiply the samples (colors) by the S/T weights */
206 texel00
= spu_mul(spu_mul(texel00
, sfrac1
), tfrac1
);
207 texel10
= spu_mul(spu_mul(texel10
, sfrac
), tfrac1
);
208 texel01
= spu_mul(spu_mul(texel01
, sfrac1
), tfrac
);
209 texel11
= spu_mul(spu_mul(texel11
, sfrac
), tfrac
);
211 /* compute sum of weighted samples */
212 vector
float texel_sum
= spu_add(texel00
, texel01
);
213 texel_sum
= spu_add(texel_sum
, texel10
);
214 texel_sum
= spu_add(texel_sum
, texel11
);
/* NOTE(review): the statement that returns the result is not visible in
 * this excerpt; presumably texel_sum is the function's result.
 */