1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "pipe/p_compiler.h"
31 #include "spu_texture.h"
33 #include "spu_colorpack.h"
34 #include "spu_dcache.h"
38 * Mark all tex cache entries as invalid.
41 invalidate_tex_cache(void)
44 uint bytes
= 4 * spu
.texture
[unit
].width
45 * spu
.texture
[unit
].height
;
47 spu_dcache_mark_dirty((unsigned) spu
.texture
[unit
].start
, bytes
);
52 * XXX look into getting texels for all four pixels in a quad at once.
55 get_texel(uint unit
, vec_uint4 coordinate
)
58 * XXX we could do the "/ TILE_SIZE" and "% TILE_SIZE" operations as
59 * SIMD since X and Y are already in a SIMD register.
61 const unsigned texture_ea
= (uintptr_t) spu
.texture
[unit
].start
;
62 ushort x
= spu_extract(coordinate
, 0);
63 ushort y
= spu_extract(coordinate
, 1);
64 unsigned tile_offset
= sizeof(tile_t
)
65 * ((y
/ TILE_SIZE
* spu
.texture
[unit
].tiles_per_row
) + (x
/ TILE_SIZE
));
66 ushort texel_offset
= (ushort
) 4
67 * (ushort
) (((ushort
) (y
% TILE_SIZE
) * (ushort
) TILE_SIZE
) + (x
% TILE_SIZE
));
70 spu_dcache_fetch_unaligned((qword
*) & tmp
,
71 texture_ea
+ tile_offset
+ texel_offset
,
73 return spu_extract(tmp
, 0);
78 * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ...
80 * NOTE: in the typical case of bilinear filtering, the four texels
81 * are in a 2x2 group so we could get by with just two dcache fetches
82 * (two side-by-side texels per fetch). But when bilinear filtering
83 * wraps around a texture edge, we'll probably need code like we have
85 * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time,
86 * it's quite likely that the four pixels in a quad will need some of the
87 * same texels. So look into doing texture fetches for four pixels at
91 get_four_texels(uint unit
, vec_uint4 x
, vec_uint4 y
, vec_uint4
*texels
)
93 const unsigned texture_ea
= (uintptr_t) spu
.texture
[unit
].start
;
94 vec_uint4 tile_x
= spu_rlmask(x
, -5);
95 vec_uint4 tile_y
= spu_rlmask(y
, -5);
96 const qword offset_x
= si_andi((qword
) x
, 0x1f);
97 const qword offset_y
= si_andi((qword
) y
, 0x1f);
99 const qword tiles_per_row
= (qword
) spu_splats(spu
.texture
[unit
].tiles_per_row
);
100 const qword tile_size
= (qword
) spu_splats(sizeof(tile_t
));
102 qword tile_offset
= si_mpya((qword
) tile_y
, tiles_per_row
, (qword
) tile_x
);
103 tile_offset
= si_mpy((qword
) tile_offset
, tile_size
);
105 qword texel_offset
= si_a(si_mpyui(offset_y
, 32), offset_x
);
106 texel_offset
= si_mpyui(texel_offset
, 4);
108 vec_uint4 offset
= (vec_uint4
) si_a(tile_offset
, texel_offset
);
110 spu_dcache_fetch_unaligned((qword
*) & texels
[0],
111 texture_ea
+ spu_extract(offset
, 0), 4);
112 spu_dcache_fetch_unaligned((qword
*) & texels
[1],
113 texture_ea
+ spu_extract(offset
, 1), 4);
114 spu_dcache_fetch_unaligned((qword
*) & texels
[2],
115 texture_ea
+ spu_extract(offset
, 2), 4);
116 spu_dcache_fetch_unaligned((qword
*) & texels
[3],
117 texture_ea
+ spu_extract(offset
, 3), 4);
122 * Get texture sample at texcoord.
125 sample_texture_nearest(uint unit
, vector
float texcoord
)
127 vector
float tc
= spu_mul(texcoord
, spu
.texture
[unit
].tex_size
);
128 vector
unsigned int itc
= spu_convtu(tc
, 0); /* convert to int */
129 itc
= spu_and(itc
, spu
.texture
[unit
].tex_size_mask
); /* mask (GL_REPEAT) */
130 uint texel
= get_texel(unit
, itc
);
131 return spu_unpack_A8R8G8B8(texel
);
136 sample_texture_bilinear(uint unit
, vector
float texcoord
)
138 static const vec_uint4 offset_x
= {0, 0, 1, 1};
139 static const vec_uint4 offset_y
= {0, 1, 0, 1};
141 vector
float tc
= spu_mul(texcoord
, spu
.texture
[unit
].tex_size
);
142 tc
= spu_add(tc
, spu_splats(-0.5f
)); /* half texel bias */
144 /* integer texcoords S,T: */
145 vec_uint4 itc
= spu_convtu(tc
, 0); /* convert to int */
149 /* setup texcoords for quad:
156 vec_uint4 x
= spu_splats(spu_extract(itc
, 0));
157 vec_uint4 y
= spu_splats(spu_extract(itc
, 1));
158 x
= spu_add(x
, offset_x
);
159 y
= spu_add(y
, offset_y
);
161 /* GL_REPEAT wrap mode: */
162 x
= spu_and(x
, spu
.texture
[unit
].tex_size_x_mask
);
163 y
= spu_and(y
, spu
.texture
[unit
].tex_size_y_mask
);
165 get_four_texels(unit
, x
, y
, texels
);
167 /* integer A8R8G8B8 to float texel conversion */
168 vector
float texel00
= spu_unpack_A8R8G8B8(spu_extract(texels
[0], 0));
169 vector
float texel01
= spu_unpack_A8R8G8B8(spu_extract(texels
[1], 0));
170 vector
float texel10
= spu_unpack_A8R8G8B8(spu_extract(texels
[2], 0));
171 vector
float texel11
= spu_unpack_A8R8G8B8(spu_extract(texels
[3], 0));
174 /* Compute weighting factors in [0,1]
175 * Multiply texcoord by 1024, AND with 1023, convert back to float.
177 vector
float tc1024
= spu_mul(tc
, spu_splats(1024.0f
));
178 vector
signed int itc1024
= spu_convts(tc1024
, 0);
179 itc1024
= spu_and(itc1024
, spu_splats((1 << 10) - 1));
180 vector
float weight
= spu_convtf(itc1024
, 10);
182 /* smeared frac and 1-frac */
183 vector
float sfrac
= spu_splats(spu_extract(weight
, 0));
184 vector
float tfrac
= spu_splats(spu_extract(weight
, 1));
185 vector
float sfrac1
= spu_sub(spu_splats(1.0f
), sfrac
);
186 vector
float tfrac1
= spu_sub(spu_splats(1.0f
), tfrac
);
188 /* multiply the samples (colors) by the S/T weights */
189 texel00
= spu_mul(spu_mul(texel00
, sfrac1
), tfrac1
);
190 texel10
= spu_mul(spu_mul(texel10
, sfrac
), tfrac1
);
191 texel01
= spu_mul(spu_mul(texel01
, sfrac1
), tfrac
);
192 texel11
= spu_mul(spu_mul(texel11
, sfrac
), tfrac
);
194 /* compute sum of weighted samples */
195 vector
float texel_sum
= spu_add(texel00
, texel01
);
196 texel_sum
= spu_add(texel_sum
, texel10
);
197 texel_sum
= spu_add(texel_sum
, texel11
);