gallium: fix refcount bug introduced in eb20e2984
[mesa.git] / src / gallium / drivers / cell / spu / spu_texture.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_compiler.h"
30 #include "spu_main.h"
31 #include "spu_texture.h"
32 #include "spu_tile.h"
33 #include "spu_colorpack.h"
34 #include "spu_dcache.h"
35
36
37 /**
38 * Mark all tex cache entries as invalid.
39 */
40 void
41 invalidate_tex_cache(void)
42 {
43 uint unit = 0;
44 uint bytes = 4 * spu.texture[unit].width
45 * spu.texture[unit].height;
46
47 spu_dcache_mark_dirty((unsigned) spu.texture[unit].start, bytes);
48 }
49
50
51 /**
52 * XXX look into getting texels for all four pixels in a quad at once.
53 */
54 static uint
55 get_texel(uint unit, vec_uint4 coordinate)
56 {
57 /*
58 * XXX we could do the "/ TILE_SIZE" and "% TILE_SIZE" operations as
59 * SIMD since X and Y are already in a SIMD register.
60 */
61 const unsigned texture_ea = (uintptr_t) spu.texture[unit].start;
62 ushort x = spu_extract(coordinate, 0);
63 ushort y = spu_extract(coordinate, 1);
64 unsigned tile_offset = sizeof(tile_t)
65 * ((y / TILE_SIZE * spu.texture[unit].tiles_per_row) + (x / TILE_SIZE));
66 ushort texel_offset = (ushort) 4
67 * (ushort) (((ushort) (y % TILE_SIZE) * (ushort) TILE_SIZE) + (x % TILE_SIZE));
68 vec_uint4 tmp;
69
70 spu_dcache_fetch_unaligned((qword *) & tmp,
71 texture_ea + tile_offset + texel_offset,
72 4);
73 return spu_extract(tmp, 0);
74 }
75
76
77 /**
78 * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ...
79 *
80 * NOTE: in the typical case of bilinear filtering, the four texels
81 * are in a 2x2 group so we could get by with just two dcache fetches
82 * (two side-by-side texels per fetch). But when bilinear filtering
83 * wraps around a texture edge, we'll probably need code like we have
84 * now.
85 * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time,
86 * it's quite likely that the four pixels in a quad will need some of the
87 * same texels. So look into doing texture fetches for four pixels at
88 * a time.
89 */
90 static void
91 get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
92 {
93 const unsigned texture_ea = (uintptr_t) spu.texture[unit].start;
94 vec_uint4 tile_x = spu_rlmask(x, -5);
95 vec_uint4 tile_y = spu_rlmask(y, -5);
96 const qword offset_x = si_andi((qword) x, 0x1f);
97 const qword offset_y = si_andi((qword) y, 0x1f);
98
99 const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row);
100 const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
101
102 qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
103 tile_offset = si_mpy((qword) tile_offset, tile_size);
104
105 qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x);
106 texel_offset = si_mpyui(texel_offset, 4);
107
108 vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset);
109
110 spu_dcache_fetch_unaligned((qword *) & texels[0],
111 texture_ea + spu_extract(offset, 0), 4);
112 spu_dcache_fetch_unaligned((qword *) & texels[1],
113 texture_ea + spu_extract(offset, 1), 4);
114 spu_dcache_fetch_unaligned((qword *) & texels[2],
115 texture_ea + spu_extract(offset, 2), 4);
116 spu_dcache_fetch_unaligned((qword *) & texels[3],
117 texture_ea + spu_extract(offset, 3), 4);
118 }
119
120
121 /**
122 * Get texture sample at texcoord.
123 */
124 vector float
125 sample_texture_nearest(uint unit, vector float texcoord)
126 {
127 vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size);
128 vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */
129 itc = spu_and(itc, spu.texture[unit].tex_size_mask); /* mask (GL_REPEAT) */
130 uint texel = get_texel(unit, itc);
131 return spu_unpack_A8R8G8B8(texel);
132 }
133
134
135 vector float
136 sample_texture_bilinear(uint unit, vector float texcoord)
137 {
138 static const vec_uint4 offset_x = {0, 0, 1, 1};
139 static const vec_uint4 offset_y = {0, 1, 0, 1};
140
141 vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size);
142 tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */
143
144 /* integer texcoords S,T: */
145 vec_uint4 itc = spu_convtu(tc, 0); /* convert to int */
146
147 vec_uint4 texels[4];
148
149 /* setup texcoords for quad:
150 * +-----+-----+
151 * |x0,y0|x1,y1|
152 * +-----+-----+
153 * |x2,y2|x3,y3|
154 * +-----+-----+
155 */
156 vec_uint4 x = spu_splats(spu_extract(itc, 0));
157 vec_uint4 y = spu_splats(spu_extract(itc, 1));
158 x = spu_add(x, offset_x);
159 y = spu_add(y, offset_y);
160
161 /* GL_REPEAT wrap mode: */
162 x = spu_and(x, spu.texture[unit].tex_size_x_mask);
163 y = spu_and(y, spu.texture[unit].tex_size_y_mask);
164
165 get_four_texels(unit, x, y, texels);
166
167 /* integer A8R8G8B8 to float texel conversion */
168 vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0));
169 vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0));
170 vector float texel10 = spu_unpack_A8R8G8B8(spu_extract(texels[2], 0));
171 vector float texel11 = spu_unpack_A8R8G8B8(spu_extract(texels[3], 0));
172
173
174 /* Compute weighting factors in [0,1]
175 * Multiply texcoord by 1024, AND with 1023, convert back to float.
176 */
177 vector float tc1024 = spu_mul(tc, spu_splats(1024.0f));
178 vector signed int itc1024 = spu_convts(tc1024, 0);
179 itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1));
180 vector float weight = spu_convtf(itc1024, 10);
181
182 /* smeared frac and 1-frac */
183 vector float sfrac = spu_splats(spu_extract(weight, 0));
184 vector float tfrac = spu_splats(spu_extract(weight, 1));
185 vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac);
186 vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac);
187
188 /* multiply the samples (colors) by the S/T weights */
189 texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1);
190 texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1);
191 texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac );
192 texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac );
193
194 /* compute sum of weighted samples */
195 vector float texel_sum = spu_add(texel00, texel01);
196 texel_sum = spu_add(texel_sum, texel10);
197 texel_sum = spu_add(texel_sum, texel11);
198
199 return texel_sum;
200 }