Merge remote branch 'upstream/gallium-0.1' into nouveau-gallium-0.1
[mesa.git] / src / gallium / drivers / cell / spu / spu_texture.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_compiler.h"
30 #include "spu_main.h"
31 #include "spu_texture.h"
32 #include "spu_tile.h"
33 #include "spu_colorpack.h"
34 #include "spu_dcache.h"
35
36
37 /**
38 * Mark all tex cache entries as invalid.
39 */
40 void
41 invalidate_tex_cache(void)
42 {
43 uint unit = 0;
44 uint bytes = 4 * spu.texture[unit].width
45 * spu.texture[unit].height;
46
47 spu_dcache_mark_dirty((unsigned) spu.texture[unit].start, bytes);
48 }
49
50
51 static uint
52 get_texel(vec_uint4 coordinate)
53 {
54 const uint unit = 0;
55 vec_uint4 tmp;
56 unsigned x = spu_extract(coordinate, 0);
57 unsigned y = spu_extract(coordinate, 1);
58 const unsigned tiles_per_row = spu.texture[unit].width / TILE_SIZE;
59 unsigned tile_offset = sizeof(tile_t) * ((y / TILE_SIZE * tiles_per_row)
60 + (x / TILE_SIZE));
61 unsigned texel_offset = 4 * (((y % TILE_SIZE) * TILE_SIZE)
62 + (x % TILE_SIZE));
63
64 spu_dcache_fetch_unaligned((qword *) & tmp,
65 spu.texture[unit].start + tile_offset + texel_offset,
66 4);
67 return spu_extract(tmp, 0);
68 }
69
70
71 static void
72 get_four_texels(vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
73 {
74 const uint unit = 0;
75 const unsigned texture_ea = (uintptr_t) spu.texture[unit].start;
76 vec_uint4 tile_x = spu_rlmask(x, -5);
77 vec_uint4 tile_y = spu_rlmask(y, -5);
78 const qword offset_x = si_andi((qword) x, 0x1f);
79 const qword offset_y = si_andi((qword) y, 0x1f);
80
81 const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].width / TILE_SIZE);
82 const qword tile_size = (qword) spu_splats(sizeof(tile_t));
83
84 qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
85 tile_offset = si_mpy((qword) tile_offset, tile_size);
86
87 qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x);
88 texel_offset = si_mpyui(texel_offset, 4);
89
90 vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset);
91
92 spu_dcache_fetch_unaligned((qword *) & texels[0],
93 texture_ea + spu_extract(offset, 0), 4);
94 spu_dcache_fetch_unaligned((qword *) & texels[1],
95 texture_ea + spu_extract(offset, 1), 4);
96 spu_dcache_fetch_unaligned((qword *) & texels[2],
97 texture_ea + spu_extract(offset, 2), 4);
98 spu_dcache_fetch_unaligned((qword *) & texels[3],
99 texture_ea + spu_extract(offset, 3), 4);
100 }
101
102 /**
103 * Get texture sample at texcoord.
104 * XXX this is extremely primitive for now.
105 */
106 vector float
107 sample_texture_nearest(vector float texcoord)
108 {
109 const uint unit = 0;
110 vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size);
111 vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */
112 itc = spu_and(itc, spu.texture[unit].tex_size_mask); /* mask (GL_REPEAT) */
113 uint texel = get_texel(itc);
114 return spu_unpack_A8R8G8B8(texel);
115 }
116
117
118 vector float
119 sample_texture_bilinear(vector float texcoord)
120 {
121 const uint unit = 0;
122 static const vec_uint4 offset_x = {0, 0, 1, 1};
123 static const vec_uint4 offset_y = {0, 1, 0, 1};
124
125 vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size);
126 tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */
127
128 /* integer texcoords S,T: */
129 vec_uint4 itc = spu_convtu(tc, 0); /* convert to int */
130
131 vec_uint4 texels[4];
132
133 vec_uint4 x = spu_splats(spu_extract(itc, 0));
134 vec_uint4 y = spu_splats(spu_extract(itc, 1));
135
136 x = spu_add(x, offset_x);
137 y = spu_add(y, offset_y);
138
139 x = spu_and(x, spu.texture[unit].tex_size_x_mask);
140 y = spu_and(y, spu.texture[unit].tex_size_y_mask);
141
142 get_four_texels(x, y, texels);
143
144 vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0));
145 vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0));
146 vector float texel10 = spu_unpack_A8R8G8B8(spu_extract(texels[2], 0));
147 vector float texel11 = spu_unpack_A8R8G8B8(spu_extract(texels[3], 0));
148
149
150 /* Compute weighting factors in [0,1]
151 * Multiply texcoord by 1024, AND with 1023, convert back to float.
152 */
153 vector float tc1024 = spu_mul(tc, spu_splats(1024.0f));
154 vector signed int itc1024 = spu_convts(tc1024, 0);
155 itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1));
156 vector float weight = spu_convtf(itc1024, 10);
157
158 /* smeared frac and 1-frac */
159 vector float sfrac = spu_splats(spu_extract(weight, 0));
160 vector float tfrac = spu_splats(spu_extract(weight, 1));
161 vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac);
162 vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac);
163
164 /* multiply the samples (colors) by the S/T weights */
165 texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1);
166 texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1);
167 texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac );
168 texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac );
169
170 /* compute sum of weighted samples */
171 vector float texel_sum = spu_add(texel00, texel01);
172 texel_sum = spu_add(texel_sum, texel10);
173 texel_sum = spu_add(texel_sum, texel11);
174
175 return texel_sum;
176 }