**************************************************************************/
-#include <transpose_matrix4x4.h>
+#include <math.h>
#include "pipe/p_compiler.h"
#include "spu_main.h"
void
invalidate_tex_cache(void)
{
- uint unit = 0;
- uint bytes = 4 * spu.texture[unit].width
- * spu.texture[unit].height;
-
- spu_dcache_mark_dirty((unsigned) spu.texture[unit].start, bytes);
+ uint lvl;
+ for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) {
+ uint unit = 0;
+ uint bytes = 4 * spu.texture[unit].level[lvl].width
+ * spu.texture[unit].level[lvl].height;
+
+ if (spu.texture[unit].target == PIPE_TEXTURE_CUBE)
+ bytes *= 6;
+ else if (spu.texture[unit].target == PIPE_TEXTURE_3D)
+ bytes *= spu.texture[unit].level[lvl].depth;
+
+ spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes);
+ }
}
* a time.
*/
static void
-get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
+get_four_texels(uint unit, uint level, uint face, vec_int4 x, vec_int4 y,
+ vec_uint4 *texels)
{
- const unsigned texture_ea = (uintptr_t) spu.texture[unit].start;
- vec_uint4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */
- vec_uint4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */
+ const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+ unsigned texture_ea = (uintptr_t) tlevel->start;
+ const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */
+ const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */
const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */
const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */
- const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row);
+ const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row);
const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset);
+ texture_ea = texture_ea + face * tlevel->bytes_per_image;
+
spu_dcache_fetch_unaligned((qword *) & texels[0],
texture_ea + spu_extract(offset, 0), 4);
spu_dcache_fetch_unaligned((qword *) & texels[1],
}
+/** clamp vec to [0, max] */
+static INLINE vector signed int
+spu_clamp(vector signed int vec, vector signed int max)
+{
+ static const vector signed int zero = {0,0,0,0};
+ vector unsigned int c;
+ c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */
+ vec = spu_sel(zero, vec, c);
+ c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */
+ vec = spu_sel(vec, max, c);
+ return vec;
+}
+
+
/**
* Do nearest texture sampling for four pixels.
* \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
*/
void
-sample_texture4_nearest(vector float s, vector float t,
- vector float r, vector float q,
- uint unit, vector float colors[4])
+sample_texture_2d_nearest(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
{
- vector float ss = spu_mul(s, spu.texture[unit].width4);
- vector float tt = spu_mul(t, spu.texture[unit].height4);
- vector unsigned int is = spu_convtu(ss, 0);
- vector unsigned int it = spu_convtu(tt, 0);
+ const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+ vector float ss = spu_mul(s, tlevel->scale_s);
+ vector float tt = spu_mul(t, tlevel->scale_t);
+ vector signed int is = spu_convts(ss, 0);
+ vector signed int it = spu_convts(tt, 0);
vec_uint4 texels[4];
/* PIPE_TEX_WRAP_REPEAT */
- is = spu_and(is, spu.texture[unit].tex_size_x_mask);
- it = spu_and(it, spu.texture[unit].tex_size_y_mask);
+ is = spu_and(is, tlevel->mask_s);
+ it = spu_and(it, tlevel->mask_t);
- get_four_texels(unit, is, it, texels);
+ /* PIPE_TEX_WRAP_CLAMP */
+ is = spu_clamp(is, tlevel->max_s);
+ it = spu_clamp(it, tlevel->max_t);
+
+ get_four_texels(unit, level, face, is, it, texels);
/* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */
spu_unpack_A8R8G8B8_transpose4(texels, colors);
* \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
*/
void
-sample_texture4_bilinear(vector float s, vector float t,
- vector float r, vector float q,
- uint unit, vector float colors[4])
+sample_texture_2d_bilinear(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
{
- vector float ss = spu_madd(s, spu.texture[unit].width4, spu_splats(-0.5f));
- vector float tt = spu_madd(t, spu.texture[unit].height4, spu_splats(-0.5f));
+ const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+ static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
- vector unsigned int is0 = spu_convtu(ss, 0);
- vector unsigned int it0 = spu_convtu(tt, 0);
+ vector float ss = spu_madd(s, tlevel->scale_s, half);
+ vector float tt = spu_madd(t, tlevel->scale_t, half);
+
+ vector signed int is0 = spu_convts(ss, 0);
+ vector signed int it0 = spu_convts(tt, 0);
/* is + 1, it + 1 */
- vector unsigned int is1 = spu_add(is0, 1);
- vector unsigned int it1 = spu_add(it0, 1);
+ vector signed int is1 = spu_add(is0, 1);
+ vector signed int it1 = spu_add(it0, 1);
/* PIPE_TEX_WRAP_REPEAT */
- is0 = spu_and(is0, spu.texture[unit].tex_size_x_mask);
- it0 = spu_and(it0, spu.texture[unit].tex_size_y_mask);
- is1 = spu_and(is1, spu.texture[unit].tex_size_x_mask);
- it1 = spu_and(it1, spu.texture[unit].tex_size_y_mask);
+ is0 = spu_and(is0, tlevel->mask_s);
+ it0 = spu_and(it0, tlevel->mask_t);
+ is1 = spu_and(is1, tlevel->mask_s);
+ it1 = spu_and(it1, tlevel->mask_t);
+
+ /* PIPE_TEX_WRAP_CLAMP */
+ is0 = spu_clamp(is0, tlevel->max_s);
+ it0 = spu_clamp(it0, tlevel->max_t);
+ is1 = spu_clamp(is1, tlevel->max_s);
+ it1 = spu_clamp(it1, tlevel->max_t);
/* get packed int texels */
vector unsigned int texels[16];
- get_four_texels(unit, is0, it0, texels + 0); /* upper-left */
- get_four_texels(unit, is1, it0, texels + 4); /* upper-right */
- get_four_texels(unit, is0, it1, texels + 8); /* lower-left */
- get_four_texels(unit, is1, it1, texels + 12); /* lower-right */
-
- /* XXX possibly rework following code to compute the weighted sample
- * colors with integer arithmetic for fewer int->float conversions.
- */
+ get_four_texels(unit, level, face, is0, it0, texels + 0); /* upper-left */
+ get_four_texels(unit, level, face, is1, it0, texels + 4); /* upper-right */
+ get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */
+ get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */
/* convert packed int texels to float colors */
vector float ftexels[16];
/**
- * Bilinear filtering, using int intead of float arithmetic
+ * Bilinear filtering, using int instead of float arithmetic for computing
+ * sample weights.
*/
void
-sample_texture4_bilinear_2(vector float s, vector float t,
- vector float r, vector float q,
- uint unit, vector float colors[4])
+sample_texture_2d_bilinear_int(vector float s, vector float t,
+ uint unit, uint level, uint face,
+ vector float colors[4])
{
+ const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
+
/* Scale texcoords by size of texture, and add half pixel bias */
- vector float ss = spu_madd(s, spu.texture[unit].width4, half);
- vector float tt = spu_madd(t, spu.texture[unit].height4, half);
+ vector float ss = spu_madd(s, tlevel->scale_s, half);
+ vector float tt = spu_madd(t, tlevel->scale_t, half);
/* convert float coords to fixed-pt coords with 8 fraction bits */
- vector unsigned int is = spu_convtu(ss, 8);
- vector unsigned int it = spu_convtu(tt, 8);
+ vector signed int is = spu_convts(ss, 8);
+ vector signed int it = spu_convts(tt, 8);
/* compute integer texel weights in [0, 255] */
- vector signed int sWeights0 = spu_and((vector signed int) is, 255);
- vector signed int tWeights0 = spu_and((vector signed int) it, 255);
+ vector signed int sWeights0 = spu_and(is, 255);
+ vector signed int tWeights0 = spu_and(it, 255);
vector signed int sWeights1 = spu_sub(255, sWeights0);
vector signed int tWeights1 = spu_sub(255, tWeights0);
/* texel coords: is0 = is / 256, it0 = is / 256 */
- vector unsigned int is0 = spu_rlmask(is, -8);
- vector unsigned int it0 = spu_rlmask(it, -8);
+ vector signed int is0 = spu_rlmask(is, -8);
+ vector signed int it0 = spu_rlmask(it, -8);
/* texel coords: i1 = is0 + 1, it1 = it0 + 1 */
- vector unsigned int is1 = spu_add(is0, 1);
- vector unsigned int it1 = spu_add(it0, 1);
+ vector signed int is1 = spu_add(is0, 1);
+ vector signed int it1 = spu_add(it0, 1);
/* PIPE_TEX_WRAP_REPEAT */
- is0 = spu_and(is0, spu.texture[unit].tex_size_x_mask);
- it0 = spu_and(it0, spu.texture[unit].tex_size_y_mask);
- is1 = spu_and(is1, spu.texture[unit].tex_size_x_mask);
- it1 = spu_and(it1, spu.texture[unit].tex_size_y_mask);
+ is0 = spu_and(is0, tlevel->mask_s);
+ it0 = spu_and(it0, tlevel->mask_t);
+ is1 = spu_and(is1, tlevel->mask_s);
+ it1 = spu_and(it1, tlevel->mask_t);
+
+ /* PIPE_TEX_WRAP_CLAMP */
+ is0 = spu_clamp(is0, tlevel->max_s);
+ it0 = spu_clamp(it0, tlevel->max_t);
+ is1 = spu_clamp(is1, tlevel->max_s);
+ it1 = spu_clamp(it1, tlevel->max_t);
/* get packed int texels */
vector unsigned int texels[16];
- get_four_texels(unit, is0, it0, texels + 0); /* upper-left */
- get_four_texels(unit, is1, it0, texels + 4); /* upper-right */
- get_four_texels(unit, is0, it1, texels + 8); /* lower-left */
- get_four_texels(unit, is1, it1, texels + 12); /* lower-right */
+ get_four_texels(unit, level, face, is0, it0, texels + 0); /* upper-left */
+ get_four_texels(unit, level, face, is1, it0, texels + 4); /* upper-right */
+ get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */
+ get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */
/* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */
{
cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
colors[3] = spu_convtf(cSum, 24);
}
+
+
+
+/**
+ * Compute level of detail factor from texcoords.
+ */
+static float
+compute_lambda(uint unit, vector float s, vector float t)
+{
+ uint baseLevel = 0;
+ float width = spu.texture[unit].level[baseLevel].width;
+ float height = spu.texture[unit].level[baseLevel].width;
+ float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0));
+ float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0));
+ float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0));
+ float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0));
+ float x = dsdx * dsdx + dtdx * dtdx;
+ float y = dsdy * dsdy + dtdy * dtdy;
+ float rho = x > y ? x : y;
+ rho = sqrtf(rho);
+ float lambda = logf(rho) * 1.442695f;
+ return lambda;
+}
+
+
+
+/**
+ * Texture sampling with level of detail selection.
+ */
+void
+sample_texture_2d_lod(vector float s, vector float t,
+ uint unit, uint level_ignored, uint face,
+ vector float colors[4])
+{
+ /*
+ * Note that we're computing a lambda/lod here that's used for all
+ * four pixels in the quad.
+ */
+ float lambda = compute_lambda(unit, s, t);
+
+ (void) face;
+ (void) level_ignored;
+
+ /* apply lod bias */
+ lambda += spu.sampler[unit].lod_bias;
+
+ /* clamp */
+ if (lambda < spu.sampler[unit].min_lod)
+ lambda = spu.sampler[unit].min_lod;
+ else if (lambda > spu.sampler[unit].max_lod)
+ lambda = spu.sampler[unit].max_lod;
+
+ if (lambda <= 0.0f) {
+ /* magnify */
+ spu.mag_sample_texture_2d[unit](s, t, unit, 0, 0, colors);
+ }
+ else {
+ /* minify */
+ int level = (int) (lambda + 0.5f);
+ if (level > (int) spu.texture[unit].max_level)
+ level = spu.texture[unit].max_level;
+ spu.min_sample_texture_2d[unit](s, t, unit, level, 0, colors);
+ /* XXX to do: mipmap level interpolation */
+ }
+}
+
+
+/** XXX need a SIMD version of this */
+static unsigned
+choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
+{
+ /*
+ major axis
+ direction target sc tc ma
+ ---------- ------------------------------- --- --- ---
+ +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
+ -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
+ +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
+ -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
+ +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
+ -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
+ */
+ const float arx = fabsf(rx);
+ const float ary = fabsf(ry);
+ const float arz = fabsf(rz);
+ unsigned face;
+ float sc, tc, ma;
+
+ if (arx > ary && arx > arz) {
+ if (rx >= 0.0F) {
+ face = PIPE_TEX_FACE_POS_X;
+ sc = -rz;
+ tc = -ry;
+ ma = arx;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_X;
+ sc = rz;
+ tc = -ry;
+ ma = arx;
+ }
+ }
+ else if (ary > arx && ary > arz) {
+ if (ry >= 0.0F) {
+ face = PIPE_TEX_FACE_POS_Y;
+ sc = rx;
+ tc = rz;
+ ma = ary;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_Y;
+ sc = rx;
+ tc = -rz;
+ ma = ary;
+ }
+ }
+ else {
+ if (rz > 0.0F) {
+ face = PIPE_TEX_FACE_POS_Z;
+ sc = rx;
+ tc = -ry;
+ ma = arz;
+ }
+ else {
+ face = PIPE_TEX_FACE_NEG_Z;
+ sc = -rx;
+ tc = -ry;
+ ma = arz;
+ }
+ }
+
+ *newS = (sc / ma + 1.0F) * 0.5F;
+ *newT = (tc / ma + 1.0F) * 0.5F;
+
+ return face;
+}
+
+
+
+void
+sample_texture_cube(vector float s, vector float t, vector float r,
+ uint unit, vector float colors[4])
+{
+ uint p, faces[4], level = 0;
+ float newS[4], newT[4];
+
+ /* Compute cube faces referenced by the four sets of texcoords.
+ * XXX we should SIMD-ize this.
+ */
+ for (p = 0; p < 4; p++) {
+ float rx = spu_extract(s, p);
+ float ry = spu_extract(t, p);
+ float rz = spu_extract(r, p);
+ faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]);
+ }
+
+ if (faces[0] == faces[1] &&
+ faces[0] == faces[2] &&
+ faces[0] == faces[3]) {
+ /* GOOD! All four texcoords refer to the same cube face */
+ s = (vector float) {newS[0], newS[1], newS[2], newS[3]};
+ t = (vector float) {newT[0], newT[1], newT[2], newT[3]};
+ spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors);
+ }
+ else {
+ /* BAD! The four texcoords refer to different faces */
+ for (p = 0; p < 4; p++) {
+ vector float c[4];
+
+ spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]),
+ unit, level, faces[p], c);
+
+ float red = spu_extract(c[0], p);
+ float green = spu_extract(c[1], p);
+ float blue = spu_extract(c[2], p);
+ float alpha = spu_extract(c[3], p);
+
+ colors[0] = spu_insert(red, colors[0], p);
+ colors[1] = spu_insert(green, colors[1], p);
+ colors[2] = spu_insert(blue, colors[2], p);
+ colors[3] = spu_insert(alpha, colors[3], p);
+ }
+ }
+}