src/gallium/drivers/cell/spu/spu_texture.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 #include "pipe/p_compiler.h"
  30 #include "spu_main.h"
  31 #include "spu_texture.h"
  32 #include "spu_tile.h"
  33 #include "spu_colorpack.h"
  34 #include "spu_dcache.h"
  35
  36
  37 /**
  38  * Mark all tex cache entries as invalid.
  39  */
  40 void
  41 invalidate_tex_cache(void)
  42 {
  43    uint unit = 0;
  44    uint bytes = 4 * spu.texture[unit].width
  45       * spu.texture[unit].height;
  46
  47    spu_dcache_mark_dirty((unsigned) spu.texture[unit].start, bytes);
  48 }
  49
  50
  51 /**
  52  * XXX look into getting texels for all four pixels in a quad at once.
  53  */
  54 static uint
  55 get_texel(uint unit, vec_uint4 coordinate)
  56 {
  57    /*
  58     * XXX we could do the "/ TILE_SIZE" and "% TILE_SIZE" operations as
  59     * SIMD since X and Y are already in a SIMD register.
  60     */
  61    const unsigned texture_ea = (uintptr_t) spu.texture[unit].start;
  62    ushort x = spu_extract(coordinate, 0);
  63    ushort y = spu_extract(coordinate, 1);
  64    unsigned tile_offset = sizeof(tile_t)
  65       * ((y / TILE_SIZE * spu.texture[unit].tiles_per_row) + (x / TILE_SIZE));
  66    ushort texel_offset = (ushort) 4
  67       * (ushort) (((ushort) (y % TILE_SIZE) * (ushort) TILE_SIZE) + (x % TILE_SIZE));
  68    vec_uint4 tmp;
  69
  70    spu_dcache_fetch_unaligned((qword *) & tmp,
  71                               texture_ea + tile_offset + texel_offset,
  72                               4);
  73    return spu_extract(tmp, 0);
  74 }
  75
  76
  77 /**
  78  * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ...
  79  *
  80  * NOTE: in the typical case of bilinear filtering, the four texels
  81  * are in a 2x2 group so we could get by with just two dcache fetches
  82  * (two side-by-side texels per fetch).  But when bilinear filtering
  83  * wraps around a texture edge, we'll probably need code like we have
  84  * now.
  85  * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time,
  86  * it's quite likely that the four pixels in a quad will need some of the
  87  * same texels.  So look into doing texture fetches for four pixels at
  88  * a time.
  89  */
  90 static void
  91 get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
  92 {
  93    const unsigned texture_ea = (uintptr_t) spu.texture[unit].start;
  94    vec_uint4 tile_x = spu_rlmask(x, -5);
  95    vec_uint4 tile_y = spu_rlmask(y, -5);
  96    const qword offset_x = si_andi((qword) x, 0x1f);
  97    const qword offset_y = si_andi((qword) y, 0x1f);
  98
  99    const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row);
 100    const qword tile_size = (qword) spu_splats(sizeof(tile_t));
 101
 102    qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
 103    tile_offset = si_mpy((qword) tile_offset, tile_size);
 104
 105    qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x);
 106    texel_offset = si_mpyui(texel_offset, 4);
 107
 108    vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset);
 109
 110    spu_dcache_fetch_unaligned((qword *) & texels[0],
 111                               texture_ea + spu_extract(offset, 0), 4);
 112    spu_dcache_fetch_unaligned((qword *) & texels[1],
 113                               texture_ea + spu_extract(offset, 1), 4);
 114    spu_dcache_fetch_unaligned((qword *) & texels[2],
 115                               texture_ea + spu_extract(offset, 2), 4);
 116    spu_dcache_fetch_unaligned((qword *) & texels[3],
 117                               texture_ea + spu_extract(offset, 3), 4);
 118 }
 119
 120
 121 /**
 122  * Get texture sample at texcoord.
 123  */
 124 vector float
 125 sample_texture_nearest(uint unit, vector float texcoord)
 126 {
 127    vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size);
 128    vector unsigned int itc = spu_convtu(tc, 0);  /* convert to int */
 129    itc = spu_and(itc, spu.texture[unit].tex_size_mask); /* mask (GL_REPEAT) */
 130    uint texel = get_texel(unit, itc);
 131    return spu_unpack_A8R8G8B8(texel);
 132 }
 133
 134
 135 vector float
 136 sample_texture_bilinear(uint unit, vector float texcoord)
 137 {
 138    static const vec_uint4 offset_x = {0, 0, 1, 1};
 139    static const vec_uint4 offset_y = {0, 1, 0, 1};
 140
 141    vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size);
 142    tc = spu_add(tc, spu_splats(-0.5f));  /* half texel bias */
 143
 144    /* integer texcoords S,T: */
 145    vec_uint4 itc = spu_convtu(tc, 0);  /* convert to int */
 146
 147    vec_uint4 texels[4];
 148
 149    /* setup texcoords for quad:
 150     *  +-----+-----+
 151     *  |x0,y0|x1,y1|
 152     *  +-----+-----+
 153     *  |x2,y2|x3,y3|
 154     *  +-----+-----+
 155     */
 156    vec_uint4 x = spu_splats(spu_extract(itc, 0));
 157    vec_uint4 y = spu_splats(spu_extract(itc, 1));
 158    x = spu_add(x, offset_x);
 159    y = spu_add(y, offset_y);
 160
 161    /* GL_REPEAT wrap mode: */
 162    x = spu_and(x, spu.texture[unit].tex_size_x_mask);
 163    y = spu_and(y, spu.texture[unit].tex_size_y_mask);
 164
 165    get_four_texels(unit, x, y, texels);
 166
 167    /* integer A8R8G8B8 to float texel conversion */
 168    vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0));
 169    vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0));
 170    vector float texel10 = spu_unpack_A8R8G8B8(spu_extract(texels[2], 0));
 171    vector float texel11 = spu_unpack_A8R8G8B8(spu_extract(texels[3], 0));
 172
 173
 174    /* Compute weighting factors in [0,1]
 175     * Multiply texcoord by 1024, AND with 1023, convert back to float.
 176     */
 177    vector float tc1024 = spu_mul(tc, spu_splats(1024.0f));
 178    vector signed int itc1024 = spu_convts(tc1024, 0);
 179    itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1));
 180    vector float weight = spu_convtf(itc1024, 10);
 181
 182    /* smeared frac and 1-frac */
 183    vector float sfrac = spu_splats(spu_extract(weight, 0));
 184    vector float tfrac = spu_splats(spu_extract(weight, 1));
 185    vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac);
 186    vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac);
 187
 188    /* multiply the samples (colors) by the S/T weights */
 189    texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1);
 190    texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1);
 191    texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac );
 192    texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac );
 193
 194    /* compute sum of weighted samples */
 195    vector float texel_sum = spu_add(texel00, texel01);
 196    texel_sum = spu_add(texel_sum, texel10);
 197    texel_sum = spu_add(texel_sum, texel11);
 198
 199    return texel_sum;
 200 }