src/gallium/drivers/llvmpipe/lp_tile_image.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2010 VMware, Inc.  All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the
   7  * "Software"), to deal in the Software without restriction, including
   8  * without limitation the rights to use, copy, modify, merge, publish,
   9  * distribute, sub license, and/or sell copies of the Software, and to
  10  * permit persons to whom the Software is furnished to do so, subject to
  11  * the following conditions:
  12  *
  13  * The above copyright notice and this permission notice (including the
  14  * next paragraph) shall be included in all copies or substantial portions
  15  * of the Software.
  16  *
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  20  * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
  21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  22  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  23  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24  *
  25  **************************************************************************/
  26
  27
  28 /**
  29  * Code to convert images from tiled to linear and back.
  30  * XXX there are quite a few assumptions about color and z/stencil being
  31  * 32bpp.
  32  */
  33
  34
  35 #include "util/u_format.h"
  36 #include "lp_tile_soa.h"
  37 #include "lp_tile_image.h"
  38
  39
  40 #define BYTES_PER_TILE (TILE_SIZE * TILE_SIZE * 4)
  41
  42
  43 /**
  44  * Untile a 4x4 block of 32-bit words (all contiguous) to linear layout
  45  * at dst, with dst_stride words between rows.
  46  */
  47 static void
  48 untile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned dst_stride)
  49 {
  50    uint32_t *d0 = dst;
  51    uint32_t *d1 = d0 + dst_stride;
  52    uint32_t *d2 = d1 + dst_stride;
  53    uint32_t *d3 = d2 + dst_stride;
  54
  55    d0[0] = src[0];   d0[1] = src[1];   d0[2] = src[4];   d0[3] = src[5];
  56    d1[0] = src[2];   d1[1] = src[3];   d1[2] = src[6];   d1[3] = src[7];
  57    d2[0] = src[8];   d2[1] = src[9];   d2[2] = src[12];  d2[3] = src[13];
  58    d3[0] = src[10];  d3[1] = src[11];  d3[2] = src[14];  d3[3] = src[15];
  59 }
  60
  61
  62
  63 /**
  64  * Untile a 4x4 block of 16-bit words (all contiguous) to linear layout
  65  * at dst, with dst_stride words between rows.
  66  */
  67 static void
  68 untile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned dst_stride)
  69 {
  70    uint16_t *d0 = dst;
  71    uint16_t *d1 = d0 + dst_stride;
  72    uint16_t *d2 = d1 + dst_stride;
  73    uint16_t *d3 = d2 + dst_stride;
  74
  75    d0[0] = src[0];   d0[1] = src[1];   d0[2] = src[4];   d0[3] = src[5];
  76    d1[0] = src[2];   d1[1] = src[3];   d1[2] = src[6];   d1[3] = src[7];
  77    d2[0] = src[8];   d2[1] = src[9];   d2[2] = src[12];  d2[3] = src[13];
  78    d3[0] = src[10];  d3[1] = src[11];  d3[2] = src[14];  d3[3] = src[15];
  79 }
  80
  81
  82
  83 /**
  84  * Convert a 4x4 rect of 32-bit words from a linear layout into tiled
  85  * layout (in which all 16 words are contiguous).
  86  */
  87 static void
  88 tile_4_4_uint32(const uint32_t *src, uint32_t *dst, unsigned src_stride)
  89 {
  90    const uint32_t *s0 = src;
  91    const uint32_t *s1 = s0 + src_stride;
  92    const uint32_t *s2 = s1 + src_stride;
  93    const uint32_t *s3 = s2 + src_stride;
  94
  95    dst[0] = s0[0];   dst[1] = s0[1];   dst[4] = s0[2];   dst[5] = s0[3];
  96    dst[2] = s1[0];   dst[3] = s1[1];   dst[6] = s1[2];   dst[7] = s1[3];
  97    dst[8] = s2[0];   dst[9] = s2[1];   dst[12] = s2[2];  dst[13] = s2[3];
  98    dst[10] = s3[0];  dst[11] = s3[1];  dst[14] = s3[2];  dst[15] = s3[3];
  99 }
 100
 101
 102
 103 /**
 104  * Convert a 4x4 rect of 16-bit words from a linear layout into tiled
 105  * layout (in which all 16 words are contiguous).
 106  */
 107 static void
 108 tile_4_4_uint16(const uint16_t *src, uint16_t *dst, unsigned src_stride)
 109 {
 110    const uint16_t *s0 = src;
 111    const uint16_t *s1 = s0 + src_stride;
 112    const uint16_t *s2 = s1 + src_stride;
 113    const uint16_t *s3 = s2 + src_stride;
 114
 115    dst[0] = s0[0];   dst[1] = s0[1];   dst[4] = s0[2];   dst[5] = s0[3];
 116    dst[2] = s1[0];   dst[3] = s1[1];   dst[6] = s1[2];   dst[7] = s1[3];
 117    dst[8] = s2[0];   dst[9] = s2[1];   dst[12] = s2[2];  dst[13] = s2[3];
 118    dst[10] = s3[0];  dst[11] = s3[1];  dst[14] = s3[2];  dst[15] = s3[3];
 119 }
 120
 121
 122
 123 /**
 124  * Convert a tiled image into a linear image.
 125  * \param src_stride  source row stride in bytes (bytes per row of tiles)
 126  * \param dst_stride  dest row stride in bytes
 127  */
 128 void
 129 lp_tiled_to_linear(const void *src, void *dst,
 130                    unsigned x, unsigned y,
 131                    unsigned width, unsigned height,
 132                    enum pipe_format format, unsigned dst_stride)
 133 {
 134    assert(x % TILE_SIZE == 0);
 135    assert(y % TILE_SIZE == 0);
 136    /*assert(width % TILE_SIZE == 0);
 137      assert(height % TILE_SIZE == 0);*/
 138
 139    /* Note that Z/stencil surfaces use a different tiling size than
 140     * color surfaces.
 141     */
 142    if (util_format_is_depth_or_stencil(format)) {
 143       const uint bpp = util_format_get_blocksize(format);
 144       const uint src_stride = dst_stride * TILE_VECTOR_WIDTH;
 145       const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
 146       const uint tiles_per_row = src_stride / (tile_w * tile_h * bpp);
 147
 148       dst_stride /= bpp;   /* convert from bytes to words */
 149
 150       if (bpp == 4) {
 151          const uint32_t *src32 = (const uint32_t *) src;
 152          uint32_t *dst32 = (uint32_t *) dst;
 153          uint i, j;
 154
 155          for (j = 0; j < height; j += tile_h) {
 156             for (i = 0; i < width; i += tile_w) {
 157                /* compute offsets in 32-bit words */
 158                uint ii = i + x, jj = j + y;
 159                uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
 160                   * (tile_w * tile_h);
 161                uint dst_offset = jj * dst_stride + ii;
 162                untile_4_4_uint32(src32 + src_offset,
 163                                  dst32 + dst_offset,
 164                                  dst_stride);
 165             }
 166          }
 167       }
 168       else {
 169          const uint16_t *src16 = (const uint16_t *) src;
 170          uint16_t *dst16 = (uint16_t *) dst;
 171          uint i, j;
 172
 173          assert(bpp == 2);
 174
 175          for (j = 0; j < height; j += tile_h) {
 176             for (i = 0; i < width; i += tile_w) {
 177                /* compute offsets in 16-bit words */
 178                uint ii = i + x, jj = j + y;
 179                uint src_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
 180                   * (tile_w * tile_h);
 181                uint dst_offset = jj * dst_stride + ii;
 182                untile_4_4_uint16(src16 + src_offset,
 183                                  dst16 + dst_offset,
 184                                  dst_stride);
 185             }
 186          }
 187       }
 188    }
 189    else {
 190       /* color image */
 191       const uint bpp = 4;
 192       const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
 193       const uint bytes_per_tile = tile_w * tile_h * bpp;
 194       const uint src_stride = dst_stride * tile_w;
 195       const uint tiles_per_row = src_stride / bytes_per_tile;
 196       uint i, j;
 197
 198       for (j = 0; j < height; j += tile_h) {
 199          for (i = 0; i < width; i += tile_w) {
 200             uint ii = i + x, jj = j + y;
 201             uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w);
 202             uint byte_offset = tile_offset * bytes_per_tile;
 203             const uint8_t *src_tile = (uint8_t *) src + byte_offset;
 204
 205             lp_tile_write_4ub(format,
 206                               src_tile,
 207                               dst, dst_stride,
 208                               ii, jj, tile_w, tile_h);
 209          }
 210       }
 211    }
 212 }
 213
 214
 215 /**
 216  * Convert a linear image into a tiled image.
 217  * \param src_stride  source row stride in bytes
 218  * \param dst_stride  dest row stride in bytes (bytes per row of tiles)
 219  */
 220 void
 221 lp_linear_to_tiled(const void *src, void *dst,
 222                    unsigned x, unsigned y,
 223                    unsigned width, unsigned height,
 224                    enum pipe_format format, unsigned src_stride)
 225 {
 226    assert(x % TILE_SIZE == 0);
 227    assert(y % TILE_SIZE == 0);
 228    /*
 229    assert(width % TILE_SIZE == 0);
 230    assert(height % TILE_SIZE == 0);
 231    */
 232
 233    if (util_format_is_depth_or_stencil(format)) {
 234       const uint bpp = util_format_get_blocksize(format);
 235       const uint dst_stride = src_stride * TILE_VECTOR_WIDTH;
 236       const uint tile_w = TILE_VECTOR_WIDTH, tile_h = TILE_VECTOR_HEIGHT;
 237       const uint tiles_per_row = dst_stride / (tile_w * tile_h * bpp);
 238
 239       src_stride /= bpp;   /* convert from bytes to words */
 240
 241       if (bpp == 4) {
 242          const uint32_t *src32 = (const uint32_t *) src;
 243          uint32_t *dst32 = (uint32_t *) dst;
 244          uint i, j;
 245
 246          for (j = 0; j < height; j += tile_h) {
 247             for (i = 0; i < width; i += tile_w) {
 248                /* compute offsets in 32-bit words */
 249                uint ii = i + x, jj = j + y;
 250                uint src_offset = jj * src_stride + ii;
 251                uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
 252                   * (tile_w * tile_h);
 253                tile_4_4_uint32(src32 + src_offset,
 254                                dst32 + dst_offset,
 255                                src_stride);
 256             }
 257          }
 258       }
 259       else {
 260          const uint16_t *src16 = (const uint16_t *) src;
 261          uint16_t *dst16 = (uint16_t *) dst;
 262          uint i, j;
 263
 264          assert(bpp == 2);
 265
 266          for (j = 0; j < height; j += tile_h) {
 267             for (i = 0; i < width; i += tile_w) {
 268                /* compute offsets in 16-bit words */
 269                uint ii = i + x, jj = j + y;
 270                uint src_offset = jj * src_stride + ii;
 271                uint dst_offset = (jj / tile_h * tiles_per_row + ii / tile_w)
 272                   * (tile_w * tile_h);
 273                tile_4_4_uint16(src16 + src_offset,
 274                                dst16 + dst_offset,
 275                                src_stride);
 276             }
 277          }
 278       }
 279    }
 280    else {
 281       const uint bpp = 4;
 282       const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
 283       const uint bytes_per_tile = tile_w * tile_h * bpp;
 284       const uint dst_stride = src_stride * tile_w;
 285       const uint tiles_per_row = dst_stride / bytes_per_tile;
 286       uint i, j;
 287
 288       for (j = 0; j < height; j += TILE_SIZE) {
 289          for (i = 0; i < width; i += TILE_SIZE) {
 290             uint ii = i + x, jj = j + y;
 291             uint tile_offset = ((jj / tile_h) * tiles_per_row + ii / tile_w);
 292             uint byte_offset = tile_offset * bytes_per_tile;
 293             uint8_t *dst_tile = (uint8_t *) dst + byte_offset;
 294
 295             lp_tile_read_4ub(format,
 296                              dst_tile,
 297                              src, src_stride,
 298                              ii, jj, tile_w, tile_h);
 299          }
 300       }
 301    }
 302 }
 303
 304
 305 /**
 306  * For testing only.
 307  */
 308 void
 309 test_tiled_linear_conversion(void *data,
 310                              enum pipe_format format,
 311                              unsigned width, unsigned height,
 312                              unsigned stride)
 313 {
 314    /* size in tiles */
 315    unsigned wt = (width + TILE_SIZE - 1) / TILE_SIZE;
 316    unsigned ht = (height + TILE_SIZE - 1) / TILE_SIZE;
 317
 318    uint8_t *tiled = malloc(wt * ht * TILE_SIZE * TILE_SIZE * 4);
 319
 320    /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/
 321
 322    lp_linear_to_tiled(data, tiled, 0, 0, width, height, format,
 323                       stride);
 324
 325    lp_tiled_to_linear(tiled, data, 0, 0, width, height, format,
 326                       stride);
 327
 328    free(tiled);
 329 }
 330