src/gallium/auxiliary/util/u_gen_mipmap.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  * Copyright 2008  VMware, Inc.  All rights reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * @file
  31  * Mipmap generation utility
  32  *
  33  * @author Brian Paul
  34  */
  35
  36
  37 #include "pipe/p_context.h"
  38 #include "util/u_debug.h"
  39 #include "pipe/p_defines.h"
  40 #include "pipe/p_inlines.h"
  41 #include "pipe/p_shader_tokens.h"
  42 #include "pipe/p_state.h"
  43
  44 #include "util/u_memory.h"
  45 #include "util/u_draw_quad.h"
  46 #include "util/u_gen_mipmap.h"
  47 #include "util/u_simple_shaders.h"
  48
  49 #include "cso_cache/cso_context.h"
  50
  51
  52 struct gen_mipmap_state
  53 {
  54    struct pipe_context *pipe;
  55    struct cso_context *cso;
  56
  57    struct pipe_blend_state blend;
  58    struct pipe_depth_stencil_alpha_state depthstencil;
  59    struct pipe_rasterizer_state rasterizer;
  60    struct pipe_sampler_state sampler;
  61
  62    void *vs;
  63    void *fs;
  64
  65    struct pipe_buffer *vbuf;  /**< quad vertices */
  66    unsigned vbuf_slot;
  67
  68    float vertices[4][2][4];   /**< vertex/texcoords for quad */
  69 };
  70
  71
  72
  73 enum dtype
  74 {
  75    DTYPE_UBYTE,
  76    DTYPE_UBYTE_3_3_2,
  77    DTYPE_USHORT,
  78    DTYPE_USHORT_4_4_4_4,
  79    DTYPE_USHORT_5_6_5,
  80    DTYPE_USHORT_1_5_5_5_REV,
  81    DTYPE_UINT,
  82    DTYPE_FLOAT,
  83    DTYPE_HALF_FLOAT
  84 };
  85
  86
  87 typedef ushort half_float;
  88
  89
  90 static half_float
  91 float_to_half(float f)
  92 {
  93    /* XXX fix this */
  94    return 0;
  95 }
  96
  97 static float
  98 half_to_float(half_float h)
  99 {
 100    /* XXX fix this */
 101    return 0.0f;
 102 }
 103
 104
 105
 106
 107 /**
 108  * \name Support macros for do_row and do_row_3d
 109  *
 110  * The macro madness is here for two reasons.  First, it compacts the code
 111  * slightly.  Second, it makes it much easier to adjust the specifics of the
 112  * filter to tune the rounding characteristics.
 113  */
 114 /*@{*/
 115 #define DECLARE_ROW_POINTERS(t, e) \
 116       const t(*rowA)[e] = (const t(*)[e]) srcRowA; \
 117       const t(*rowB)[e] = (const t(*)[e]) srcRowB; \
 118       const t(*rowC)[e] = (const t(*)[e]) srcRowC; \
 119       const t(*rowD)[e] = (const t(*)[e]) srcRowD; \
 120       t(*dst)[e] = (t(*)[e]) dstRow
 121
 122 #define DECLARE_ROW_POINTERS0(t) \
 123       const t *rowA = (const t *) srcRowA; \
 124       const t *rowB = (const t *) srcRowB; \
 125       const t *rowC = (const t *) srcRowC; \
 126       const t *rowD = (const t *) srcRowD; \
 127       t *dst = (t *) dstRow
 128
 129 #define FILTER_SUM_3D(Aj, Ak, Bj, Bk, Cj, Ck, Dj, Dk) \
 130    ((unsigned) Aj + (unsigned) Ak \
 131     + (unsigned) Bj + (unsigned) Bk \
 132     + (unsigned) Cj + (unsigned) Ck \
 133     + (unsigned) Dj + (unsigned) Dk \
 134     + 4) >> 3
 135
 136 #define FILTER_3D(e) \
 137    do { \
 138       dst[i][e] = FILTER_SUM_3D(rowA[j][e], rowA[k][e], \
 139                                 rowB[j][e], rowB[k][e], \
 140                                 rowC[j][e], rowC[k][e], \
 141                                 rowD[j][e], rowD[k][e]); \
 142    } while(0)
 143
 144 #define FILTER_F_3D(e) \
 145    do { \
 146       dst[i][e] = (rowA[j][e] + rowA[k][e] \
 147                    + rowB[j][e] + rowB[k][e] \
 148                    + rowC[j][e] + rowC[k][e] \
 149                    + rowD[j][e] + rowD[k][e]) * 0.125F; \
 150    } while(0)
 151
 152 #define FILTER_HF_3D(e) \
 153    do { \
 154       const float aj = half_to_float(rowA[j][e]); \
 155       const float ak = half_to_float(rowA[k][e]); \
 156       const float bj = half_to_float(rowB[j][e]); \
 157       const float bk = half_to_float(rowB[k][e]); \
 158       const float cj = half_to_float(rowC[j][e]); \
 159       const float ck = half_to_float(rowC[k][e]); \
 160       const float dj = half_to_float(rowD[j][e]); \
 161       const float dk = half_to_float(rowD[k][e]); \
 162       dst[i][e] = float_to_half((aj + ak + bj + bk + cj + ck + dj + dk) \
 163                                       * 0.125F); \
 164    } while(0)
 165 /*@}*/
 166
 167
 168 /**
 169  * Average together two rows of a source image to produce a single new
 170  * row in the dest image.  It's legal for the two source rows to point
 171  * to the same data.  The source width must be equal to either the
 172  * dest width or two times the dest width.
 173  * \param datatype  GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT, GL_FLOAT, etc.
 174  * \param comps  number of components per pixel (1..4)
 175  */
 176 static void
 177 do_row(enum dtype datatype, uint comps, int srcWidth,
 178        const void *srcRowA, const void *srcRowB,
 179        int dstWidth, void *dstRow)
 180 {
 181    const uint k0 = (srcWidth == dstWidth) ? 0 : 1;
 182    const uint colStride = (srcWidth == dstWidth) ? 1 : 2;
 183
 184    assert(comps >= 1);
 185    assert(comps <= 4);
 186
 187    /* This assertion is no longer valid with non-power-of-2 textures
 188    assert(srcWidth == dstWidth || srcWidth == 2 * dstWidth);
 189    */
 190
 191    if (datatype == DTYPE_UBYTE && comps == 4) {
 192       uint i, j, k;
 193       const ubyte(*rowA)[4] = (const ubyte(*)[4]) srcRowA;
 194       const ubyte(*rowB)[4] = (const ubyte(*)[4]) srcRowB;
 195       ubyte(*dst)[4] = (ubyte(*)[4]) dstRow;
 196       for (i = j = 0, k = k0; i < (uint) dstWidth;
 197            i++, j += colStride, k += colStride) {
 198          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
 199          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
 200          dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
 201          dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4;
 202       }
 203    }
 204    else if (datatype == DTYPE_UBYTE && comps == 3) {
 205       uint i, j, k;
 206       const ubyte(*rowA)[3] = (const ubyte(*)[3]) srcRowA;
 207       const ubyte(*rowB)[3] = (const ubyte(*)[3]) srcRowB;
 208       ubyte(*dst)[3] = (ubyte(*)[3]) dstRow;
 209       for (i = j = 0, k = k0; i < (uint) dstWidth;
 210            i++, j += colStride, k += colStride) {
 211          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
 212          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
 213          dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
 214       }
 215    }
 216    else if (datatype == DTYPE_UBYTE && comps == 2) {
 217       uint i, j, k;
 218       const ubyte(*rowA)[2] = (const ubyte(*)[2]) srcRowA;
 219       const ubyte(*rowB)[2] = (const ubyte(*)[2]) srcRowB;
 220       ubyte(*dst)[2] = (ubyte(*)[2]) dstRow;
 221       for (i = j = 0, k = k0; i < (uint) dstWidth;
 222            i++, j += colStride, k += colStride) {
 223          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) >> 2;
 224          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) >> 2;
 225       }
 226    }
 227    else if (datatype == DTYPE_UBYTE && comps == 1) {
 228       uint i, j, k;
 229       const ubyte *rowA = (const ubyte *) srcRowA;
 230       const ubyte *rowB = (const ubyte *) srcRowB;
 231       ubyte *dst = (ubyte *) dstRow;
 232       for (i = j = 0, k = k0; i < (uint) dstWidth;
 233            i++, j += colStride, k += colStride) {
 234          dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) >> 2;
 235       }
 236    }
 237
 238    else if (datatype == DTYPE_USHORT && comps == 4) {
 239       uint i, j, k;
 240       const ushort(*rowA)[4] = (const ushort(*)[4]) srcRowA;
 241       const ushort(*rowB)[4] = (const ushort(*)[4]) srcRowB;
 242       ushort(*dst)[4] = (ushort(*)[4]) dstRow;
 243       for (i = j = 0, k = k0; i < (uint) dstWidth;
 244            i++, j += colStride, k += colStride) {
 245          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
 246          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
 247          dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
 248          dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4;
 249       }
 250    }
 251    else if (datatype == DTYPE_USHORT && comps == 3) {
 252       uint i, j, k;
 253       const ushort(*rowA)[3] = (const ushort(*)[3]) srcRowA;
 254       const ushort(*rowB)[3] = (const ushort(*)[3]) srcRowB;
 255       ushort(*dst)[3] = (ushort(*)[3]) dstRow;
 256       for (i = j = 0, k = k0; i < (uint) dstWidth;
 257            i++, j += colStride, k += colStride) {
 258          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
 259          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
 260          dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
 261       }
 262    }
 263    else if (datatype == DTYPE_USHORT && comps == 2) {
 264       uint i, j, k;
 265       const ushort(*rowA)[2] = (const ushort(*)[2]) srcRowA;
 266       const ushort(*rowB)[2] = (const ushort(*)[2]) srcRowB;
 267       ushort(*dst)[2] = (ushort(*)[2]) dstRow;
 268       for (i = j = 0, k = k0; i < (uint) dstWidth;
 269            i++, j += colStride, k += colStride) {
 270          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
 271          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
 272       }
 273    }
 274    else if (datatype == DTYPE_USHORT && comps == 1) {
 275       uint i, j, k;
 276       const ushort *rowA = (const ushort *) srcRowA;
 277       const ushort *rowB = (const ushort *) srcRowB;
 278       ushort *dst = (ushort *) dstRow;
 279       for (i = j = 0, k = k0; i < (uint) dstWidth;
 280            i++, j += colStride, k += colStride) {
 281          dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) / 4;
 282       }
 283    }
 284
 285    else if (datatype == DTYPE_FLOAT && comps == 4) {
 286       uint i, j, k;
 287       const float(*rowA)[4] = (const float(*)[4]) srcRowA;
 288       const float(*rowB)[4] = (const float(*)[4]) srcRowB;
 289       float(*dst)[4] = (float(*)[4]) dstRow;
 290       for (i = j = 0, k = k0; i < (uint) dstWidth;
 291            i++, j += colStride, k += colStride) {
 292          dst[i][0] = (rowA[j][0] + rowA[k][0] +
 293                       rowB[j][0] + rowB[k][0]) * 0.25F;
 294          dst[i][1] = (rowA[j][1] + rowA[k][1] +
 295                       rowB[j][1] + rowB[k][1]) * 0.25F;
 296          dst[i][2] = (rowA[j][2] + rowA[k][2] +
 297                       rowB[j][2] + rowB[k][2]) * 0.25F;
 298          dst[i][3] = (rowA[j][3] + rowA[k][3] +
 299                       rowB[j][3] + rowB[k][3]) * 0.25F;
 300       }
 301    }
 302    else if (datatype == DTYPE_FLOAT && comps == 3) {
 303       uint i, j, k;
 304       const float(*rowA)[3] = (const float(*)[3]) srcRowA;
 305       const float(*rowB)[3] = (const float(*)[3]) srcRowB;
 306       float(*dst)[3] = (float(*)[3]) dstRow;
 307       for (i = j = 0, k = k0; i < (uint) dstWidth;
 308            i++, j += colStride, k += colStride) {
 309          dst[i][0] = (rowA[j][0] + rowA[k][0] +
 310                       rowB[j][0] + rowB[k][0]) * 0.25F;
 311          dst[i][1] = (rowA[j][1] + rowA[k][1] +
 312                       rowB[j][1] + rowB[k][1]) * 0.25F;
 313          dst[i][2] = (rowA[j][2] + rowA[k][2] +
 314                       rowB[j][2] + rowB[k][2]) * 0.25F;
 315       }
 316    }
 317    else if (datatype == DTYPE_FLOAT && comps == 2) {
 318       uint i, j, k;
 319       const float(*rowA)[2] = (const float(*)[2]) srcRowA;
 320       const float(*rowB)[2] = (const float(*)[2]) srcRowB;
 321       float(*dst)[2] = (float(*)[2]) dstRow;
 322       for (i = j = 0, k = k0; i < (uint) dstWidth;
 323            i++, j += colStride, k += colStride) {
 324          dst[i][0] = (rowA[j][0] + rowA[k][0] +
 325                       rowB[j][0] + rowB[k][0]) * 0.25F;
 326          dst[i][1] = (rowA[j][1] + rowA[k][1] +
 327                       rowB[j][1] + rowB[k][1]) * 0.25F;
 328       }
 329    }
 330    else if (datatype == DTYPE_FLOAT && comps == 1) {
 331       uint i, j, k;
 332       const float *rowA = (const float *) srcRowA;
 333       const float *rowB = (const float *) srcRowB;
 334       float *dst = (float *) dstRow;
 335       for (i = j = 0, k = k0; i < (uint) dstWidth;
 336            i++, j += colStride, k += colStride) {
 337          dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) * 0.25F;
 338       }
 339    }
 340
 341 #if 0
 342    else if (datatype == HALF_DTYPE_FLOAT && comps == 4) {
 343       uint i, j, k, comp;
 344       const half_float(*rowA)[4] = (const half_float(*)[4]) srcRowA;
 345       const half_float(*rowB)[4] = (const half_float(*)[4]) srcRowB;
 346       half_float(*dst)[4] = (half_float(*)[4]) dstRow;
 347       for (i = j = 0, k = k0; i < (uint) dstWidth;
 348            i++, j += colStride, k += colStride) {
 349          for (comp = 0; comp < 4; comp++) {
 350             float aj, ak, bj, bk;
 351             aj = half_to_float(rowA[j][comp]);
 352             ak = half_to_float(rowA[k][comp]);
 353             bj = half_to_float(rowB[j][comp]);
 354             bk = half_to_float(rowB[k][comp]);
 355             dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F);
 356          }
 357       }
 358    }
 359    else if (datatype == DTYPE_HALF_FLOAT && comps == 3) {
 360       uint i, j, k, comp;
 361       const half_float(*rowA)[3] = (const half_float(*)[3]) srcRowA;
 362       const half_float(*rowB)[3] = (const half_float(*)[3]) srcRowB;
 363       half_float(*dst)[3] = (half_float(*)[3]) dstRow;
 364       for (i = j = 0, k = k0; i < (uint) dstWidth;
 365            i++, j += colStride, k += colStride) {
 366          for (comp = 0; comp < 3; comp++) {
 367             float aj, ak, bj, bk;
 368             aj = half_to_float(rowA[j][comp]);
 369             ak = half_to_float(rowA[k][comp]);
 370             bj = half_to_float(rowB[j][comp]);
 371             bk = half_to_float(rowB[k][comp]);
 372             dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F);
 373          }
 374       }
 375    }
 376    else if (datatype == DTYPE_HALF_FLOAT && comps == 2) {
 377       uint i, j, k, comp;
 378       const half_float(*rowA)[2] = (const half_float(*)[2]) srcRowA;
 379       const half_float(*rowB)[2] = (const half_float(*)[2]) srcRowB;
 380       half_float(*dst)[2] = (half_float(*)[2]) dstRow;
 381       for (i = j = 0, k = k0; i < (uint) dstWidth;
 382            i++, j += colStride, k += colStride) {
 383          for (comp = 0; comp < 2; comp++) {
 384             float aj, ak, bj, bk;
 385             aj = half_to_float(rowA[j][comp]);
 386             ak = half_to_float(rowA[k][comp]);
 387             bj = half_to_float(rowB[j][comp]);
 388             bk = half_to_float(rowB[k][comp]);
 389             dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F);
 390          }
 391       }
 392    }
 393    else if (datatype == DTYPE_HALF_FLOAT && comps == 1) {
 394       uint i, j, k;
 395       const half_float *rowA = (const half_float *) srcRowA;
 396       const half_float *rowB = (const half_float *) srcRowB;
 397       half_float *dst = (half_float *) dstRow;
 398       for (i = j = 0, k = k0; i < (uint) dstWidth;
 399            i++, j += colStride, k += colStride) {
 400          float aj, ak, bj, bk;
 401          aj = half_to_float(rowA[j]);
 402          ak = half_to_float(rowA[k]);
 403          bj = half_to_float(rowB[j]);
 404          bk = half_to_float(rowB[k]);
 405          dst[i] = float_to_half((aj + ak + bj + bk) * 0.25F);
 406       }
 407    }
 408 #endif
 409
 410    else if (datatype == DTYPE_UINT && comps == 1) {
 411       uint i, j, k;
 412       const uint *rowA = (const uint *) srcRowA;
 413       const uint *rowB = (const uint *) srcRowB;
 414       uint *dst = (uint *) dstRow;
 415       for (i = j = 0, k = k0; i < (uint) dstWidth;
 416            i++, j += colStride, k += colStride) {
 417          dst[i] = rowA[j] / 4 + rowA[k] / 4 + rowB[j] / 4 + rowB[k] / 4;
 418       }
 419    }
 420
 421    else if (datatype == DTYPE_USHORT_5_6_5 && comps == 3) {
 422       uint i, j, k;
 423       const ushort *rowA = (const ushort *) srcRowA;
 424       const ushort *rowB = (const ushort *) srcRowB;
 425       ushort *dst = (ushort *) dstRow;
 426       for (i = j = 0, k = k0; i < (uint) dstWidth;
 427            i++, j += colStride, k += colStride) {
 428          const int rowAr0 = rowA[j] & 0x1f;
 429          const int rowAr1 = rowA[k] & 0x1f;
 430          const int rowBr0 = rowB[j] & 0x1f;
 431          const int rowBr1 = rowB[k] & 0x1f;
 432          const int rowAg0 = (rowA[j] >> 5) & 0x3f;
 433          const int rowAg1 = (rowA[k] >> 5) & 0x3f;
 434          const int rowBg0 = (rowB[j] >> 5) & 0x3f;
 435          const int rowBg1 = (rowB[k] >> 5) & 0x3f;
 436          const int rowAb0 = (rowA[j] >> 11) & 0x1f;
 437          const int rowAb1 = (rowA[k] >> 11) & 0x1f;
 438          const int rowBb0 = (rowB[j] >> 11) & 0x1f;
 439          const int rowBb1 = (rowB[k] >> 11) & 0x1f;
 440          const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
 441          const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
 442          const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
 443          dst[i] = (blue << 11) | (green << 5) | red;
 444       }
 445    }
 446    else if (datatype == DTYPE_USHORT_4_4_4_4 && comps == 4) {
 447       uint i, j, k;
 448       const ushort *rowA = (const ushort *) srcRowA;
 449       const ushort *rowB = (const ushort *) srcRowB;
 450       ushort *dst = (ushort *) dstRow;
 451       for (i = j = 0, k = k0; i < (uint) dstWidth;
 452            i++, j += colStride, k += colStride) {
 453          const int rowAr0 = rowA[j] & 0xf;
 454          const int rowAr1 = rowA[k] & 0xf;
 455          const int rowBr0 = rowB[j] & 0xf;
 456          const int rowBr1 = rowB[k] & 0xf;
 457          const int rowAg0 = (rowA[j] >> 4) & 0xf;
 458          const int rowAg1 = (rowA[k] >> 4) & 0xf;
 459          const int rowBg0 = (rowB[j] >> 4) & 0xf;
 460          const int rowBg1 = (rowB[k] >> 4) & 0xf;
 461          const int rowAb0 = (rowA[j] >> 8) & 0xf;
 462          const int rowAb1 = (rowA[k] >> 8) & 0xf;
 463          const int rowBb0 = (rowB[j] >> 8) & 0xf;
 464          const int rowBb1 = (rowB[k] >> 8) & 0xf;
 465          const int rowAa0 = (rowA[j] >> 12) & 0xf;
 466          const int rowAa1 = (rowA[k] >> 12) & 0xf;
 467          const int rowBa0 = (rowB[j] >> 12) & 0xf;
 468          const int rowBa1 = (rowB[k] >> 12) & 0xf;
 469          const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
 470          const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
 471          const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
 472          const int alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2;
 473          dst[i] = (alpha << 12) | (blue << 8) | (green << 4) | red;
 474       }
 475    }
 476    else if (datatype == DTYPE_USHORT_1_5_5_5_REV && comps == 4) {
 477       uint i, j, k;
 478       const ushort *rowA = (const ushort *) srcRowA;
 479       const ushort *rowB = (const ushort *) srcRowB;
 480       ushort *dst = (ushort *) dstRow;
 481       for (i = j = 0, k = k0; i < (uint) dstWidth;
 482            i++, j += colStride, k += colStride) {
 483          const int rowAr0 = rowA[j] & 0x1f;
 484          const int rowAr1 = rowA[k] & 0x1f;
 485          const int rowBr0 = rowB[j] & 0x1f;
 486          const int rowBr1 = rowB[k] & 0x1f;
 487          const int rowAg0 = (rowA[j] >> 5) & 0x1f;
 488          const int rowAg1 = (rowA[k] >> 5) & 0x1f;
 489          const int rowBg0 = (rowB[j] >> 5) & 0x1f;
 490          const int rowBg1 = (rowB[k] >> 5) & 0x1f;
 491          const int rowAb0 = (rowA[j] >> 10) & 0x1f;
 492          const int rowAb1 = (rowA[k] >> 10) & 0x1f;
 493          const int rowBb0 = (rowB[j] >> 10) & 0x1f;
 494          const int rowBb1 = (rowB[k] >> 10) & 0x1f;
 495          const int rowAa0 = (rowA[j] >> 15) & 0x1;
 496          const int rowAa1 = (rowA[k] >> 15) & 0x1;
 497          const int rowBa0 = (rowB[j] >> 15) & 0x1;
 498          const int rowBa1 = (rowB[k] >> 15) & 0x1;
 499          const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
 500          const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
 501          const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
 502          const int alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2;
 503          dst[i] = (alpha << 15) | (blue << 10) | (green << 5) | red;
 504       }
 505    }
 506    else if (datatype == DTYPE_UBYTE_3_3_2 && comps == 3) {
 507       uint i, j, k;
 508       const ubyte *rowA = (const ubyte *) srcRowA;
 509       const ubyte *rowB = (const ubyte *) srcRowB;
 510       ubyte *dst = (ubyte *) dstRow;
 511       for (i = j = 0, k = k0; i < (uint) dstWidth;
 512            i++, j += colStride, k += colStride) {
 513          const int rowAr0 = rowA[j] & 0x3;
 514          const int rowAr1 = rowA[k] & 0x3;
 515          const int rowBr0 = rowB[j] & 0x3;
 516          const int rowBr1 = rowB[k] & 0x3;
 517          const int rowAg0 = (rowA[j] >> 2) & 0x7;
 518          const int rowAg1 = (rowA[k] >> 2) & 0x7;
 519          const int rowBg0 = (rowB[j] >> 2) & 0x7;
 520          const int rowBg1 = (rowB[k] >> 2) & 0x7;
 521          const int rowAb0 = (rowA[j] >> 5) & 0x7;
 522          const int rowAb1 = (rowA[k] >> 5) & 0x7;
 523          const int rowBb0 = (rowB[j] >> 5) & 0x7;
 524          const int rowBb1 = (rowB[k] >> 5) & 0x7;
 525          const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
 526          const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
 527          const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
 528          dst[i] = (blue << 5) | (green << 2) | red;
 529       }
 530    }
 531    else {
 532       debug_printf("bad format in do_row()");
 533    }
 534 }
 535
 536
/**
 * Average together four rows of a source image to produce a single new
 * row in the dest image.  It's legal for the four source rows to point
 * to the same data.  The source width must be equal to either the
 * dest width or two times the dest width.
 *
 * \param datatype  one of the DTYPE_x values
 * \param comps     number of components per pixel (1..4)
 * \param srcWidth  Width of a row in the source data
 * \param srcRowA   Pointer to one of the rows of source data
 * \param srcRowB   Pointer to one of the rows of source data
 * \param srcRowC   Pointer to one of the rows of source data
 * \param srcRowD   Pointer to one of the rows of source data
 * \param dstWidth  Width of a row in the destination data
 * \param dstRow    Pointer to the row of destination data
 */
 554 static void
 555 do_row_3D(enum dtype datatype, uint comps, int srcWidth,
 556           const void *srcRowA, const void *srcRowB,
 557           const void *srcRowC, const void *srcRowD,
 558           int dstWidth, void *dstRow)
 559 {
 560    const uint k0 = (srcWidth == dstWidth) ? 0 : 1;
 561    const uint colStride = (srcWidth == dstWidth) ? 1 : 2;
 562    uint i, j, k;
 563
 564    assert(comps >= 1);
 565    assert(comps <= 4);
 566
 567    if ((datatype == DTYPE_UBYTE) && (comps == 4)) {
 568       DECLARE_ROW_POINTERS(ubyte, 4);
 569
 570       for (i = j = 0, k = k0; i < (uint) dstWidth;
 571            i++, j += colStride, k += colStride) {
 572          FILTER_3D(0);
 573          FILTER_3D(1);
 574          FILTER_3D(2);
 575          FILTER_3D(3);
 576       }
 577    }
 578    else if ((datatype == DTYPE_UBYTE) && (comps == 3)) {
 579       DECLARE_ROW_POINTERS(ubyte, 3);
 580
 581       for (i = j = 0, k = k0; i < (uint) dstWidth;
 582            i++, j += colStride, k += colStride) {
 583          FILTER_3D(0);
 584          FILTER_3D(1);
 585          FILTER_3D(2);
 586       }
 587    }
 588    else if ((datatype == DTYPE_UBYTE) && (comps == 2)) {
 589       DECLARE_ROW_POINTERS(ubyte, 2);
 590
 591       for (i = j = 0, k = k0; i < (uint) dstWidth;
 592            i++, j += colStride, k += colStride) {
 593          FILTER_3D(0);
 594          FILTER_3D(1);
 595       }
 596    }
 597    else if ((datatype == DTYPE_UBYTE) && (comps == 1)) {
 598       DECLARE_ROW_POINTERS(ubyte, 1);
 599
 600       for (i = j = 0, k = k0; i < (uint) dstWidth;
 601            i++, j += colStride, k += colStride) {
 602          FILTER_3D(0);
 603       }
 604    }
 605    else if ((datatype == DTYPE_USHORT) && (comps == 4)) {
 606       DECLARE_ROW_POINTERS(ushort, 4);
 607
 608       for (i = j = 0, k = k0; i < (uint) dstWidth;
 609            i++, j += colStride, k += colStride) {
 610          FILTER_3D(0);
 611          FILTER_3D(1);
 612          FILTER_3D(2);
 613          FILTER_3D(3);
 614       }
 615    }
 616    else if ((datatype == DTYPE_USHORT) && (comps == 3)) {
 617       DECLARE_ROW_POINTERS(ushort, 3);
 618
 619       for (i = j = 0, k = k0; i < (uint) dstWidth;
 620            i++, j += colStride, k += colStride) {
 621          FILTER_3D(0);
 622          FILTER_3D(1);
 623          FILTER_3D(2);
 624       }
 625    }
 626    else if ((datatype == DTYPE_USHORT) && (comps == 2)) {
 627       DECLARE_ROW_POINTERS(ushort, 2);
 628
 629       for (i = j = 0, k = k0; i < (uint) dstWidth;
 630            i++, j += colStride, k += colStride) {
 631          FILTER_3D(0);
 632          FILTER_3D(1);
 633       }
 634    }
 635    else if ((datatype == DTYPE_USHORT) && (comps == 1)) {
 636       DECLARE_ROW_POINTERS(ushort, 1);
 637
 638       for (i = j = 0, k = k0; i < (uint) dstWidth;
 639            i++, j += colStride, k += colStride) {
 640          FILTER_3D(0);
 641       }
 642    }
 643    else if ((datatype == DTYPE_FLOAT) && (comps == 4)) {
 644       DECLARE_ROW_POINTERS(float, 4);
 645
 646       for (i = j = 0, k = k0; i < (uint) dstWidth;
 647            i++, j += colStride, k += colStride) {
 648          FILTER_F_3D(0);
 649          FILTER_F_3D(1);
 650          FILTER_F_3D(2);
 651          FILTER_F_3D(3);
 652       }
 653    }
 654    else if ((datatype == DTYPE_FLOAT) && (comps == 3)) {
 655       DECLARE_ROW_POINTERS(float, 3);
 656
 657       for (i = j = 0, k = k0; i < (uint) dstWidth;
 658            i++, j += colStride, k += colStride) {
 659          FILTER_F_3D(0);
 660          FILTER_F_3D(1);
 661          FILTER_F_3D(2);
 662       }
 663    }
 664    else if ((datatype == DTYPE_FLOAT) && (comps == 2)) {
 665       DECLARE_ROW_POINTERS(float, 2);
 666
 667       for (i = j = 0, k = k0; i < (uint) dstWidth;
 668            i++, j += colStride, k += colStride) {
 669          FILTER_F_3D(0);
 670          FILTER_F_3D(1);
 671       }
 672    }
 673    else if ((datatype == DTYPE_FLOAT) && (comps == 1)) {
 674       DECLARE_ROW_POINTERS(float, 1);
 675
 676       for (i = j = 0, k = k0; i < (uint) dstWidth;
 677            i++, j += colStride, k += colStride) {
 678          FILTER_F_3D(0);
 679       }
 680    }
 681    else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 4)) {
 682       DECLARE_ROW_POINTERS(half_float, 4);
 683
 684       for (i = j = 0, k = k0; i < (uint) dstWidth;
 685            i++, j += colStride, k += colStride) {
 686          FILTER_HF_3D(0);
 687          FILTER_HF_3D(1);
 688          FILTER_HF_3D(2);
 689          FILTER_HF_3D(3);
 690       }
 691    }
 692    else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 3)) {
 693       DECLARE_ROW_POINTERS(half_float, 4);
 694
 695       for (i = j = 0, k = k0; i < (uint) dstWidth;
 696            i++, j += colStride, k += colStride) {
 697          FILTER_HF_3D(0);
 698          FILTER_HF_3D(1);
 699          FILTER_HF_3D(2);
 700       }
 701    }
 702    else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 2)) {
 703       DECLARE_ROW_POINTERS(half_float, 4);
 704
 705       for (i = j = 0, k = k0; i < (uint) dstWidth;
 706            i++, j += colStride, k += colStride) {
 707          FILTER_HF_3D(0);
 708          FILTER_HF_3D(1);
 709       }
 710    }
 711    else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 1)) {
 712       DECLARE_ROW_POINTERS(half_float, 4);
 713
 714       for (i = j = 0, k = k0; i < (uint) dstWidth;
 715            i++, j += colStride, k += colStride) {
 716          FILTER_HF_3D(0);
 717       }
 718    }
 719    else if ((datatype == DTYPE_UINT) && (comps == 1)) {
 720       const uint *rowA = (const uint *) srcRowA;
 721       const uint *rowB = (const uint *) srcRowB;
 722       const uint *rowC = (const uint *) srcRowC;
 723       const uint *rowD = (const uint *) srcRowD;
 724       float *dst = (float *) dstRow;
 725
 726       for (i = j = 0, k = k0; i < (uint) dstWidth;
 727            i++, j += colStride, k += colStride) {
 728          const uint64_t tmp = (((uint64_t) rowA[j] + (uint64_t) rowA[k])
 729                                + ((uint64_t) rowB[j] + (uint64_t) rowB[k])
 730                                + ((uint64_t) rowC[j] + (uint64_t) rowC[k])
 731                                + ((uint64_t) rowD[j] + (uint64_t) rowD[k]));
 732          dst[i] = (float)((double) tmp * 0.125);
 733       }
 734    }
 735    else if ((datatype == DTYPE_USHORT_5_6_5) && (comps == 3)) {
 736       DECLARE_ROW_POINTERS0(ushort);
 737
 738       for (i = j = 0, k = k0; i < (uint) dstWidth;
 739            i++, j += colStride, k += colStride) {
 740          const int rowAr0 = rowA[j] & 0x1f;
 741          const int rowAr1 = rowA[k] & 0x1f;
 742          const int rowBr0 = rowB[j] & 0x1f;
 743          const int rowBr1 = rowB[k] & 0x1f;
 744          const int rowCr0 = rowC[j] & 0x1f;
 745          const int rowCr1 = rowC[k] & 0x1f;
 746          const int rowDr0 = rowD[j] & 0x1f;
 747          const int rowDr1 = rowD[k] & 0x1f;
 748          const int rowAg0 = (rowA[j] >> 5) & 0x3f;
 749          const int rowAg1 = (rowA[k] >> 5) & 0x3f;
 750          const int rowBg0 = (rowB[j] >> 5) & 0x3f;
 751          const int rowBg1 = (rowB[k] >> 5) & 0x3f;
 752          const int rowCg0 = (rowC[j] >> 5) & 0x3f;
 753          const int rowCg1 = (rowC[k] >> 5) & 0x3f;
 754          const int rowDg0 = (rowD[j] >> 5) & 0x3f;
 755          const int rowDg1 = (rowD[k] >> 5) & 0x3f;
 756          const int rowAb0 = (rowA[j] >> 11) & 0x1f;
 757          const int rowAb1 = (rowA[k] >> 11) & 0x1f;
 758          const int rowBb0 = (rowB[j] >> 11) & 0x1f;
 759          const int rowBb1 = (rowB[k] >> 11) & 0x1f;
 760          const int rowCb0 = (rowC[j] >> 11) & 0x1f;
 761          const int rowCb1 = (rowC[k] >> 11) & 0x1f;
 762          const int rowDb0 = (rowD[j] >> 11) & 0x1f;
 763          const int rowDb1 = (rowD[k] >> 11) & 0x1f;
 764          const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
 765                                        rowCr0, rowCr1, rowDr0, rowDr1);
 766          const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
 767                                        rowCg0, rowCg1, rowDg0, rowDg1);
 768          const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
 769                                        rowCb0, rowCb1, rowDb0, rowDb1);
 770          dst[i] = (b << 11) | (g << 5) | r;
 771       }
 772    }
 773    else if ((datatype == DTYPE_USHORT_4_4_4_4) && (comps == 4)) {
 774       DECLARE_ROW_POINTERS0(ushort);
 775
 776       for (i = j = 0, k = k0; i < (uint) dstWidth;
 777            i++, j += colStride, k += colStride) {
 778          const int rowAr0 = rowA[j] & 0xf;
 779          const int rowAr1 = rowA[k] & 0xf;
 780          const int rowBr0 = rowB[j] & 0xf;
 781          const int rowBr1 = rowB[k] & 0xf;
 782          const int rowCr0 = rowC[j] & 0xf;
 783          const int rowCr1 = rowC[k] & 0xf;
 784          const int rowDr0 = rowD[j] & 0xf;
 785          const int rowDr1 = rowD[k] & 0xf;
 786          const int rowAg0 = (rowA[j] >> 4) & 0xf;
 787          const int rowAg1 = (rowA[k] >> 4) & 0xf;
 788          const int rowBg0 = (rowB[j] >> 4) & 0xf;
 789          const int rowBg1 = (rowB[k] >> 4) & 0xf;
 790          const int rowCg0 = (rowC[j] >> 4) & 0xf;
 791          const int rowCg1 = (rowC[k] >> 4) & 0xf;
 792          const int rowDg0 = (rowD[j] >> 4) & 0xf;
 793          const int rowDg1 = (rowD[k] >> 4) & 0xf;
 794          const int rowAb0 = (rowA[j] >> 8) & 0xf;
 795          const int rowAb1 = (rowA[k] >> 8) & 0xf;
 796          const int rowBb0 = (rowB[j] >> 8) & 0xf;
 797          const int rowBb1 = (rowB[k] >> 8) & 0xf;
 798          const int rowCb0 = (rowC[j] >> 8) & 0xf;
 799          const int rowCb1 = (rowC[k] >> 8) & 0xf;
 800          const int rowDb0 = (rowD[j] >> 8) & 0xf;
 801          const int rowDb1 = (rowD[k] >> 8) & 0xf;
 802          const int rowAa0 = (rowA[j] >> 12) & 0xf;
 803          const int rowAa1 = (rowA[k] >> 12) & 0xf;
 804          const int rowBa0 = (rowB[j] >> 12) & 0xf;
 805          const int rowBa1 = (rowB[k] >> 12) & 0xf;
 806          const int rowCa0 = (rowC[j] >> 12) & 0xf;
 807          const int rowCa1 = (rowC[k] >> 12) & 0xf;
 808          const int rowDa0 = (rowD[j] >> 12) & 0xf;
 809          const int rowDa1 = (rowD[k] >> 12) & 0xf;
 810          const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
 811                                        rowCr0, rowCr1, rowDr0, rowDr1);
 812          const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
 813                                        rowCg0, rowCg1, rowDg0, rowDg1);
 814          const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
 815                                        rowCb0, rowCb1, rowDb0, rowDb1);
 816          const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1,
 817                                        rowCa0, rowCa1, rowDa0, rowDa1);
 818
 819          dst[i] = (a << 12) | (b << 8) | (g << 4) | r;
 820       }
 821    }
 822    else if ((datatype == DTYPE_USHORT_1_5_5_5_REV) && (comps == 4)) {
 823       DECLARE_ROW_POINTERS0(ushort);
 824
 825       for (i = j = 0, k = k0; i < (uint) dstWidth;
 826            i++, j += colStride, k += colStride) {
 827          const int rowAr0 = rowA[j] & 0x1f;
 828          const int rowAr1 = rowA[k] & 0x1f;
 829          const int rowBr0 = rowB[j] & 0x1f;
 830          const int rowBr1 = rowB[k] & 0x1f;
 831          const int rowCr0 = rowC[j] & 0x1f;
 832          const int rowCr1 = rowC[k] & 0x1f;
 833          const int rowDr0 = rowD[j] & 0x1f;
 834          const int rowDr1 = rowD[k] & 0x1f;
 835          const int rowAg0 = (rowA[j] >> 5) & 0x1f;
 836          const int rowAg1 = (rowA[k] >> 5) & 0x1f;
 837          const int rowBg0 = (rowB[j] >> 5) & 0x1f;
 838          const int rowBg1 = (rowB[k] >> 5) & 0x1f;
 839          const int rowCg0 = (rowC[j] >> 5) & 0x1f;
 840          const int rowCg1 = (rowC[k] >> 5) & 0x1f;
 841          const int rowDg0 = (rowD[j] >> 5) & 0x1f;
 842          const int rowDg1 = (rowD[k] >> 5) & 0x1f;
 843          const int rowAb0 = (rowA[j] >> 10) & 0x1f;
 844          const int rowAb1 = (rowA[k] >> 10) & 0x1f;
 845          const int rowBb0 = (rowB[j] >> 10) & 0x1f;
 846          const int rowBb1 = (rowB[k] >> 10) & 0x1f;
 847          const int rowCb0 = (rowC[j] >> 10) & 0x1f;
 848          const int rowCb1 = (rowC[k] >> 10) & 0x1f;
 849          const int rowDb0 = (rowD[j] >> 10) & 0x1f;
 850          const int rowDb1 = (rowD[k] >> 10) & 0x1f;
 851          const int rowAa0 = (rowA[j] >> 15) & 0x1;
 852          const int rowAa1 = (rowA[k] >> 15) & 0x1;
 853          const int rowBa0 = (rowB[j] >> 15) & 0x1;
 854          const int rowBa1 = (rowB[k] >> 15) & 0x1;
 855          const int rowCa0 = (rowC[j] >> 15) & 0x1;
 856          const int rowCa1 = (rowC[k] >> 15) & 0x1;
 857          const int rowDa0 = (rowD[j] >> 15) & 0x1;
 858          const int rowDa1 = (rowD[k] >> 15) & 0x1;
 859          const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
 860                                        rowCr0, rowCr1, rowDr0, rowDr1);
 861          const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
 862                                        rowCg0, rowCg1, rowDg0, rowDg1);
 863          const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
 864                                        rowCb0, rowCb1, rowDb0, rowDb1);
 865          const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1,
 866                                        rowCa0, rowCa1, rowDa0, rowDa1);
 867
 868          dst[i] = (a << 15) | (b << 10) | (g << 5) | r;
 869       }
 870    }
 871    else if ((datatype == DTYPE_UBYTE_3_3_2) && (comps == 3)) {
 872       DECLARE_ROW_POINTERS0(ushort);
 873
 874       for (i = j = 0, k = k0; i < (uint) dstWidth;
 875            i++, j += colStride, k += colStride) {
 876          const int rowAr0 = rowA[j] & 0x3;
 877          const int rowAr1 = rowA[k] & 0x3;
 878          const int rowBr0 = rowB[j] & 0x3;
 879          const int rowBr1 = rowB[k] & 0x3;
 880          const int rowCr0 = rowC[j] & 0x3;
 881          const int rowCr1 = rowC[k] & 0x3;
 882          const int rowDr0 = rowD[j] & 0x3;
 883          const int rowDr1 = rowD[k] & 0x3;
 884          const int rowAg0 = (rowA[j] >> 2) & 0x7;
 885          const int rowAg1 = (rowA[k] >> 2) & 0x7;
 886          const int rowBg0 = (rowB[j] >> 2) & 0x7;
 887          const int rowBg1 = (rowB[k] >> 2) & 0x7;
 888          const int rowCg0 = (rowC[j] >> 2) & 0x7;
 889          const int rowCg1 = (rowC[k] >> 2) & 0x7;
 890          const int rowDg0 = (rowD[j] >> 2) & 0x7;
 891          const int rowDg1 = (rowD[k] >> 2) & 0x7;
 892          const int rowAb0 = (rowA[j] >> 5) & 0x7;
 893          const int rowAb1 = (rowA[k] >> 5) & 0x7;
 894          const int rowBb0 = (rowB[j] >> 5) & 0x7;
 895          const int rowBb1 = (rowB[k] >> 5) & 0x7;
 896          const int rowCb0 = (rowC[j] >> 5) & 0x7;
 897          const int rowCb1 = (rowC[k] >> 5) & 0x7;
 898          const int rowDb0 = (rowD[j] >> 5) & 0x7;
 899          const int rowDb1 = (rowD[k] >> 5) & 0x7;
 900          const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
 901                                        rowCr0, rowCr1, rowDr0, rowDr1);
 902          const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
 903                                        rowCg0, rowCg1, rowDg0, rowDg1);
 904          const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
 905                                        rowCb0, rowCb1, rowDb0, rowDb1);
 906          dst[i] = (b << 5) | (g << 2) | r;
 907       }
 908    }
 909    else {
 910       debug_printf("bad format in do_row_3D()");
 911    }
 912 }
 913
 914
 915
 916 static void
 917 format_to_type_comps(enum pipe_format pformat,
 918                      enum dtype *datatype, uint *comps)
 919 {
 920    switch (pformat) {
 921    case PIPE_FORMAT_A8R8G8B8_UNORM:
 922    case PIPE_FORMAT_X8R8G8B8_UNORM:
 923    case PIPE_FORMAT_B8G8R8A8_UNORM:
 924    case PIPE_FORMAT_B8G8R8X8_UNORM:
 925       *datatype = DTYPE_UBYTE;
 926       *comps = 4;
 927       return;
 928    case PIPE_FORMAT_A1R5G5B5_UNORM:
 929       *datatype = DTYPE_USHORT_1_5_5_5_REV;
 930       *comps = 4;
 931       return;
 932    case PIPE_FORMAT_A4R4G4B4_UNORM:
 933       *datatype = DTYPE_USHORT_4_4_4_4;
 934       *comps = 4;
 935       return;
 936    case PIPE_FORMAT_R5G6B5_UNORM:
 937       *datatype = DTYPE_USHORT_5_6_5;
 938       *comps = 3;
 939       return;
 940    case PIPE_FORMAT_L8_UNORM:
 941    case PIPE_FORMAT_A8_UNORM:
 942    case PIPE_FORMAT_I8_UNORM:
 943       *datatype = DTYPE_UBYTE;
 944       *comps = 1;
 945       return;
 946    case PIPE_FORMAT_A8L8_UNORM:
 947       *datatype = DTYPE_UBYTE;
 948       *comps = 2;
 949       return;
 950    default:
 951       assert(0);
 952       *datatype = DTYPE_UBYTE;
 953       *comps = 0;
 954       break;
 955    }
 956 }
 957
 958
 959 static void
 960 reduce_1d(enum pipe_format pformat,
 961           int srcWidth, const ubyte *srcPtr,
 962           int dstWidth, ubyte *dstPtr)
 963 {
 964    enum dtype datatype;
 965    uint comps;
 966
 967    format_to_type_comps(pformat, &datatype, &comps);
 968
 969    /* we just duplicate the input row, kind of hack, saves code */
 970    do_row(datatype, comps,
 971           srcWidth, srcPtr, srcPtr,
 972           dstWidth, dstPtr);
 973 }
 974
 975
 976 /**
 977  * Strides are in bytes.  If zero, it'll be computed as width * bpp.
 978  */
 979 static void
 980 reduce_2d(enum pipe_format pformat,
 981           int srcWidth, int srcHeight,
 982           int srcRowStride, const ubyte *srcPtr,
 983           int dstWidth, int dstHeight,
 984           int dstRowStride, ubyte *dstPtr)
 985 {
 986    enum dtype datatype;
 987    uint comps;
 988    const int bpt = pf_get_size(pformat);
 989    const ubyte *srcA, *srcB;
 990    ubyte *dst;
 991    int row;
 992
 993    format_to_type_comps(pformat, &datatype, &comps);
 994
 995    if (!srcRowStride)
 996       srcRowStride = bpt * srcWidth;
 997
 998    if (!dstRowStride)
 999       dstRowStride = bpt * dstWidth;
1000
1001    /* Compute src and dst pointers */
1002    srcA = srcPtr;
1003    if (srcHeight > 1)
1004       srcB = srcA + srcRowStride;
1005    else
1006       srcB = srcA;
1007    dst = dstPtr;
1008
1009    for (row = 0; row < dstHeight; row++) {
1010       do_row(datatype, comps,
1011              srcWidth, srcA, srcB,
1012              dstWidth, dst);
1013       srcA += 2 * srcRowStride;
1014       srcB += 2 * srcRowStride;
1015       dst += dstRowStride;
1016    }
1017 }
1018
1019
1020 static void
1021 reduce_3d(enum pipe_format pformat,
1022           int srcWidth, int srcHeight, int srcDepth,
1023           int srcRowStride, const ubyte *srcPtr,
1024           int dstWidth, int dstHeight, int dstDepth,
1025           int dstRowStride, ubyte *dstPtr)
1026 {
1027    const int bpt = pf_get_size(pformat);
1028    const int border = 0;
1029    int img, row;
1030    int bytesPerSrcImage, bytesPerDstImage;
1031    int bytesPerSrcRow, bytesPerDstRow;
1032    int srcImageOffset, srcRowOffset;
1033    enum dtype datatype;
1034    uint comps;
1035
1036    format_to_type_comps(pformat, &datatype, &comps);
1037
1038    bytesPerSrcImage = srcWidth * srcHeight * bpt;
1039    bytesPerDstImage = dstWidth * dstHeight * bpt;
1040
1041    bytesPerSrcRow = srcWidth * bpt;
1042    bytesPerDstRow = dstWidth * bpt;
1043
1044    /* Offset between adjacent src images to be averaged together */
1045    srcImageOffset = (srcDepth == dstDepth) ? 0 : bytesPerSrcImage;
1046
1047    /* Offset between adjacent src rows to be averaged together */
1048    srcRowOffset = (srcHeight == dstHeight) ? 0 : srcWidth * bpt;
1049
1050    /*
1051     * Need to average together up to 8 src pixels for each dest pixel.
1052     * Break that down into 3 operations:
1053     *   1. take two rows from source image and average them together.
1054     *   2. take two rows from next source image and average them together.
1055     *   3. take the two averaged rows and average them for the final dst row.
1056     */
1057
1058    /*
1059    _mesa_printf("mip3d %d x %d x %d  ->  %d x %d x %d\n",
1060           srcWidth, srcHeight, srcDepth, dstWidth, dstHeight, dstDepth);
1061    */
1062
1063    for (img = 0; img < dstDepth; img++) {
1064       /* first source image pointer, skipping border */
1065       const ubyte *imgSrcA = srcPtr
1066          + (bytesPerSrcImage + bytesPerSrcRow + border) * bpt * border
1067          + img * (bytesPerSrcImage + srcImageOffset);
1068       /* second source image pointer, skipping border */
1069       const ubyte *imgSrcB = imgSrcA + srcImageOffset;
1070       /* address of the dest image, skipping border */
1071       ubyte *imgDst = dstPtr
1072          + (bytesPerDstImage + bytesPerDstRow + border) * bpt * border
1073          + img * bytesPerDstImage;
1074
1075       /* setup the four source row pointers and the dest row pointer */
1076       const ubyte *srcImgARowA = imgSrcA;
1077       const ubyte *srcImgARowB = imgSrcA + srcRowOffset;
1078       const ubyte *srcImgBRowA = imgSrcB;
1079       const ubyte *srcImgBRowB = imgSrcB + srcRowOffset;
1080       ubyte *dstImgRow = imgDst;
1081
1082       for (row = 0; row < dstHeight; row++) {
1083          do_row_3D(datatype, comps, srcWidth,
1084                    srcImgARowA, srcImgARowB,
1085                    srcImgBRowA, srcImgBRowB,
1086                    dstWidth, dstImgRow);
1087
1088          /* advance to next rows */
1089          srcImgARowA += bytesPerSrcRow + srcRowOffset;
1090          srcImgARowB += bytesPerSrcRow + srcRowOffset;
1091          srcImgBRowA += bytesPerSrcRow + srcRowOffset;
1092          srcImgBRowB += bytesPerSrcRow + srcRowOffset;
1093          dstImgRow += bytesPerDstRow;
1094       }
1095    }
1096 }
1097
1098
1099
1100
1101 static void
1102 make_1d_mipmap(struct gen_mipmap_state *ctx,
1103                struct pipe_texture *pt,
1104                uint face, uint baseLevel, uint lastLevel)
1105 {
1106    struct pipe_context *pipe = ctx->pipe;
1107    struct pipe_screen *screen = pipe->screen;
1108    const uint zslice = 0;
1109    uint dstLevel;
1110
1111    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
1112       const uint srcLevel = dstLevel - 1;
1113       struct pipe_transfer *srcTrans, *dstTrans;
1114       void *srcMap, *dstMap;
1115
1116       srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
1117                                           PIPE_TRANSFER_READ, 0, 0,
1118                                           pt->width[srcLevel],
1119                                           pt->height[srcLevel]);
1120       dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
1121                                           PIPE_TRANSFER_WRITE, 0, 0,
1122                                           pt->width[dstLevel],
1123                                           pt->height[dstLevel]);
1124
1125       srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
1126       dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
1127
1128       reduce_1d(pt->format,
1129                 srcTrans->width, srcMap,
1130                 dstTrans->width, dstMap);
1131
1132       screen->transfer_unmap(screen, srcTrans);
1133       screen->transfer_unmap(screen, dstTrans);
1134
1135       screen->tex_transfer_destroy(srcTrans);
1136       screen->tex_transfer_destroy(dstTrans);
1137    }
1138 }
1139
1140
1141 static void
1142 make_2d_mipmap(struct gen_mipmap_state *ctx,
1143                struct pipe_texture *pt,
1144                uint face, uint baseLevel, uint lastLevel)
1145 {
1146    struct pipe_context *pipe = ctx->pipe;
1147    struct pipe_screen *screen = pipe->screen;
1148    const uint zslice = 0;
1149    uint dstLevel;
1150
1151    assert(pt->block.width == 1);
1152    assert(pt->block.height == 1);
1153
1154    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
1155       const uint srcLevel = dstLevel - 1;
1156       struct pipe_transfer *srcTrans, *dstTrans;
1157       ubyte *srcMap, *dstMap;
1158
1159       srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
1160                                           PIPE_TRANSFER_READ, 0, 0,
1161                                           pt->width[srcLevel],
1162                                           pt->height[srcLevel]);
1163       dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
1164                                           PIPE_TRANSFER_WRITE, 0, 0,
1165                                           pt->width[dstLevel],
1166                                           pt->height[dstLevel]);
1167
1168       srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
1169       dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
1170
1171       reduce_2d(pt->format,
1172                 srcTrans->width, srcTrans->height,
1173                 srcTrans->stride, srcMap,
1174                 dstTrans->width, dstTrans->height,
1175                 dstTrans->stride, dstMap);
1176
1177       screen->transfer_unmap(screen, srcTrans);
1178       screen->transfer_unmap(screen, dstTrans);
1179
1180       screen->tex_transfer_destroy(srcTrans);
1181       screen->tex_transfer_destroy(dstTrans);
1182    }
1183 }
1184
1185
1186 static void
1187 make_3d_mipmap(struct gen_mipmap_state *ctx,
1188                struct pipe_texture *pt,
1189                uint face, uint baseLevel, uint lastLevel)
1190 {
1191 #if 0
1192    struct pipe_context *pipe = ctx->pipe;
1193    struct pipe_screen *screen = pipe->screen;
1194    uint dstLevel, zslice = 0;
1195
1196    assert(pt->block.width == 1);
1197    assert(pt->block.height == 1);
1198
1199    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
1200       const uint srcLevel = dstLevel - 1;
1201       struct pipe_transfer *srcTrans, *dstTrans;
1202       ubyte *srcMap, *dstMap;
1203
1204       srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
1205                                           PIPE_TRANSFER_READ, 0, 0,
1206                                           pt->width[srcLevel],
1207                                           pt->height[srcLevel]);
1208       dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
1209                                           PIPE_TRANSFER_WRITE, 0, 0,
1210                                           pt->width[dstLevel],
1211                                           pt->height[dstLevel]);
1212
1213       srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
1214       dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
1215
1216       reduce_3d(pt->format,
1217                 srcTrans->width, srcTrans->height,
1218                 srcTrans->stride, srcMap,
1219                 dstTrans->width, dstTrans->height,
1220                 dstTrans->stride, dstMap);
1221
1222       screen->transfer_unmap(screen, srcTrans);
1223       screen->transfer_unmap(screen, dstTrans);
1224
1225       screen->tex_transfer_destroy(srcTrans);
1226       screen->tex_transfer_destroy(dstTrans);
1227    }
1228 #else
1229    (void) reduce_3d;
1230 #endif
1231 }
1232
1233
1234 static void
1235 fallback_gen_mipmap(struct gen_mipmap_state *ctx,
1236                     struct pipe_texture *pt,
1237                     uint face, uint baseLevel, uint lastLevel)
1238 {
1239    switch (pt->target) {
1240    case PIPE_TEXTURE_1D:
1241       make_1d_mipmap(ctx, pt, face, baseLevel, lastLevel);
1242       break;
1243    case PIPE_TEXTURE_2D:
1244    case PIPE_TEXTURE_CUBE:
1245       make_2d_mipmap(ctx, pt, face, baseLevel, lastLevel);
1246       break;
1247    case PIPE_TEXTURE_3D:
1248       make_3d_mipmap(ctx, pt, face, baseLevel, lastLevel);
1249       break;
1250    default:
1251       assert(0);
1252    }
1253 }
1254
1255
1256 /**
1257  * Create a mipmap generation context.
1258  * The idea is to create one of these and re-use it each time we need to
1259  * generate a mipmap.
1260  */
1261 struct gen_mipmap_state *
1262 util_create_gen_mipmap(struct pipe_context *pipe,
1263                        struct cso_context *cso)
1264 {
1265    struct gen_mipmap_state *ctx;
1266    uint i;
1267
1268    ctx = CALLOC_STRUCT(gen_mipmap_state);
1269    if (!ctx)
1270       return NULL;
1271
1272    ctx->pipe = pipe;
1273    ctx->cso = cso;
1274
1275    /* disabled blending/masking */
1276    memset(&ctx->blend, 0, sizeof(ctx->blend));
1277    ctx->blend.colormask = PIPE_MASK_RGBA;
1278
1279    /* no-op depth/stencil/alpha */
1280    memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil));
1281
1282    /* rasterizer */
1283    memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
1284    ctx->rasterizer.front_winding = PIPE_WINDING_CW;
1285    ctx->rasterizer.cull_mode = PIPE_WINDING_NONE;
1286    ctx->rasterizer.bypass_vs_clip_and_viewport = 1;
1287    ctx->rasterizer.gl_rasterization_rules = 1;
1288
1289    /* sampler state */
1290    memset(&ctx->sampler, 0, sizeof(ctx->sampler));
1291    ctx->sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1292    ctx->sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1293    ctx->sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1294    ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
1295    ctx->sampler.normalized_coords = 1;
1296
1297    /* vertex shader - still needed to specify mapping from fragment
1298     * shader input semantics to vertex elements
1299     */
1300    {
1301       const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
1302                                       TGSI_SEMANTIC_GENERIC };
1303       const uint semantic_indexes[] = { 0, 0 };
1304       ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
1305                                                     semantic_indexes);
1306    }
1307
1308    /* fragment shader */
1309    ctx->fs = util_make_fragment_tex_shader(pipe);
1310
1311    /* vertex data that doesn't change */
1312    for (i = 0; i < 4; i++) {
1313       ctx->vertices[i][0][2] = 0.0f; /* z */
1314       ctx->vertices[i][0][3] = 1.0f; /* w */
1315       ctx->vertices[i][1][3] = 1.0f; /* q */
1316    }
1317
1318    /* Note: the actual vertex buffer is allocated as needed below */
1319
1320    return ctx;
1321 }
1322
1323
1324 /**
1325  * Get next "slot" of vertex space in the vertex buffer.
1326  * We're allocating one large vertex buffer and using it piece by piece.
1327  */
1328 static unsigned
1329 get_next_slot(struct gen_mipmap_state *ctx)
1330 {
1331    const unsigned max_slots = 4096 / sizeof ctx->vertices;
1332
1333    if (ctx->vbuf_slot >= max_slots)
1334       util_gen_mipmap_flush( ctx );
1335
1336    if (!ctx->vbuf) {
1337       ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
1338                                      32,
1339                                      PIPE_BUFFER_USAGE_VERTEX,
1340                                      max_slots * sizeof ctx->vertices);
1341    }
1342
1343    return ctx->vbuf_slot++ * sizeof ctx->vertices;
1344 }
1345
1346
1347 static unsigned
1348 set_vertex_data(struct gen_mipmap_state *ctx,
1349                 enum pipe_texture_target tex_target,
1350                 uint face, float width, float height)
1351 {
1352    unsigned offset;
1353
1354    /* vert[0].position */
1355    ctx->vertices[0][0][0] = 0.0f; /*x*/
1356    ctx->vertices[0][0][1] = 0.0f; /*y*/
1357
1358    /* vert[1].position */
1359    ctx->vertices[1][0][0] = width;
1360    ctx->vertices[1][0][1] = 0.0f;
1361
1362    /* vert[2].position */
1363    ctx->vertices[2][0][0] = width;
1364    ctx->vertices[2][0][1] = height;
1365
1366    /* vert[3].position */
1367    ctx->vertices[3][0][0] = 0.0f;
1368    ctx->vertices[3][0][1] = height;
1369
1370    /* Setup vertex texcoords.  This is a little tricky for cube maps. */
1371    if (tex_target == PIPE_TEXTURE_CUBE) {
1372       static const float st[4][2] = {
1373          {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
1374       };
1375       float rx, ry, rz;
1376       uint i;
1377
1378       /* loop over quad verts */
1379       for (i = 0; i < 4; i++) {
1380          /* Compute sc = +/-scale and tc = +/-scale.
1381           * Not +/-1 to avoid cube face selection ambiguity near the edges,
1382           * though that can still sometimes happen with this scale factor...
1383           */
1384          const float scale = 0.9999f;
1385          const float sc = (2.0f * st[i][0] - 1.0f) * scale;
1386          const float tc = (2.0f * st[i][1] - 1.0f) * scale;
1387
1388          switch (face) {
1389          case PIPE_TEX_FACE_POS_X:
1390             rx = 1.0f;
1391             ry = -tc;
1392             rz = -sc;
1393             break;
1394          case PIPE_TEX_FACE_NEG_X:
1395             rx = -1.0f;
1396             ry = -tc;
1397             rz = sc;
1398             break;
1399          case PIPE_TEX_FACE_POS_Y:
1400             rx = sc;
1401             ry = 1.0f;
1402             rz = tc;
1403             break;
1404          case PIPE_TEX_FACE_NEG_Y:
1405             rx = sc;
1406             ry = -1.0f;
1407             rz = -tc;
1408             break;
1409          case PIPE_TEX_FACE_POS_Z:
1410             rx = sc;
1411             ry = -tc;
1412             rz = 1.0f;
1413             break;
1414          case PIPE_TEX_FACE_NEG_Z:
1415             rx = -sc;
1416             ry = -tc;
1417             rz = -1.0f;
1418             break;
1419          default:
1420             assert(0);
1421          }
1422
1423          ctx->vertices[i][1][0] = rx; /*s*/
1424          ctx->vertices[i][1][1] = ry; /*t*/
1425          ctx->vertices[i][1][2] = rz; /*r*/
1426       }
1427    }
1428    else {
1429       /* 1D/2D */
1430       ctx->vertices[0][1][0] = 0.0f; /*s*/
1431       ctx->vertices[0][1][1] = 0.0f; /*t*/
1432       ctx->vertices[0][1][2] = 0.0f; /*r*/
1433
1434       ctx->vertices[1][1][0] = 1.0f;
1435       ctx->vertices[1][1][1] = 0.0f;
1436       ctx->vertices[1][1][2] = 0.0f;
1437
1438       ctx->vertices[2][1][0] = 1.0f;
1439       ctx->vertices[2][1][1] = 1.0f;
1440       ctx->vertices[2][1][2] = 0.0f;
1441
1442       ctx->vertices[3][1][0] = 0.0f;
1443       ctx->vertices[3][1][1] = 1.0f;
1444       ctx->vertices[3][1][2] = 0.0f;
1445    }
1446
1447    offset = get_next_slot( ctx );
1448
1449    pipe_buffer_write(ctx->pipe->screen, ctx->vbuf,
1450                      offset, sizeof(ctx->vertices), ctx->vertices);
1451
1452    return offset;
1453 }
1454
1455
1456
1457 /**
1458  * Destroy a mipmap generation context
1459  */
1460 void
1461 util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
1462 {
1463    struct pipe_context *pipe = ctx->pipe;
1464
1465    pipe->delete_vs_state(pipe, ctx->vs);
1466    pipe->delete_fs_state(pipe, ctx->fs);
1467
1468    pipe_buffer_reference(&ctx->vbuf, NULL);
1469
1470    FREE(ctx);
1471 }
1472
1473
1474
1475 /* Release vertex buffer at end of frame to avoid synchronous
1476  * rendering.
1477  */
1478 void util_gen_mipmap_flush( struct gen_mipmap_state *ctx )
1479 {
1480    pipe_buffer_reference(&ctx->vbuf, NULL);
1481    ctx->vbuf_slot = 0;
1482 }
1483
1484
1485 /**
1486  * Generate mipmap images.  It's assumed all needed texture memory is
1487  * already allocated.
1488  *
1489  * \param pt  the texture to generate mipmap levels for
1490  * \param face  which cube face to generate mipmaps for (0 for non-cube maps)
1491  * \param baseLevel  the first mipmap level to use as a src
1492  * \param lastLevel  the last mipmap level to generate
1493  * \param filter  the minification filter used to generate mipmap levels with
1494  * \param filter  one of PIPE_TEX_FILTER_LINEAR, PIPE_TEX_FILTER_NEAREST
1495  */
1496 void
1497 util_gen_mipmap(struct gen_mipmap_state *ctx,
1498                 struct pipe_texture *pt,
1499                 uint face, uint baseLevel, uint lastLevel, uint filter)
1500 {
1501    struct pipe_context *pipe = ctx->pipe;
1502    struct pipe_screen *screen = pipe->screen;
1503    struct pipe_framebuffer_state fb;
1504    uint dstLevel;
1505    uint zslice = 0;
1506    uint offset;
1507
1508    /* check if we can render in the texture's format */
1509    if (!screen->is_format_supported(screen, pt->format, PIPE_TEXTURE_2D,
1510                                     PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
1511       fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel);
1512       return;
1513    }
1514
1515    /* save state (restored below) */
1516    cso_save_blend(ctx->cso);
1517    cso_save_depth_stencil_alpha(ctx->cso);
1518    cso_save_rasterizer(ctx->cso);
1519    cso_save_samplers(ctx->cso);
1520    cso_save_sampler_textures(ctx->cso);
1521    cso_save_framebuffer(ctx->cso);
1522    cso_save_fragment_shader(ctx->cso);
1523    cso_save_vertex_shader(ctx->cso);
1524
1525    /* bind our state */
1526    cso_set_blend(ctx->cso, &ctx->blend);
1527    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
1528    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
1529
1530    cso_set_fragment_shader_handle(ctx->cso, ctx->fs);
1531    cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
1532
1533    /* init framebuffer state */
1534    memset(&fb, 0, sizeof(fb));
1535    fb.nr_cbufs = 1;
1536
1537    /* set min/mag to same filter for faster sw speed */
1538    ctx->sampler.mag_img_filter = filter;
1539    ctx->sampler.min_img_filter = filter;
1540
1541    /*
1542     * XXX for small mipmap levels, it may be faster to use the software
1543     * fallback path...
1544     */
1545    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
1546       const uint srcLevel = dstLevel - 1;
1547
1548       struct pipe_surface *surf =
1549          screen->get_tex_surface(screen, pt, face, dstLevel, zslice,
1550                                  PIPE_BUFFER_USAGE_GPU_WRITE);
1551
1552       /*
1553        * Setup framebuffer / dest surface
1554        */
1555       fb.cbufs[0] = surf;
1556       fb.width = pt->width[dstLevel];
1557       fb.height = pt->height[dstLevel];
1558       cso_set_framebuffer(ctx->cso, &fb);
1559
1560       /*
1561        * Setup sampler state
1562        * Note: we should only have to set the min/max LOD clamps to ensure
1563        * we grab texels from the right mipmap level.  But some hardware
1564        * has trouble with min clamping so we also set the lod_bias to
1565        * try to work around that.
1566        */
1567       ctx->sampler.min_lod = ctx->sampler.max_lod = (float) srcLevel;
1568       ctx->sampler.lod_bias = (float) srcLevel;
1569       cso_single_sampler(ctx->cso, 0, &ctx->sampler);
1570       cso_single_sampler_done(ctx->cso);
1571
1572       cso_set_sampler_textures(ctx->cso, 1, &pt);
1573
1574       /* quad coords in window coords (bypassing vs, clip and viewport) */
1575       offset = set_vertex_data(ctx,
1576                                pt->target,
1577                                face,
1578                                (float) pt->width[dstLevel],
1579                                (float) pt->height[dstLevel]);
1580
1581       util_draw_vertex_buffer(ctx->pipe,
1582                               ctx->vbuf,
1583                               offset,
1584                               PIPE_PRIM_TRIANGLE_FAN,
1585                               4,  /* verts */
1586                               2); /* attribs/vert */
1587
1588       pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
1589
1590       /* need to signal that the texture has changed _after_ rendering to it */
1591       pipe_surface_reference( &surf, NULL );
1592    }
1593
1594    /* restore state we changed */
1595    cso_restore_blend(ctx->cso);
1596    cso_restore_depth_stencil_alpha(ctx->cso);
1597    cso_restore_rasterizer(ctx->cso);
1598    cso_restore_samplers(ctx->cso);
1599    cso_restore_sampler_textures(ctx->cso);
1600    cso_restore_framebuffer(ctx->cso);
1601    cso_restore_fragment_shader(ctx->cso);
1602    cso_restore_vertex_shader(ctx->cso);
1603 }