src/gallium/auxiliary/util/u_gen_mipmap.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  * Copyright 2008  VMware, Inc.  All rights reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * @file
  31  * Mipmap generation utility
  32  *
  33  * @author Brian Paul
  34  */
  35
  36
  37 #include "pipe/p_context.h"
  38 #include "util/u_debug.h"
  39 #include "pipe/p_defines.h"
  40 #include "pipe/p_inlines.h"
  41 #include "pipe/p_shader_tokens.h"
  42 #include "pipe/p_state.h"
  43
  44 #include "util/u_memory.h"
  45 #include "util/u_draw_quad.h"
  46 #include "util/u_gen_mipmap.h"
  47 #include "util/u_simple_shaders.h"
  48
  49 #include "cso_cache/cso_context.h"
  50
  51
  52 struct gen_mipmap_state   /* Mipmap-generation state: context pointers, pipeline state blocks, shader handles and quad vertex storage */
  53 {
  54    struct pipe_context *pipe;   /**< pipe context pointer */
  55    struct cso_context *cso;     /**< CSO context pointer (type from cso_cache/cso_context.h) */
  56
  57    struct pipe_blend_state blend;                       /**< blend state block */
  58    struct pipe_depth_stencil_alpha_state depthstencil;  /**< depth/stencil/alpha state block */
  59    struct pipe_rasterizer_state rasterizer;             /**< rasterizer state block */
  60    struct pipe_sampler_state sampler;                   /**< sampler state block */
  61
  62    void *vs;   /**< opaque handle; presumably the vertex shader -- confirm where it is assigned */
  63    void *fs;   /**< opaque handle; presumably the fragment shader -- confirm where it is assigned */
  64
  65    struct pipe_buffer *vbuf;  /**< quad vertices */
  66    unsigned vbuf_slot;        /**< NOTE(review): looks like a position within vbuf -- confirm against its users */
  67
  68    float vertices[4][2][4];   /**< vertex/texcoords for quad: 4 vertices x 2 attributes x 4 floats */
  69 };
  70
  71
  72
  73 enum dtype   /* Storage layout of one pixel as understood by do_row() and do_row_3D() */
  74 {
  75    DTYPE_UBYTE,               /**< 1..4 ubyte channels per pixel */
  76    DTYPE_UBYTE_3_3_2,         /**< one ubyte per pixel; do_row() unpacks 2-, 3- and 3-bit fields starting at bit 0 */
  77    DTYPE_USHORT,              /**< 1..4 ushort channels per pixel */
  78    DTYPE_USHORT_4_4_4_4,      /**< one ushort per pixel holding four 4-bit fields */
  79    DTYPE_USHORT_5_6_5,        /**< one ushort per pixel; 5-, 6- and 5-bit fields starting at bit 0 */
  80    DTYPE_USHORT_1_5_5_5_REV,  /**< one ushort per pixel; three 5-bit fields from bit 0 plus a 1-bit field in bit 15 */
  81    DTYPE_UINT,                /**< one uint per pixel (only comps == 1 is handled) */
  82    DTYPE_FLOAT,               /**< 1..4 float channels per pixel */
  83    DTYPE_HALF_FLOAT           /**< 1..4 half_float channels; filtered by do_row_3D(), reported as a bad format by do_row() */
  84 };
  85
  86
  87 typedef ushort half_float;
  88
  89
  90 static half_float
  91 float_to_half(float f)
  92 {
  93    /* XXX fix this */
  94    return 0;
  95 }
  96
  97 static float
  98 half_to_float(half_float h)
  99 {
 100    /* XXX fix this */
 101    return 0.0f;
 102 }
 103
 104
 105
 106
 107 /**
 108  * \name Support macros for do_row and do_row_3D
 109  *
 110  * The macro madness is here for two reasons.  First, it compacts the code
 111  * slightly.  Second, it makes it much easier to adjust the specifics of the
 112  * filter to tune the rounding characteristics.
 113  */
 114 /*@{*/
 115 #define DECLARE_ROW_POINTERS(t, e) \
 116       const t(*rowA)[e] = (const t(*)[e]) srcRowA; \
 117       const t(*rowB)[e] = (const t(*)[e]) srcRowB; \
 118       const t(*rowC)[e] = (const t(*)[e]) srcRowC; \
 119       const t(*rowD)[e] = (const t(*)[e]) srcRowD; \
 120       t(*dst)[e] = (t(*)[e]) dstRow
 121
 122 #define DECLARE_ROW_POINTERS0(t) \
 123       const t *rowA = (const t *) srcRowA; \
 124       const t *rowB = (const t *) srcRowB; \
 125       const t *rowC = (const t *) srcRowC; \
 126       const t *rowD = (const t *) srcRowD; \
 127       t *dst = (t *) dstRow
 128
 129 #define FILTER_SUM_3D(Aj, Ak, Bj, Bk, Cj, Ck, Dj, Dk) \
 130    ((unsigned) Aj + (unsigned) Ak \
 131     + (unsigned) Bj + (unsigned) Bk \
 132     + (unsigned) Cj + (unsigned) Ck \
 133     + (unsigned) Dj + (unsigned) Dk \
 134     + 4) >> 3
 135
 136 #define FILTER_3D(e) \
 137    do { \
 138       dst[i][e] = FILTER_SUM_3D(rowA[j][e], rowA[k][e], \
 139                                 rowB[j][e], rowB[k][e], \
 140                                 rowC[j][e], rowC[k][e], \
 141                                 rowD[j][e], rowD[k][e]); \
 142    } while(0)
 143
 144 #define FILTER_F_3D(e) \
 145    do { \
 146       dst[i][e] = (rowA[j][e] + rowA[k][e] \
 147                    + rowB[j][e] + rowB[k][e] \
 148                    + rowC[j][e] + rowC[k][e] \
 149                    + rowD[j][e] + rowD[k][e]) * 0.125F; \
 150    } while(0)
 151
 152 #define FILTER_HF_3D(e) \
 153    do { \
 154       const float aj = half_to_float(rowA[j][e]); \
 155       const float ak = half_to_float(rowA[k][e]); \
 156       const float bj = half_to_float(rowB[j][e]); \
 157       const float bk = half_to_float(rowB[k][e]); \
 158       const float cj = half_to_float(rowC[j][e]); \
 159       const float ck = half_to_float(rowC[k][e]); \
 160       const float dj = half_to_float(rowD[j][e]); \
 161       const float dk = half_to_float(rowD[k][e]); \
 162       dst[i][e] = float_to_half((aj + ak + bj + bk + cj + ck + dj + dk) \
 163                                       * 0.125F); \
 164    } while(0)
 165 /*@}*/
 166
 167
 168 /**
 169  * Average together two rows of a source image to produce a single new
 170  * row in the dest image.  It's legal for the two source rows to point
 171  * to the same data.  The source width must be equal to either the
 172  * dest width or two times the dest width.
 173  * \param datatype  GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT, GL_FLOAT, etc.
 174  * \param comps  number of components per pixel (1..4)
 175  */
 176 static void
 177 do_row(enum dtype datatype, uint comps, int srcWidth,
 178        const void *srcRowA, const void *srcRowB,
 179        int dstWidth, void *dstRow)
 180 {
 181    const uint k0 = (srcWidth == dstWidth) ? 0 : 1;
 182    const uint colStride = (srcWidth == dstWidth) ? 1 : 2;
 183
 184    assert(comps >= 1);
 185    assert(comps <= 4);
 186
 187    /* This assertion is no longer valid with non-power-of-2 textures
 188    assert(srcWidth == dstWidth || srcWidth == 2 * dstWidth);
 189    */
 190
 191    if (datatype == DTYPE_UBYTE && comps == 4) {
 192       uint i, j, k;
 193       const ubyte(*rowA)[4] = (const ubyte(*)[4]) srcRowA;
 194       const ubyte(*rowB)[4] = (const ubyte(*)[4]) srcRowB;
 195       ubyte(*dst)[4] = (ubyte(*)[4]) dstRow;
 196       for (i = j = 0, k = k0; i < (uint) dstWidth;
 197            i++, j += colStride, k += colStride) {
 198          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
 199          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
 200          dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
 201          dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4;
 202       }
 203    }
 204    else if (datatype == DTYPE_UBYTE && comps == 3) {
 205       uint i, j, k;
 206       const ubyte(*rowA)[3] = (const ubyte(*)[3]) srcRowA;
 207       const ubyte(*rowB)[3] = (const ubyte(*)[3]) srcRowB;
 208       ubyte(*dst)[3] = (ubyte(*)[3]) dstRow;
 209       for (i = j = 0, k = k0; i < (uint) dstWidth;
 210            i++, j += colStride, k += colStride) {
 211          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
 212          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
 213          dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
 214       }
 215    }
 216    else if (datatype == DTYPE_UBYTE && comps == 2) {
 217       uint i, j, k;
 218       const ubyte(*rowA)[2] = (const ubyte(*)[2]) srcRowA;
 219       const ubyte(*rowB)[2] = (const ubyte(*)[2]) srcRowB;
 220       ubyte(*dst)[2] = (ubyte(*)[2]) dstRow;
 221       for (i = j = 0, k = k0; i < (uint) dstWidth;
 222            i++, j += colStride, k += colStride) {
 223          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) >> 2;
 224          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) >> 2;
 225       }
 226    }
 227    else if (datatype == DTYPE_UBYTE && comps == 1) {
 228       uint i, j, k;
 229       const ubyte *rowA = (const ubyte *) srcRowA;
 230       const ubyte *rowB = (const ubyte *) srcRowB;
 231       ubyte *dst = (ubyte *) dstRow;
 232       for (i = j = 0, k = k0; i < (uint) dstWidth;
 233            i++, j += colStride, k += colStride) {
 234          dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) >> 2;
 235       }
 236    }
 237
 238    else if (datatype == DTYPE_USHORT && comps == 4) {
 239       uint i, j, k;
 240       const ushort(*rowA)[4] = (const ushort(*)[4]) srcRowA;
 241       const ushort(*rowB)[4] = (const ushort(*)[4]) srcRowB;
 242       ushort(*dst)[4] = (ushort(*)[4]) dstRow;
 243       for (i = j = 0, k = k0; i < (uint) dstWidth;
 244            i++, j += colStride, k += colStride) {
 245          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
 246          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
 247          dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
 248          dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4;
 249       }
 250    }
 251    else if (datatype == DTYPE_USHORT && comps == 3) {
 252       uint i, j, k;
 253       const ushort(*rowA)[3] = (const ushort(*)[3]) srcRowA;
 254       const ushort(*rowB)[3] = (const ushort(*)[3]) srcRowB;
 255       ushort(*dst)[3] = (ushort(*)[3]) dstRow;
 256       for (i = j = 0, k = k0; i < (uint) dstWidth;
 257            i++, j += colStride, k += colStride) {
 258          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
 259          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
 260          dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
 261       }
 262    }
 263    else if (datatype == DTYPE_USHORT && comps == 2) {
 264       uint i, j, k;
 265       const ushort(*rowA)[2] = (const ushort(*)[2]) srcRowA;
 266       const ushort(*rowB)[2] = (const ushort(*)[2]) srcRowB;
 267       ushort(*dst)[2] = (ushort(*)[2]) dstRow;
 268       for (i = j = 0, k = k0; i < (uint) dstWidth;
 269            i++, j += colStride, k += colStride) {
 270          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
 271          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
 272       }
 273    }
 274    else if (datatype == DTYPE_USHORT && comps == 1) {
 275       uint i, j, k;
 276       const ushort *rowA = (const ushort *) srcRowA;
 277       const ushort *rowB = (const ushort *) srcRowB;
 278       ushort *dst = (ushort *) dstRow;
 279       for (i = j = 0, k = k0; i < (uint) dstWidth;
 280            i++, j += colStride, k += colStride) {
 281          dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) / 4;
 282       }
 283    }
 284
 285    else if (datatype == DTYPE_FLOAT && comps == 4) {
 286       uint i, j, k;
 287       const float(*rowA)[4] = (const float(*)[4]) srcRowA;
 288       const float(*rowB)[4] = (const float(*)[4]) srcRowB;
 289       float(*dst)[4] = (float(*)[4]) dstRow;
 290       for (i = j = 0, k = k0; i < (uint) dstWidth;
 291            i++, j += colStride, k += colStride) {
 292          dst[i][0] = (rowA[j][0] + rowA[k][0] +
 293                       rowB[j][0] + rowB[k][0]) * 0.25F;
 294          dst[i][1] = (rowA[j][1] + rowA[k][1] +
 295                       rowB[j][1] + rowB[k][1]) * 0.25F;
 296          dst[i][2] = (rowA[j][2] + rowA[k][2] +
 297                       rowB[j][2] + rowB[k][2]) * 0.25F;
 298          dst[i][3] = (rowA[j][3] + rowA[k][3] +
 299                       rowB[j][3] + rowB[k][3]) * 0.25F;
 300       }
 301    }
 302    else if (datatype == DTYPE_FLOAT && comps == 3) {
 303       uint i, j, k;
 304       const float(*rowA)[3] = (const float(*)[3]) srcRowA;
 305       const float(*rowB)[3] = (const float(*)[3]) srcRowB;
 306       float(*dst)[3] = (float(*)[3]) dstRow;
 307       for (i = j = 0, k = k0; i < (uint) dstWidth;
 308            i++, j += colStride, k += colStride) {
 309          dst[i][0] = (rowA[j][0] + rowA[k][0] +
 310                       rowB[j][0] + rowB[k][0]) * 0.25F;
 311          dst[i][1] = (rowA[j][1] + rowA[k][1] +
 312                       rowB[j][1] + rowB[k][1]) * 0.25F;
 313          dst[i][2] = (rowA[j][2] + rowA[k][2] +
 314                       rowB[j][2] + rowB[k][2]) * 0.25F;
 315       }
 316    }
 317    else if (datatype == DTYPE_FLOAT && comps == 2) {
 318       uint i, j, k;
 319       const float(*rowA)[2] = (const float(*)[2]) srcRowA;
 320       const float(*rowB)[2] = (const float(*)[2]) srcRowB;
 321       float(*dst)[2] = (float(*)[2]) dstRow;
 322       for (i = j = 0, k = k0; i < (uint) dstWidth;
 323            i++, j += colStride, k += colStride) {
 324          dst[i][0] = (rowA[j][0] + rowA[k][0] +
 325                       rowB[j][0] + rowB[k][0]) * 0.25F;
 326          dst[i][1] = (rowA[j][1] + rowA[k][1] +
 327                       rowB[j][1] + rowB[k][1]) * 0.25F;
 328       }
 329    }
 330    else if (datatype == DTYPE_FLOAT && comps == 1) {
 331       uint i, j, k;
 332       const float *rowA = (const float *) srcRowA;
 333       const float *rowB = (const float *) srcRowB;
 334       float *dst = (float *) dstRow;
 335       for (i = j = 0, k = k0; i < (uint) dstWidth;
 336            i++, j += colStride, k += colStride) {
 337          dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) * 0.25F;
 338       }
 339    }
 340
 341 #if 0
 342    else if (datatype == HALF_DTYPE_FLOAT && comps == 4) {
 343       uint i, j, k, comp;
 344       const half_float(*rowA)[4] = (const half_float(*)[4]) srcRowA;
 345       const half_float(*rowB)[4] = (const half_float(*)[4]) srcRowB;
 346       half_float(*dst)[4] = (half_float(*)[4]) dstRow;
 347       for (i = j = 0, k = k0; i < (uint) dstWidth;
 348            i++, j += colStride, k += colStride) {
 349          for (comp = 0; comp < 4; comp++) {
 350             float aj, ak, bj, bk;
 351             aj = half_to_float(rowA[j][comp]);
 352             ak = half_to_float(rowA[k][comp]);
 353             bj = half_to_float(rowB[j][comp]);
 354             bk = half_to_float(rowB[k][comp]);
 355             dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F);
 356          }
 357       }
 358    }
 359    else if (datatype == DTYPE_HALF_FLOAT && comps == 3) {
 360       uint i, j, k, comp;
 361       const half_float(*rowA)[3] = (const half_float(*)[3]) srcRowA;
 362       const half_float(*rowB)[3] = (const half_float(*)[3]) srcRowB;
 363       half_float(*dst)[3] = (half_float(*)[3]) dstRow;
 364       for (i = j = 0, k = k0; i < (uint) dstWidth;
 365            i++, j += colStride, k += colStride) {
 366          for (comp = 0; comp < 3; comp++) {
 367             float aj, ak, bj, bk;
 368             aj = half_to_float(rowA[j][comp]);
 369             ak = half_to_float(rowA[k][comp]);
 370             bj = half_to_float(rowB[j][comp]);
 371             bk = half_to_float(rowB[k][comp]);
 372             dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F);
 373          }
 374       }
 375    }
 376    else if (datatype == DTYPE_HALF_FLOAT && comps == 2) {
 377       uint i, j, k, comp;
 378       const half_float(*rowA)[2] = (const half_float(*)[2]) srcRowA;
 379       const half_float(*rowB)[2] = (const half_float(*)[2]) srcRowB;
 380       half_float(*dst)[2] = (half_float(*)[2]) dstRow;
 381       for (i = j = 0, k = k0; i < (uint) dstWidth;
 382            i++, j += colStride, k += colStride) {
 383          for (comp = 0; comp < 2; comp++) {
 384             float aj, ak, bj, bk;
 385             aj = half_to_float(rowA[j][comp]);
 386             ak = half_to_float(rowA[k][comp]);
 387             bj = half_to_float(rowB[j][comp]);
 388             bk = half_to_float(rowB[k][comp]);
 389             dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F);
 390          }
 391       }
 392    }
 393    else if (datatype == DTYPE_HALF_FLOAT && comps == 1) {
 394       uint i, j, k;
 395       const half_float *rowA = (const half_float *) srcRowA;
 396       const half_float *rowB = (const half_float *) srcRowB;
 397       half_float *dst = (half_float *) dstRow;
 398       for (i = j = 0, k = k0; i < (uint) dstWidth;
 399            i++, j += colStride, k += colStride) {
 400          float aj, ak, bj, bk;
 401          aj = half_to_float(rowA[j]);
 402          ak = half_to_float(rowA[k]);
 403          bj = half_to_float(rowB[j]);
 404          bk = half_to_float(rowB[k]);
 405          dst[i] = float_to_half((aj + ak + bj + bk) * 0.25F);
 406       }
 407    }
 408 #endif
 409
 410    else if (datatype == DTYPE_UINT && comps == 1) {
 411       uint i, j, k;
 412       const uint *rowA = (const uint *) srcRowA;
 413       const uint *rowB = (const uint *) srcRowB;
 414       uint *dst = (uint *) dstRow;
 415       for (i = j = 0, k = k0; i < (uint) dstWidth;
 416            i++, j += colStride, k += colStride) {
 417          dst[i] = rowA[j] / 4 + rowA[k] / 4 + rowB[j] / 4 + rowB[k] / 4;
 418       }
 419    }
 420
 421    else if (datatype == DTYPE_USHORT_5_6_5 && comps == 3) {
 422       uint i, j, k;
 423       const ushort *rowA = (const ushort *) srcRowA;
 424       const ushort *rowB = (const ushort *) srcRowB;
 425       ushort *dst = (ushort *) dstRow;
 426       for (i = j = 0, k = k0; i < (uint) dstWidth;
 427            i++, j += colStride, k += colStride) {
 428          const int rowAr0 = rowA[j] & 0x1f;
 429          const int rowAr1 = rowA[k] & 0x1f;
 430          const int rowBr0 = rowB[j] & 0x1f;
 431          const int rowBr1 = rowB[k] & 0x1f;
 432          const int rowAg0 = (rowA[j] >> 5) & 0x3f;
 433          const int rowAg1 = (rowA[k] >> 5) & 0x3f;
 434          const int rowBg0 = (rowB[j] >> 5) & 0x3f;
 435          const int rowBg1 = (rowB[k] >> 5) & 0x3f;
 436          const int rowAb0 = (rowA[j] >> 11) & 0x1f;
 437          const int rowAb1 = (rowA[k] >> 11) & 0x1f;
 438          const int rowBb0 = (rowB[j] >> 11) & 0x1f;
 439          const int rowBb1 = (rowB[k] >> 11) & 0x1f;
 440          const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
 441          const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
 442          const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
 443          dst[i] = (blue << 11) | (green << 5) | red;
 444       }
 445    }
 446    else if (datatype == DTYPE_USHORT_4_4_4_4 && comps == 4) {
 447       uint i, j, k;
 448       const ushort *rowA = (const ushort *) srcRowA;
 449       const ushort *rowB = (const ushort *) srcRowB;
 450       ushort *dst = (ushort *) dstRow;
 451       for (i = j = 0, k = k0; i < (uint) dstWidth;
 452            i++, j += colStride, k += colStride) {
 453          const int rowAr0 = rowA[j] & 0xf;
 454          const int rowAr1 = rowA[k] & 0xf;
 455          const int rowBr0 = rowB[j] & 0xf;
 456          const int rowBr1 = rowB[k] & 0xf;
 457          const int rowAg0 = (rowA[j] >> 4) & 0xf;
 458          const int rowAg1 = (rowA[k] >> 4) & 0xf;
 459          const int rowBg0 = (rowB[j] >> 4) & 0xf;
 460          const int rowBg1 = (rowB[k] >> 4) & 0xf;
 461          const int rowAb0 = (rowA[j] >> 8) & 0xf;
 462          const int rowAb1 = (rowA[k] >> 8) & 0xf;
 463          const int rowBb0 = (rowB[j] >> 8) & 0xf;
 464          const int rowBb1 = (rowB[k] >> 8) & 0xf;
 465          const int rowAa0 = (rowA[j] >> 12) & 0xf;
 466          const int rowAa1 = (rowA[k] >> 12) & 0xf;
 467          const int rowBa0 = (rowB[j] >> 12) & 0xf;
 468          const int rowBa1 = (rowB[k] >> 12) & 0xf;
 469          const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
 470          const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
 471          const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
 472          const int alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2;
 473          dst[i] = (alpha << 12) | (blue << 8) | (green << 4) | red;
 474       }
 475    }
 476    else if (datatype == DTYPE_USHORT_1_5_5_5_REV && comps == 4) {
 477       uint i, j, k;
 478       const ushort *rowA = (const ushort *) srcRowA;
 479       const ushort *rowB = (const ushort *) srcRowB;
 480       ushort *dst = (ushort *) dstRow;
 481       for (i = j = 0, k = k0; i < (uint) dstWidth;
 482            i++, j += colStride, k += colStride) {
 483          const int rowAr0 = rowA[j] & 0x1f;
 484          const int rowAr1 = rowA[k] & 0x1f;
 485          const int rowBr0 = rowB[j] & 0x1f;
 486          const int rowBr1 = rowB[k] & 0x1f;
 487          const int rowAg0 = (rowA[j] >> 5) & 0x1f;
 488          const int rowAg1 = (rowA[k] >> 5) & 0x1f;
 489          const int rowBg0 = (rowB[j] >> 5) & 0x1f;
 490          const int rowBg1 = (rowB[k] >> 5) & 0x1f;
 491          const int rowAb0 = (rowA[j] >> 10) & 0x1f;
 492          const int rowAb1 = (rowA[k] >> 10) & 0x1f;
 493          const int rowBb0 = (rowB[j] >> 10) & 0x1f;
 494          const int rowBb1 = (rowB[k] >> 10) & 0x1f;
 495          const int rowAa0 = (rowA[j] >> 15) & 0x1;
 496          const int rowAa1 = (rowA[k] >> 15) & 0x1;
 497          const int rowBa0 = (rowB[j] >> 15) & 0x1;
 498          const int rowBa1 = (rowB[k] >> 15) & 0x1;
 499          const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
 500          const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
 501          const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
 502          const int alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2;
 503          dst[i] = (alpha << 15) | (blue << 10) | (green << 5) | red;
 504       }
 505    }
 506    else if (datatype == DTYPE_UBYTE_3_3_2 && comps == 3) {
 507       uint i, j, k;
 508       const ubyte *rowA = (const ubyte *) srcRowA;
 509       const ubyte *rowB = (const ubyte *) srcRowB;
 510       ubyte *dst = (ubyte *) dstRow;
 511       for (i = j = 0, k = k0; i < (uint) dstWidth;
 512            i++, j += colStride, k += colStride) {
 513          const int rowAr0 = rowA[j] & 0x3;
 514          const int rowAr1 = rowA[k] & 0x3;
 515          const int rowBr0 = rowB[j] & 0x3;
 516          const int rowBr1 = rowB[k] & 0x3;
 517          const int rowAg0 = (rowA[j] >> 2) & 0x7;
 518          const int rowAg1 = (rowA[k] >> 2) & 0x7;
 519          const int rowBg0 = (rowB[j] >> 2) & 0x7;
 520          const int rowBg1 = (rowB[k] >> 2) & 0x7;
 521          const int rowAb0 = (rowA[j] >> 5) & 0x7;
 522          const int rowAb1 = (rowA[k] >> 5) & 0x7;
 523          const int rowBb0 = (rowB[j] >> 5) & 0x7;
 524          const int rowBb1 = (rowB[k] >> 5) & 0x7;
 525          const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
 526          const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
 527          const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
 528          dst[i] = (blue << 5) | (green << 2) | red;
 529       }
 530    }
 531    else {
 532       debug_printf("bad format in do_row()");
 533    }
 534 }
 535
 536
 537 /**
 538  * Average together four rows of a source image to produce a single new
 539  * row in the dest image.  It's legal for several of the source rows to
 540  * point to the same data.  The source width must be equal to either the
 541  * dest width or two times the dest width.
 542  *
 543  * \param datatype  GL pixel type \c GL_UNSIGNED_BYTE, \c GL_UNSIGNED_SHORT,
 544  *                  \c GL_FLOAT, etc.
 545  * \param comps     number of components per pixel (1..4)
 546  * \param srcWidth  Width of a row in the source data
 547  * \param srcRowA   Pointer to one of the rows of source data
 548  * \param srcRowB   Pointer to one of the rows of source data
 549  * \param srcRowC   Pointer to one of the rows of source data
 550  * \param srcRowD   Pointer to one of the rows of source data
 551  * \param dstWidth  Width of a row in the destination data
 552  * \param dstRow    Pointer to the row of destination data
 553  */
 554 static void
 555 do_row_3D(enum dtype datatype, uint comps, int srcWidth,
 556           const void *srcRowA, const void *srcRowB,
 557           const void *srcRowC, const void *srcRowD,
 558           int dstWidth, void *dstRow)
 559 {
 560    const uint k0 = (srcWidth == dstWidth) ? 0 : 1;
 561    const uint colStride = (srcWidth == dstWidth) ? 1 : 2;
 562    uint i, j, k;
 563
 564    assert(comps >= 1);
 565    assert(comps <= 4);
 566
 567    if ((datatype == DTYPE_UBYTE) && (comps == 4)) {
 568       DECLARE_ROW_POINTERS(ubyte, 4);
 569
 570       for (i = j = 0, k = k0; i < (uint) dstWidth;
 571            i++, j += colStride, k += colStride) {
 572          FILTER_3D(0);
 573          FILTER_3D(1);
 574          FILTER_3D(2);
 575          FILTER_3D(3);
 576       }
 577    }
 578    else if ((datatype == DTYPE_UBYTE) && (comps == 3)) {
 579       DECLARE_ROW_POINTERS(ubyte, 3);
 580
 581       for (i = j = 0, k = k0; i < (uint) dstWidth;
 582            i++, j += colStride, k += colStride) {
 583          FILTER_3D(0);
 584          FILTER_3D(1);
 585          FILTER_3D(2);
 586       }
 587    }
 588    else if ((datatype == DTYPE_UBYTE) && (comps == 2)) {
 589       DECLARE_ROW_POINTERS(ubyte, 2);
 590
 591       for (i = j = 0, k = k0; i < (uint) dstWidth;
 592            i++, j += colStride, k += colStride) {
 593          FILTER_3D(0);
 594          FILTER_3D(1);
 595       }
 596    }
 597    else if ((datatype == DTYPE_UBYTE) && (comps == 1)) {
 598       DECLARE_ROW_POINTERS(ubyte, 1);
 599
 600       for (i = j = 0, k = k0; i < (uint) dstWidth;
 601            i++, j += colStride, k += colStride) {
 602          FILTER_3D(0);
 603       }
 604    }
 605    else if ((datatype == DTYPE_USHORT) && (comps == 4)) {
 606       DECLARE_ROW_POINTERS(ushort, 4);
 607
 608       for (i = j = 0, k = k0; i < (uint) dstWidth;
 609            i++, j += colStride, k += colStride) {
 610          FILTER_3D(0);
 611          FILTER_3D(1);
 612          FILTER_3D(2);
 613          FILTER_3D(3);
 614       }
 615    }
 616    else if ((datatype == DTYPE_USHORT) && (comps == 3)) {
 617       DECLARE_ROW_POINTERS(ushort, 3);
 618
 619       for (i = j = 0, k = k0; i < (uint) dstWidth;
 620            i++, j += colStride, k += colStride) {
 621          FILTER_3D(0);
 622          FILTER_3D(1);
 623          FILTER_3D(2);
 624       }
 625    }
 626    else if ((datatype == DTYPE_USHORT) && (comps == 2)) {
 627       DECLARE_ROW_POINTERS(ushort, 2);
 628
 629       for (i = j = 0, k = k0; i < (uint) dstWidth;
 630            i++, j += colStride, k += colStride) {
 631          FILTER_3D(0);
 632          FILTER_3D(1);
 633       }
 634    }
 635    else if ((datatype == DTYPE_USHORT) && (comps == 1)) {
 636       DECLARE_ROW_POINTERS(ushort, 1);
 637
 638       for (i = j = 0, k = k0; i < (uint) dstWidth;
 639            i++, j += colStride, k += colStride) {
 640          FILTER_3D(0);
 641       }
 642    }
 643    else if ((datatype == DTYPE_FLOAT) && (comps == 4)) {
 644       DECLARE_ROW_POINTERS(float, 4);
 645
 646       for (i = j = 0, k = k0; i < (uint) dstWidth;
 647            i++, j += colStride, k += colStride) {
 648          FILTER_F_3D(0);
 649          FILTER_F_3D(1);
 650          FILTER_F_3D(2);
 651          FILTER_F_3D(3);
 652       }
 653    }
 654    else if ((datatype == DTYPE_FLOAT) && (comps == 3)) {
 655       DECLARE_ROW_POINTERS(float, 3);
 656
 657       for (i = j = 0, k = k0; i < (uint) dstWidth;
 658            i++, j += colStride, k += colStride) {
 659          FILTER_F_3D(0);
 660          FILTER_F_3D(1);
 661          FILTER_F_3D(2);
 662       }
 663    }
 664    else if ((datatype == DTYPE_FLOAT) && (comps == 2)) {
 665       DECLARE_ROW_POINTERS(float, 2);
 666
 667       for (i = j = 0, k = k0; i < (uint) dstWidth;
 668            i++, j += colStride, k += colStride) {
 669          FILTER_F_3D(0);
 670          FILTER_F_3D(1);
 671       }
 672    }
 673    else if ((datatype == DTYPE_FLOAT) && (comps == 1)) {
 674       DECLARE_ROW_POINTERS(float, 1);
 675
 676       for (i = j = 0, k = k0; i < (uint) dstWidth;
 677            i++, j += colStride, k += colStride) {
 678          FILTER_F_3D(0);
 679       }
 680    }
 681    else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 4)) {
 682       DECLARE_ROW_POINTERS(half_float, 4);
 683
 684       for (i = j = 0, k = k0; i < (uint) dstWidth;
 685            i++, j += colStride, k += colStride) {
 686          FILTER_HF_3D(0);
 687          FILTER_HF_3D(1);
 688          FILTER_HF_3D(2);
 689          FILTER_HF_3D(3);
 690       }
 691    }
 692    else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 3)) {
 693       DECLARE_ROW_POINTERS(half_float, 4);
 694
 695       for (i = j = 0, k = k0; i < (uint) dstWidth;
 696            i++, j += colStride, k += colStride) {
 697          FILTER_HF_3D(0);
 698          FILTER_HF_3D(1);
 699          FILTER_HF_3D(2);
 700       }
 701    }
 702    else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 2)) {
 703       DECLARE_ROW_POINTERS(half_float, 4);
 704
 705       for (i = j = 0, k = k0; i < (uint) dstWidth;
 706            i++, j += colStride, k += colStride) {
 707          FILTER_HF_3D(0);
 708          FILTER_HF_3D(1);
 709       }
 710    }
 711    else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 1)) {
 712       DECLARE_ROW_POINTERS(half_float, 4);
 713
 714       for (i = j = 0, k = k0; i < (uint) dstWidth;
 715            i++, j += colStride, k += colStride) {
 716          FILTER_HF_3D(0);
 717       }
 718    }
 719    else if ((datatype == DTYPE_UINT) && (comps == 1)) {
 720       const uint *rowA = (const uint *) srcRowA;
 721       const uint *rowB = (const uint *) srcRowB;
 722       const uint *rowC = (const uint *) srcRowC;
 723       const uint *rowD = (const uint *) srcRowD;
 724       float *dst = (float *) dstRow;
 725
 726       for (i = j = 0, k = k0; i < (uint) dstWidth;
 727            i++, j += colStride, k += colStride) {
 728          const uint64_t tmp = (((uint64_t) rowA[j] + (uint64_t) rowA[k])
 729                                + ((uint64_t) rowB[j] + (uint64_t) rowB[k])
 730                                + ((uint64_t) rowC[j] + (uint64_t) rowC[k])
 731                                + ((uint64_t) rowD[j] + (uint64_t) rowD[k]));
 732          dst[i] = (float)((double) tmp * 0.125);
 733       }
 734    }
 735    else if ((datatype == DTYPE_USHORT_5_6_5) && (comps == 3)) {
 736       DECLARE_ROW_POINTERS0(ushort);
 737
 738       for (i = j = 0, k = k0; i < (uint) dstWidth;
 739            i++, j += colStride, k += colStride) {
 740          const int rowAr0 = rowA[j] & 0x1f;
 741          const int rowAr1 = rowA[k] & 0x1f;
 742          const int rowBr0 = rowB[j] & 0x1f;
 743          const int rowBr1 = rowB[k] & 0x1f;
 744          const int rowCr0 = rowC[j] & 0x1f;
 745          const int rowCr1 = rowC[k] & 0x1f;
 746          const int rowDr0 = rowD[j] & 0x1f;
 747          const int rowDr1 = rowD[k] & 0x1f;
 748          const int rowAg0 = (rowA[j] >> 5) & 0x3f;
 749          const int rowAg1 = (rowA[k] >> 5) & 0x3f;
 750          const int rowBg0 = (rowB[j] >> 5) & 0x3f;
 751          const int rowBg1 = (rowB[k] >> 5) & 0x3f;
 752          const int rowCg0 = (rowC[j] >> 5) & 0x3f;
 753          const int rowCg1 = (rowC[k] >> 5) & 0x3f;
 754          const int rowDg0 = (rowD[j] >> 5) & 0x3f;
 755          const int rowDg1 = (rowD[k] >> 5) & 0x3f;
 756          const int rowAb0 = (rowA[j] >> 11) & 0x1f;
 757          const int rowAb1 = (rowA[k] >> 11) & 0x1f;
 758          const int rowBb0 = (rowB[j] >> 11) & 0x1f;
 759          const int rowBb1 = (rowB[k] >> 11) & 0x1f;
 760          const int rowCb0 = (rowC[j] >> 11) & 0x1f;
 761          const int rowCb1 = (rowC[k] >> 11) & 0x1f;
 762          const int rowDb0 = (rowD[j] >> 11) & 0x1f;
 763          const int rowDb1 = (rowD[k] >> 11) & 0x1f;
 764          const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
 765                                        rowCr0, rowCr1, rowDr0, rowDr1);
 766          const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
 767                                        rowCg0, rowCg1, rowDg0, rowDg1);
 768          const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
 769                                        rowCb0, rowCb1, rowDb0, rowDb1);
 770          dst[i] = (b << 11) | (g << 5) | r;
 771       }
 772    }
 773    else if ((datatype == DTYPE_USHORT_4_4_4_4) && (comps == 4)) {
 774       DECLARE_ROW_POINTERS0(ushort);
 775
 776       for (i = j = 0, k = k0; i < (uint) dstWidth;
 777            i++, j += colStride, k += colStride) {
 778          const int rowAr0 = rowA[j] & 0xf;
 779          const int rowAr1 = rowA[k] & 0xf;
 780          const int rowBr0 = rowB[j] & 0xf;
 781          const int rowBr1 = rowB[k] & 0xf;
 782          const int rowCr0 = rowC[j] & 0xf;
 783          const int rowCr1 = rowC[k] & 0xf;
 784          const int rowDr0 = rowD[j] & 0xf;
 785          const int rowDr1 = rowD[k] & 0xf;
 786          const int rowAg0 = (rowA[j] >> 4) & 0xf;
 787          const int rowAg1 = (rowA[k] >> 4) & 0xf;
 788          const int rowBg0 = (rowB[j] >> 4) & 0xf;
 789          const int rowBg1 = (rowB[k] >> 4) & 0xf;
 790          const int rowCg0 = (rowC[j] >> 4) & 0xf;
 791          const int rowCg1 = (rowC[k] >> 4) & 0xf;
 792          const int rowDg0 = (rowD[j] >> 4) & 0xf;
 793          const int rowDg1 = (rowD[k] >> 4) & 0xf;
 794          const int rowAb0 = (rowA[j] >> 8) & 0xf;
 795          const int rowAb1 = (rowA[k] >> 8) & 0xf;
 796          const int rowBb0 = (rowB[j] >> 8) & 0xf;
 797          const int rowBb1 = (rowB[k] >> 8) & 0xf;
 798          const int rowCb0 = (rowC[j] >> 8) & 0xf;
 799          const int rowCb1 = (rowC[k] >> 8) & 0xf;
 800          const int rowDb0 = (rowD[j] >> 8) & 0xf;
 801          const int rowDb1 = (rowD[k] >> 8) & 0xf;
 802          const int rowAa0 = (rowA[j] >> 12) & 0xf;
 803          const int rowAa1 = (rowA[k] >> 12) & 0xf;
 804          const int rowBa0 = (rowB[j] >> 12) & 0xf;
 805          const int rowBa1 = (rowB[k] >> 12) & 0xf;
 806          const int rowCa0 = (rowC[j] >> 12) & 0xf;
 807          const int rowCa1 = (rowC[k] >> 12) & 0xf;
 808          const int rowDa0 = (rowD[j] >> 12) & 0xf;
 809          const int rowDa1 = (rowD[k] >> 12) & 0xf;
 810          const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
 811                                        rowCr0, rowCr1, rowDr0, rowDr1);
 812          const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
 813                                        rowCg0, rowCg1, rowDg0, rowDg1);
 814          const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
 815                                        rowCb0, rowCb1, rowDb0, rowDb1);
 816          const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1,
 817                                        rowCa0, rowCa1, rowDa0, rowDa1);
 818
 819          dst[i] = (a << 12) | (b << 8) | (g << 4) | r;
 820       }
 821    }
 822    else if ((datatype == DTYPE_USHORT_1_5_5_5_REV) && (comps == 4)) {
 823       DECLARE_ROW_POINTERS0(ushort);
 824
 825       for (i = j = 0, k = k0; i < (uint) dstWidth;
 826            i++, j += colStride, k += colStride) {
 827          const int rowAr0 = rowA[j] & 0x1f;
 828          const int rowAr1 = rowA[k] & 0x1f;
 829          const int rowBr0 = rowB[j] & 0x1f;
 830          const int rowBr1 = rowB[k] & 0x1f;
 831          const int rowCr0 = rowC[j] & 0x1f;
 832          const int rowCr1 = rowC[k] & 0x1f;
 833          const int rowDr0 = rowD[j] & 0x1f;
 834          const int rowDr1 = rowD[k] & 0x1f;
 835          const int rowAg0 = (rowA[j] >> 5) & 0x1f;
 836          const int rowAg1 = (rowA[k] >> 5) & 0x1f;
 837          const int rowBg0 = (rowB[j] >> 5) & 0x1f;
 838          const int rowBg1 = (rowB[k] >> 5) & 0x1f;
 839          const int rowCg0 = (rowC[j] >> 5) & 0x1f;
 840          const int rowCg1 = (rowC[k] >> 5) & 0x1f;
 841          const int rowDg0 = (rowD[j] >> 5) & 0x1f;
 842          const int rowDg1 = (rowD[k] >> 5) & 0x1f;
 843          const int rowAb0 = (rowA[j] >> 10) & 0x1f;
 844          const int rowAb1 = (rowA[k] >> 10) & 0x1f;
 845          const int rowBb0 = (rowB[j] >> 10) & 0x1f;
 846          const int rowBb1 = (rowB[k] >> 10) & 0x1f;
 847          const int rowCb0 = (rowC[j] >> 10) & 0x1f;
 848          const int rowCb1 = (rowC[k] >> 10) & 0x1f;
 849          const int rowDb0 = (rowD[j] >> 10) & 0x1f;
 850          const int rowDb1 = (rowD[k] >> 10) & 0x1f;
 851          const int rowAa0 = (rowA[j] >> 15) & 0x1;
 852          const int rowAa1 = (rowA[k] >> 15) & 0x1;
 853          const int rowBa0 = (rowB[j] >> 15) & 0x1;
 854          const int rowBa1 = (rowB[k] >> 15) & 0x1;
 855          const int rowCa0 = (rowC[j] >> 15) & 0x1;
 856          const int rowCa1 = (rowC[k] >> 15) & 0x1;
 857          const int rowDa0 = (rowD[j] >> 15) & 0x1;
 858          const int rowDa1 = (rowD[k] >> 15) & 0x1;
 859          const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
 860                                        rowCr0, rowCr1, rowDr0, rowDr1);
 861          const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
 862                                        rowCg0, rowCg1, rowDg0, rowDg1);
 863          const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
 864                                        rowCb0, rowCb1, rowDb0, rowDb1);
 865          const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1,
 866                                        rowCa0, rowCa1, rowDa0, rowDa1);
 867
 868          dst[i] = (a << 15) | (b << 10) | (g << 5) | r;
 869       }
 870    }
 871    else if ((datatype == DTYPE_UBYTE_3_3_2) && (comps == 3)) {
 872       DECLARE_ROW_POINTERS0(ushort);
 873
 874       for (i = j = 0, k = k0; i < (uint) dstWidth;
 875            i++, j += colStride, k += colStride) {
 876          const int rowAr0 = rowA[j] & 0x3;
 877          const int rowAr1 = rowA[k] & 0x3;
 878          const int rowBr0 = rowB[j] & 0x3;
 879          const int rowBr1 = rowB[k] & 0x3;
 880          const int rowCr0 = rowC[j] & 0x3;
 881          const int rowCr1 = rowC[k] & 0x3;
 882          const int rowDr0 = rowD[j] & 0x3;
 883          const int rowDr1 = rowD[k] & 0x3;
 884          const int rowAg0 = (rowA[j] >> 2) & 0x7;
 885          const int rowAg1 = (rowA[k] >> 2) & 0x7;
 886          const int rowBg0 = (rowB[j] >> 2) & 0x7;
 887          const int rowBg1 = (rowB[k] >> 2) & 0x7;
 888          const int rowCg0 = (rowC[j] >> 2) & 0x7;
 889          const int rowCg1 = (rowC[k] >> 2) & 0x7;
 890          const int rowDg0 = (rowD[j] >> 2) & 0x7;
 891          const int rowDg1 = (rowD[k] >> 2) & 0x7;
 892          const int rowAb0 = (rowA[j] >> 5) & 0x7;
 893          const int rowAb1 = (rowA[k] >> 5) & 0x7;
 894          const int rowBb0 = (rowB[j] >> 5) & 0x7;
 895          const int rowBb1 = (rowB[k] >> 5) & 0x7;
 896          const int rowCb0 = (rowC[j] >> 5) & 0x7;
 897          const int rowCb1 = (rowC[k] >> 5) & 0x7;
 898          const int rowDb0 = (rowD[j] >> 5) & 0x7;
 899          const int rowDb1 = (rowD[k] >> 5) & 0x7;
 900          const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
 901                                        rowCr0, rowCr1, rowDr0, rowDr1);
 902          const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
 903                                        rowCg0, rowCg1, rowDg0, rowDg1);
 904          const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
 905                                        rowCb0, rowCb1, rowDb0, rowDb1);
 906          dst[i] = (b << 5) | (g << 2) | r;
 907       }
 908    }
 909    else {
 910       debug_printf("bad format in do_row_3D()");
 911    }
 912 }
 913
 914
 915
 916 static void
 917 format_to_type_comps(enum pipe_format pformat,
 918                      enum dtype *datatype, uint *comps)
 919 {
 920    /* XXX I think this could be implemented in terms of the pf_*() functions */
 921    switch (pformat) {
 922    case PIPE_FORMAT_A8R8G8B8_UNORM:
 923    case PIPE_FORMAT_X8R8G8B8_UNORM:
 924    case PIPE_FORMAT_B8G8R8A8_UNORM:
 925    case PIPE_FORMAT_B8G8R8X8_UNORM:
 926    case PIPE_FORMAT_R8G8B8A8_SRGB:
 927    case PIPE_FORMAT_R8G8B8X8_SRGB:
 928    case PIPE_FORMAT_A8R8G8B8_SRGB:
 929    case PIPE_FORMAT_X8R8G8B8_SRGB:
 930    case PIPE_FORMAT_B8G8R8A8_SRGB:
 931    case PIPE_FORMAT_B8G8R8X8_SRGB:
 932    case PIPE_FORMAT_R8G8B8_SRGB:
 933       *datatype = DTYPE_UBYTE;
 934       *comps = 4;
 935       return;
 936    case PIPE_FORMAT_A1R5G5B5_UNORM:
 937       *datatype = DTYPE_USHORT_1_5_5_5_REV;
 938       *comps = 4;
 939       return;
 940    case PIPE_FORMAT_A4R4G4B4_UNORM:
 941       *datatype = DTYPE_USHORT_4_4_4_4;
 942       *comps = 4;
 943       return;
 944    case PIPE_FORMAT_R5G6B5_UNORM:
 945       *datatype = DTYPE_USHORT_5_6_5;
 946       *comps = 3;
 947       return;
 948    case PIPE_FORMAT_L8_UNORM:
 949    case PIPE_FORMAT_L8_SRGB:
 950    case PIPE_FORMAT_A8_UNORM:
 951    case PIPE_FORMAT_I8_UNORM:
 952       *datatype = DTYPE_UBYTE;
 953       *comps = 1;
 954       return;
 955    case PIPE_FORMAT_A8L8_UNORM:
 956    case PIPE_FORMAT_A8L8_SRGB:
 957       *datatype = DTYPE_UBYTE;
 958       *comps = 2;
 959       return;
 960    default:
 961       assert(0);
 962       *datatype = DTYPE_UBYTE;
 963       *comps = 0;
 964       break;
 965    }
 966 }
 967
 968
 969 static void
 970 reduce_1d(enum pipe_format pformat,
 971           int srcWidth, const ubyte *srcPtr,
 972           int dstWidth, ubyte *dstPtr)
 973 {
 974    enum dtype datatype;
 975    uint comps;
 976
 977    format_to_type_comps(pformat, &datatype, &comps);
 978
 979    /* we just duplicate the input row, kind of hack, saves code */
 980    do_row(datatype, comps,
 981           srcWidth, srcPtr, srcPtr,
 982           dstWidth, dstPtr);
 983 }
 984
 985
 986 /**
 987  * Strides are in bytes.  If zero, it'll be computed as width * bpp.
 988  */
 989 static void
 990 reduce_2d(enum pipe_format pformat,
 991           int srcWidth, int srcHeight,
 992           int srcRowStride, const ubyte *srcPtr,
 993           int dstWidth, int dstHeight,
 994           int dstRowStride, ubyte *dstPtr)
 995 {
 996    enum dtype datatype;
 997    uint comps;
 998    const int bpt = pf_get_size(pformat);
 999    const ubyte *srcA, *srcB;
1000    ubyte *dst;
1001    int row;
1002
1003    format_to_type_comps(pformat, &datatype, &comps);
1004
1005    if (!srcRowStride)
1006       srcRowStride = bpt * srcWidth;
1007
1008    if (!dstRowStride)
1009       dstRowStride = bpt * dstWidth;
1010
1011    /* Compute src and dst pointers */
1012    srcA = srcPtr;
1013    if (srcHeight > 1)
1014       srcB = srcA + srcRowStride;
1015    else
1016       srcB = srcA;
1017    dst = dstPtr;
1018
1019    for (row = 0; row < dstHeight; row++) {
1020       do_row(datatype, comps,
1021              srcWidth, srcA, srcB,
1022              dstWidth, dst);
1023       srcA += 2 * srcRowStride;
1024       srcB += 2 * srcRowStride;
1025       dst += dstRowStride;
1026    }
1027 }
1028
1029
1030 static void
1031 reduce_3d(enum pipe_format pformat,
1032           int srcWidth, int srcHeight, int srcDepth,
1033           int srcRowStride, const ubyte *srcPtr,
1034           int dstWidth, int dstHeight, int dstDepth,
1035           int dstRowStride, ubyte *dstPtr)
1036 {
1037    const int bpt = pf_get_size(pformat);
1038    const int border = 0;
1039    int img, row;
1040    int bytesPerSrcImage, bytesPerDstImage;
1041    int bytesPerSrcRow, bytesPerDstRow;
1042    int srcImageOffset, srcRowOffset;
1043    enum dtype datatype;
1044    uint comps;
1045
1046    format_to_type_comps(pformat, &datatype, &comps);
1047
1048    bytesPerSrcImage = srcWidth * srcHeight * bpt;
1049    bytesPerDstImage = dstWidth * dstHeight * bpt;
1050
1051    bytesPerSrcRow = srcWidth * bpt;
1052    bytesPerDstRow = dstWidth * bpt;
1053
1054    /* Offset between adjacent src images to be averaged together */
1055    srcImageOffset = (srcDepth == dstDepth) ? 0 : bytesPerSrcImage;
1056
1057    /* Offset between adjacent src rows to be averaged together */
1058    srcRowOffset = (srcHeight == dstHeight) ? 0 : srcWidth * bpt;
1059
1060    /*
1061     * Need to average together up to 8 src pixels for each dest pixel.
1062     * Break that down into 3 operations:
1063     *   1. take two rows from source image and average them together.
1064     *   2. take two rows from next source image and average them together.
1065     *   3. take the two averaged rows and average them for the final dst row.
1066     */
1067
1068    /*
1069    _mesa_printf("mip3d %d x %d x %d  ->  %d x %d x %d\n",
1070           srcWidth, srcHeight, srcDepth, dstWidth, dstHeight, dstDepth);
1071    */
1072
1073    for (img = 0; img < dstDepth; img++) {
1074       /* first source image pointer, skipping border */
1075       const ubyte *imgSrcA = srcPtr
1076          + (bytesPerSrcImage + bytesPerSrcRow + border) * bpt * border
1077          + img * (bytesPerSrcImage + srcImageOffset);
1078       /* second source image pointer, skipping border */
1079       const ubyte *imgSrcB = imgSrcA + srcImageOffset;
1080       /* address of the dest image, skipping border */
1081       ubyte *imgDst = dstPtr
1082          + (bytesPerDstImage + bytesPerDstRow + border) * bpt * border
1083          + img * bytesPerDstImage;
1084
1085       /* setup the four source row pointers and the dest row pointer */
1086       const ubyte *srcImgARowA = imgSrcA;
1087       const ubyte *srcImgARowB = imgSrcA + srcRowOffset;
1088       const ubyte *srcImgBRowA = imgSrcB;
1089       const ubyte *srcImgBRowB = imgSrcB + srcRowOffset;
1090       ubyte *dstImgRow = imgDst;
1091
1092       for (row = 0; row < dstHeight; row++) {
1093          do_row_3D(datatype, comps, srcWidth,
1094                    srcImgARowA, srcImgARowB,
1095                    srcImgBRowA, srcImgBRowB,
1096                    dstWidth, dstImgRow);
1097
1098          /* advance to next rows */
1099          srcImgARowA += bytesPerSrcRow + srcRowOffset;
1100          srcImgARowB += bytesPerSrcRow + srcRowOffset;
1101          srcImgBRowA += bytesPerSrcRow + srcRowOffset;
1102          srcImgBRowB += bytesPerSrcRow + srcRowOffset;
1103          dstImgRow += bytesPerDstRow;
1104       }
1105    }
1106 }
1107
1108
1109
1110
1111 static void
1112 make_1d_mipmap(struct gen_mipmap_state *ctx,
1113                struct pipe_texture *pt,
1114                uint face, uint baseLevel, uint lastLevel)
1115 {
1116    struct pipe_context *pipe = ctx->pipe;
1117    struct pipe_screen *screen = pipe->screen;
1118    const uint zslice = 0;
1119    uint dstLevel;
1120
1121    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
1122       const uint srcLevel = dstLevel - 1;
1123       struct pipe_transfer *srcTrans, *dstTrans;
1124       void *srcMap, *dstMap;
1125
1126       srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
1127                                           PIPE_TRANSFER_READ, 0, 0,
1128                                           pt->width[srcLevel],
1129                                           pt->height[srcLevel]);
1130       dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
1131                                           PIPE_TRANSFER_WRITE, 0, 0,
1132                                           pt->width[dstLevel],
1133                                           pt->height[dstLevel]);
1134
1135       srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
1136       dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
1137
1138       reduce_1d(pt->format,
1139                 srcTrans->width, srcMap,
1140                 dstTrans->width, dstMap);
1141
1142       screen->transfer_unmap(screen, srcTrans);
1143       screen->transfer_unmap(screen, dstTrans);
1144
1145       screen->tex_transfer_destroy(srcTrans);
1146       screen->tex_transfer_destroy(dstTrans);
1147    }
1148 }
1149
1150
1151 static void
1152 make_2d_mipmap(struct gen_mipmap_state *ctx,
1153                struct pipe_texture *pt,
1154                uint face, uint baseLevel, uint lastLevel)
1155 {
1156    struct pipe_context *pipe = ctx->pipe;
1157    struct pipe_screen *screen = pipe->screen;
1158    const uint zslice = 0;
1159    uint dstLevel;
1160
1161    assert(pt->block.width == 1);
1162    assert(pt->block.height == 1);
1163
1164    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
1165       const uint srcLevel = dstLevel - 1;
1166       struct pipe_transfer *srcTrans, *dstTrans;
1167       ubyte *srcMap, *dstMap;
1168
1169       srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
1170                                           PIPE_TRANSFER_READ, 0, 0,
1171                                           pt->width[srcLevel],
1172                                           pt->height[srcLevel]);
1173       dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
1174                                           PIPE_TRANSFER_WRITE, 0, 0,
1175                                           pt->width[dstLevel],
1176                                           pt->height[dstLevel]);
1177
1178       srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
1179       dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
1180
1181       reduce_2d(pt->format,
1182                 srcTrans->width, srcTrans->height,
1183                 srcTrans->stride, srcMap,
1184                 dstTrans->width, dstTrans->height,
1185                 dstTrans->stride, dstMap);
1186
1187       screen->transfer_unmap(screen, srcTrans);
1188       screen->transfer_unmap(screen, dstTrans);
1189
1190       screen->tex_transfer_destroy(srcTrans);
1191       screen->tex_transfer_destroy(dstTrans);
1192    }
1193 }
1194
1195
1196 static void
1197 make_3d_mipmap(struct gen_mipmap_state *ctx,
1198                struct pipe_texture *pt,
1199                uint face, uint baseLevel, uint lastLevel)
1200 {
1201 #if 0
1202    struct pipe_context *pipe = ctx->pipe;
1203    struct pipe_screen *screen = pipe->screen;
1204    uint dstLevel, zslice = 0;
1205
1206    assert(pt->block.width == 1);
1207    assert(pt->block.height == 1);
1208
1209    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
1210       const uint srcLevel = dstLevel - 1;
1211       struct pipe_transfer *srcTrans, *dstTrans;
1212       ubyte *srcMap, *dstMap;
1213
1214       srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
1215                                           PIPE_TRANSFER_READ, 0, 0,
1216                                           pt->width[srcLevel],
1217                                           pt->height[srcLevel]);
1218       dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
1219                                           PIPE_TRANSFER_WRITE, 0, 0,
1220                                           pt->width[dstLevel],
1221                                           pt->height[dstLevel]);
1222
1223       srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
1224       dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
1225
1226       reduce_3d(pt->format,
1227                 srcTrans->width, srcTrans->height,
1228                 srcTrans->stride, srcMap,
1229                 dstTrans->width, dstTrans->height,
1230                 dstTrans->stride, dstMap);
1231
1232       screen->transfer_unmap(screen, srcTrans);
1233       screen->transfer_unmap(screen, dstTrans);
1234
1235       screen->tex_transfer_destroy(srcTrans);
1236       screen->tex_transfer_destroy(dstTrans);
1237    }
1238 #else
1239    (void) reduce_3d;
1240 #endif
1241 }
1242
1243
1244 static void
1245 fallback_gen_mipmap(struct gen_mipmap_state *ctx,
1246                     struct pipe_texture *pt,
1247                     uint face, uint baseLevel, uint lastLevel)
1248 {
1249    switch (pt->target) {
1250    case PIPE_TEXTURE_1D:
1251       make_1d_mipmap(ctx, pt, face, baseLevel, lastLevel);
1252       break;
1253    case PIPE_TEXTURE_2D:
1254    case PIPE_TEXTURE_CUBE:
1255       make_2d_mipmap(ctx, pt, face, baseLevel, lastLevel);
1256       break;
1257    case PIPE_TEXTURE_3D:
1258       make_3d_mipmap(ctx, pt, face, baseLevel, lastLevel);
1259       break;
1260    default:
1261       assert(0);
1262    }
1263 }
1264
1265
1266 /**
1267  * Create a mipmap generation context.
1268  * The idea is to create one of these and re-use it each time we need to
1269  * generate a mipmap.
1270  */
1271 struct gen_mipmap_state *
1272 util_create_gen_mipmap(struct pipe_context *pipe,
1273                        struct cso_context *cso)
1274 {
1275    struct gen_mipmap_state *ctx;
1276    uint i;
1277
1278    ctx = CALLOC_STRUCT(gen_mipmap_state);
1279    if (!ctx)
1280       return NULL;
1281
1282    ctx->pipe = pipe;
1283    ctx->cso = cso;
1284
1285    /* disabled blending/masking */
1286    memset(&ctx->blend, 0, sizeof(ctx->blend));
1287    ctx->blend.colormask = PIPE_MASK_RGBA;
1288
1289    /* no-op depth/stencil/alpha */
1290    memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil));
1291
1292    /* rasterizer */
1293    memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
1294    ctx->rasterizer.front_winding = PIPE_WINDING_CW;
1295    ctx->rasterizer.cull_mode = PIPE_WINDING_NONE;
1296    ctx->rasterizer.bypass_vs_clip_and_viewport = 1;
1297    ctx->rasterizer.gl_rasterization_rules = 1;
1298
1299    /* sampler state */
1300    memset(&ctx->sampler, 0, sizeof(ctx->sampler));
1301    ctx->sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1302    ctx->sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1303    ctx->sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
1304    ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
1305    ctx->sampler.normalized_coords = 1;
1306
1307    /* vertex shader - still needed to specify mapping from fragment
1308     * shader input semantics to vertex elements
1309     */
1310    {
1311       const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
1312                                       TGSI_SEMANTIC_GENERIC };
1313       const uint semantic_indexes[] = { 0, 0 };
1314       ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
1315                                                     semantic_indexes);
1316    }
1317
1318    /* fragment shader */
1319    ctx->fs = util_make_fragment_tex_shader(pipe);
1320
1321    /* vertex data that doesn't change */
1322    for (i = 0; i < 4; i++) {
1323       ctx->vertices[i][0][2] = 0.0f; /* z */
1324       ctx->vertices[i][0][3] = 1.0f; /* w */
1325       ctx->vertices[i][1][3] = 1.0f; /* q */
1326    }
1327
1328    /* Note: the actual vertex buffer is allocated as needed below */
1329
1330    return ctx;
1331 }
1332
1333
1334 /**
1335  * Get next "slot" of vertex space in the vertex buffer.
1336  * We're allocating one large vertex buffer and using it piece by piece.
1337  */
1338 static unsigned
1339 get_next_slot(struct gen_mipmap_state *ctx)
1340 {
1341    const unsigned max_slots = 4096 / sizeof ctx->vertices;
1342
1343    if (ctx->vbuf_slot >= max_slots)
1344       util_gen_mipmap_flush( ctx );
1345
1346    if (!ctx->vbuf) {
1347       ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
1348                                      32,
1349                                      PIPE_BUFFER_USAGE_VERTEX,
1350                                      max_slots * sizeof ctx->vertices);
1351    }
1352
1353    return ctx->vbuf_slot++ * sizeof ctx->vertices;
1354 }
1355
1356
1357 static unsigned
1358 set_vertex_data(struct gen_mipmap_state *ctx,
1359                 enum pipe_texture_target tex_target,
1360                 uint face, float width, float height)
1361 {
1362    unsigned offset;
1363
1364    /* vert[0].position */
1365    ctx->vertices[0][0][0] = 0.0f; /*x*/
1366    ctx->vertices[0][0][1] = 0.0f; /*y*/
1367
1368    /* vert[1].position */
1369    ctx->vertices[1][0][0] = width;
1370    ctx->vertices[1][0][1] = 0.0f;
1371
1372    /* vert[2].position */
1373    ctx->vertices[2][0][0] = width;
1374    ctx->vertices[2][0][1] = height;
1375
1376    /* vert[3].position */
1377    ctx->vertices[3][0][0] = 0.0f;
1378    ctx->vertices[3][0][1] = height;
1379
1380    /* Setup vertex texcoords.  This is a little tricky for cube maps. */
1381    if (tex_target == PIPE_TEXTURE_CUBE) {
1382       static const float st[4][2] = {
1383          {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
1384       };
1385       float rx, ry, rz;
1386       uint i;
1387
1388       /* loop over quad verts */
1389       for (i = 0; i < 4; i++) {
1390          /* Compute sc = +/-scale and tc = +/-scale.
1391           * Not +/-1 to avoid cube face selection ambiguity near the edges,
1392           * though that can still sometimes happen with this scale factor...
1393           */
1394          const float scale = 0.9999f;
1395          const float sc = (2.0f * st[i][0] - 1.0f) * scale;
1396          const float tc = (2.0f * st[i][1] - 1.0f) * scale;
1397
1398          switch (face) {
1399          case PIPE_TEX_FACE_POS_X:
1400             rx = 1.0f;
1401             ry = -tc;
1402             rz = -sc;
1403             break;
1404          case PIPE_TEX_FACE_NEG_X:
1405             rx = -1.0f;
1406             ry = -tc;
1407             rz = sc;
1408             break;
1409          case PIPE_TEX_FACE_POS_Y:
1410             rx = sc;
1411             ry = 1.0f;
1412             rz = tc;
1413             break;
1414          case PIPE_TEX_FACE_NEG_Y:
1415             rx = sc;
1416             ry = -1.0f;
1417             rz = -tc;
1418             break;
1419          case PIPE_TEX_FACE_POS_Z:
1420             rx = sc;
1421             ry = -tc;
1422             rz = 1.0f;
1423             break;
1424          case PIPE_TEX_FACE_NEG_Z:
1425             rx = -sc;
1426             ry = -tc;
1427             rz = -1.0f;
1428             break;
1429          default:
1430             rx = ry = rz = 0.0f;
1431             assert(0);
1432          }
1433
1434          ctx->vertices[i][1][0] = rx; /*s*/
1435          ctx->vertices[i][1][1] = ry; /*t*/
1436          ctx->vertices[i][1][2] = rz; /*r*/
1437       }
1438    }
1439    else {
1440       /* 1D/2D */
1441       ctx->vertices[0][1][0] = 0.0f; /*s*/
1442       ctx->vertices[0][1][1] = 0.0f; /*t*/
1443       ctx->vertices[0][1][2] = 0.0f; /*r*/
1444
1445       ctx->vertices[1][1][0] = 1.0f;
1446       ctx->vertices[1][1][1] = 0.0f;
1447       ctx->vertices[1][1][2] = 0.0f;
1448
1449       ctx->vertices[2][1][0] = 1.0f;
1450       ctx->vertices[2][1][1] = 1.0f;
1451       ctx->vertices[2][1][2] = 0.0f;
1452
1453       ctx->vertices[3][1][0] = 0.0f;
1454       ctx->vertices[3][1][1] = 1.0f;
1455       ctx->vertices[3][1][2] = 0.0f;
1456    }
1457
1458    offset = get_next_slot( ctx );
1459
1460    pipe_buffer_write(ctx->pipe->screen, ctx->vbuf,
1461                      offset, sizeof(ctx->vertices), ctx->vertices);
1462
1463    return offset;
1464 }
1465
1466
1467
1468 /**
1469  * Destroy a mipmap generation context
1470  */
1471 void
1472 util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
1473 {
1474    struct pipe_context *pipe = ctx->pipe;
1475
1476    pipe->delete_vs_state(pipe, ctx->vs);
1477    pipe->delete_fs_state(pipe, ctx->fs);
1478
1479    pipe_buffer_reference(&ctx->vbuf, NULL);
1480
1481    FREE(ctx);
1482 }
1483
1484
1485
1486 /* Release vertex buffer at end of frame to avoid synchronous
1487  * rendering.
1488  */
1489 void util_gen_mipmap_flush( struct gen_mipmap_state *ctx )
1490 {
1491    pipe_buffer_reference(&ctx->vbuf, NULL);
1492    ctx->vbuf_slot = 0;
1493 }
1494
1495
1496 /**
1497  * Generate mipmap images.  It's assumed all needed texture memory is
1498  * already allocated.
1499  *
1500  * \param pt  the texture to generate mipmap levels for
1501  * \param face  which cube face to generate mipmaps for (0 for non-cube maps)
1502  * \param baseLevel  the first mipmap level to use as a src
1503  * \param lastLevel  the last mipmap level to generate
1504  * \param filter  the minification filter used to generate mipmap levels with
1505  * \param filter  one of PIPE_TEX_FILTER_LINEAR, PIPE_TEX_FILTER_NEAREST
1506  */
1507 void
1508 util_gen_mipmap(struct gen_mipmap_state *ctx,
1509                 struct pipe_texture *pt,
1510                 uint face, uint baseLevel, uint lastLevel, uint filter)
1511 {
1512    struct pipe_context *pipe = ctx->pipe;
1513    struct pipe_screen *screen = pipe->screen;
1514    struct pipe_framebuffer_state fb;
1515    uint dstLevel;
1516    uint zslice = 0;
1517    uint offset;
1518
1519    /* The texture object should have room for the levels which we're
1520     * about to generate.
1521     */
1522    assert(lastLevel <= pt->last_level);
1523
1524    /* If this fails, why are we here? */
1525    assert(lastLevel > baseLevel);
1526
1527    assert(filter == PIPE_TEX_FILTER_LINEAR ||
1528           filter == PIPE_TEX_FILTER_NEAREST);
1529
1530    /* check if we can render in the texture's format */
1531    if (!screen->is_format_supported(screen, pt->format, PIPE_TEXTURE_2D,
1532                                     PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
1533       fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel);
1534       return;
1535    }
1536
1537    /* save state (restored below) */
1538    cso_save_blend(ctx->cso);
1539    cso_save_depth_stencil_alpha(ctx->cso);
1540    cso_save_rasterizer(ctx->cso);
1541    cso_save_samplers(ctx->cso);
1542    cso_save_sampler_textures(ctx->cso);
1543    cso_save_framebuffer(ctx->cso);
1544    cso_save_fragment_shader(ctx->cso);
1545    cso_save_vertex_shader(ctx->cso);
1546
1547    /* bind our state */
1548    cso_set_blend(ctx->cso, &ctx->blend);
1549    cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
1550    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
1551
1552    cso_set_fragment_shader_handle(ctx->cso, ctx->fs);
1553    cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
1554
1555    /* init framebuffer state */
1556    memset(&fb, 0, sizeof(fb));
1557    fb.nr_cbufs = 1;
1558
1559    /* set min/mag to same filter for faster sw speed */
1560    ctx->sampler.mag_img_filter = filter;
1561    ctx->sampler.min_img_filter = filter;
1562
1563    /*
1564     * XXX for small mipmap levels, it may be faster to use the software
1565     * fallback path...
1566     */
1567    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
1568       const uint srcLevel = dstLevel - 1;
1569
1570       struct pipe_surface *surf =
1571          screen->get_tex_surface(screen, pt, face, dstLevel, zslice,
1572                                  PIPE_BUFFER_USAGE_GPU_WRITE);
1573
1574       /*
1575        * Setup framebuffer / dest surface
1576        */
1577       fb.cbufs[0] = surf;
1578       fb.width = pt->width[dstLevel];
1579       fb.height = pt->height[dstLevel];
1580       cso_set_framebuffer(ctx->cso, &fb);
1581
1582       /*
1583        * Setup sampler state
1584        * Note: we should only have to set the min/max LOD clamps to ensure
1585        * we grab texels from the right mipmap level.  But some hardware
1586        * has trouble with min clamping so we also set the lod_bias to
1587        * try to work around that.
1588        */
1589       ctx->sampler.min_lod = ctx->sampler.max_lod = (float) srcLevel;
1590       ctx->sampler.lod_bias = (float) srcLevel;
1591       cso_single_sampler(ctx->cso, 0, &ctx->sampler);
1592       cso_single_sampler_done(ctx->cso);
1593
1594       cso_set_sampler_textures(ctx->cso, 1, &pt);
1595
1596       /* quad coords in window coords (bypassing vs, clip and viewport) */
1597       offset = set_vertex_data(ctx,
1598                                pt->target,
1599                                face,
1600                                (float) pt->width[dstLevel],
1601                                (float) pt->height[dstLevel]);
1602
1603       util_draw_vertex_buffer(ctx->pipe,
1604                               ctx->vbuf,
1605                               offset,
1606                               PIPE_PRIM_TRIANGLE_FAN,
1607                               4,  /* verts */
1608                               2); /* attribs/vert */
1609
1610       pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
1611
1612       /* need to signal that the texture has changed _after_ rendering to it */
1613       pipe_surface_reference( &surf, NULL );
1614    }
1615
1616    /* restore state we changed */
1617    cso_restore_blend(ctx->cso);
1618    cso_restore_depth_stencil_alpha(ctx->cso);
1619    cso_restore_rasterizer(ctx->cso);
1620    cso_restore_samplers(ctx->cso);
1621    cso_restore_sampler_textures(ctx->cso);
1622    cso_restore_framebuffer(ctx->cso);
1623    cso_restore_fragment_shader(ctx->cso);
1624    cso_restore_vertex_shader(ctx->cso);
1625 }