src/gallium/auxiliary/gallivm/lp_bld_quad.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2010 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  * The above copyright notice and this permission notice (including the
  23  * next paragraph) shall be included in all copies or substantial portions
  24  * of the Software.
  25  *
  26  **************************************************************************/
  27
  28
  29 #include "u_cpu_detect.h"
  30 #include "lp_bld_type.h"
  31 #include "lp_bld_arit.h"
  32 #include "lp_bld_const.h"
  33 #include "lp_bld_swizzle.h"
  34 #include "lp_bld_quad.h"
  35
  36
  37 static const unsigned char
  38 swizzle_left[4] = {
  39    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_LEFT,
  40    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_LEFT
  41 };
  42
  43 static const unsigned char
  44 swizzle_right[4] = {
  45    LP_BLD_QUAD_TOP_RIGHT,    LP_BLD_QUAD_TOP_RIGHT,
  46    LP_BLD_QUAD_BOTTOM_RIGHT, LP_BLD_QUAD_BOTTOM_RIGHT
  47 };
  48
  49 static const unsigned char
  50 swizzle_top[4] = {
  51    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_RIGHT,
  52    LP_BLD_QUAD_TOP_LEFT,     LP_BLD_QUAD_TOP_RIGHT
  53 };
  54
  55 static const unsigned char
  56 swizzle_bottom[4] = {
  57    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_RIGHT,
  58    LP_BLD_QUAD_BOTTOM_LEFT,  LP_BLD_QUAD_BOTTOM_RIGHT
  59 };
  60
  61
  62 LLVMValueRef
  63 lp_build_ddx(struct lp_build_context *bld,
  64              LLVMValueRef a)
  65 {
  66    LLVMValueRef a_left  = lp_build_swizzle_aos(bld, a, swizzle_left);
  67    LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right);
  68    return lp_build_sub(bld, a_right, a_left);
  69 }
  70
  71
  72 LLVMValueRef
  73 lp_build_ddy(struct lp_build_context *bld,
  74              LLVMValueRef a)
  75 {
  76    LLVMValueRef a_top    = lp_build_swizzle_aos(bld, a, swizzle_top);
  77    LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom);
  78    return lp_build_sub(bld, a_bottom, a_top);
  79 }
  80
/*
 * To be able to handle multiple quads at once in texture sampling and
 * do lod calculations per quad, it is necessary to get the per-quad
 * derivatives into the lp_build_rho function.
 * For 8-wide vectors the packed derivative values for 3 coords would
 * look like this (this scales to an arbitrary vector size that is a
 * multiple of 4):
 * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
 * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____
 * The second vector will be unused for 1d and 2d textures.
 */
  91 LLVMValueRef
  92 lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
  93                                  LLVMValueRef a)
  94 {
  95    struct gallivm_state *gallivm = bld->gallivm;
  96    LLVMBuilderRef builder = gallivm->builder;
  97    LLVMValueRef vec1, vec2;
  98
  99    /* same packing as _twocoord, but can use aos swizzle helper */
 100
 101    /*
 102     * XXX could make swizzle1 a noop swizzle by using right top/bottom
 103     * pair for ddy
 104     */
 105    static const unsigned char swizzle1[] = {
 106       LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT,
 107       LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
 108    };
 109    static const unsigned char swizzle2[] = {
 110       LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_BOTTOM_LEFT,
 111       LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
 112    };
 113
 114    vec1 = lp_build_swizzle_aos(bld, a, swizzle1);
 115    vec2 = lp_build_swizzle_aos(bld, a, swizzle2);
 116
 117    if (bld->type.floating)
 118       return LLVMBuildFSub(builder, vec2, vec1, "ddxddy");
 119    else
 120       return LLVMBuildSub(builder, vec2, vec1, "ddxddy");
 121 }
 122
 123
 124 LLVMValueRef
 125 lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
 126                                  LLVMValueRef a, LLVMValueRef b)
 127 {
 128    struct gallivm_state *gallivm = bld->gallivm;
 129    LLVMBuilderRef builder = gallivm->builder;
 130    LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH/4];
 131    LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH/4];
 132    LLVMValueRef vec1, vec2;
 133    unsigned length, num_quads, i;
 134
 135    /* XXX: do hsub version */
 136    length = bld->type.length;
 137    num_quads = length / 4;
 138    for (i = 0; i < num_quads; i++) {
 139       unsigned s1 = 4 * i;
 140       unsigned s2 = 4 * i + length;
 141       shuffles1[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
 142       shuffles1[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s1);
 143       shuffles1[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
 144       shuffles1[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_LEFT + s2);
 145       shuffles2[4*i + 0] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s1);
 146       shuffles2[4*i + 1] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s1);
 147       shuffles2[4*i + 2] = lp_build_const_int32(gallivm, LP_BLD_QUAD_TOP_RIGHT + s2);
 148       shuffles2[4*i + 3] = lp_build_const_int32(gallivm, LP_BLD_QUAD_BOTTOM_LEFT + s2);
 149    }
 150    vec1 = LLVMBuildShuffleVector(builder, a, b,
 151                                  LLVMConstVector(shuffles1, length), "");
 152    vec2 = LLVMBuildShuffleVector(builder, a, b,
 153                                  LLVMConstVector(shuffles2, length), "");
 154    if (bld->type.floating)
 155       return LLVMBuildFSub(builder, vec2, vec1, "ddxddyddxddy");
 156    else
 157       return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy");
 158 }
 159