src/gallium/drivers/llvmpipe/lp_setup_coef.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2010, VMware.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
/*
 * Setup of the fragment shader input interpolation coefficients
 * (a0, dadx, dady) for triangles.
 */
  31
  32 #include "util/u_math.h"
  33 #include "util/u_memory.h"
  34 #include "lp_perf.h"
  35 #include "lp_setup_context.h"
  36 #include "lp_setup_coef.h"
  37 #include "lp_rast.h"
  38 #include "lp_state_fs.h"
  39
  40 #if !defined(PIPE_ARCH_SSE)
  41
  42 /**
  43  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
  44  */
  45 static void constant_coef( struct lp_rast_shader_inputs *inputs,
  46                            unsigned slot,
  47                            const float value,
  48                            unsigned i )
  49 {
  50    inputs->a0[slot][i] = value;
  51    inputs->dadx[slot][i] = 0.0f;
  52    inputs->dady[slot][i] = 0.0f;
  53 }
  54
  55
  56
  57 static void linear_coef( struct lp_rast_shader_inputs *inputs,
  58                          const struct lp_tri_info *info,
  59                          unsigned slot,
  60                          unsigned vert_attr,
  61                          unsigned i)
  62 {
  63    float a0 = info->v0[vert_attr][i];
  64    float a1 = info->v1[vert_attr][i];
  65    float a2 = info->v2[vert_attr][i];
  66
  67    float da01 = a0 - a1;
  68    float da20 = a2 - a0;
  69    float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20);
  70    float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01);
  71
  72    inputs->dadx[slot][i] = dadx;
  73    inputs->dady[slot][i] = dady;
  74
  75    /* calculate a0 as the value which would be sampled for the
  76     * fragment at (0,0), taking into account that we want to sample at
  77     * pixel centers, in other words (0.5, 0.5).
  78     *
  79     * this is neat but unfortunately not a good way to do things for
  80     * triangles with very large values of dadx or dady as it will
  81     * result in the subtraction and re-addition from a0 of a very
  82     * large number, which means we'll end up loosing a lot of the
  83     * fractional bits and precision from a0.  the way to fix this is
  84     * to define a0 as the sample at a pixel center somewhere near vmin
  85     * instead - i'll switch to this later.
  86     */
  87    inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
  88                                    dady * info->y0_center);
  89 }
  90
  91
  92 /**
  93  * Compute a0, dadx and dady for a perspective-corrected interpolant,
  94  * for a triangle.
  95  * We basically multiply the vertex value by 1/w before computing
  96  * the plane coefficients (a0, dadx, dady).
  97  * Later, when we compute the value at a particular fragment position we'll
  98  * divide the interpolated value by the interpolated W at that fragment.
  99  */
 100 static void perspective_coef( struct lp_rast_shader_inputs *inputs,
 101                               const struct lp_tri_info *info,
 102                               unsigned slot,
 103                               unsigned vert_attr,
 104                               unsigned i)
 105 {
 106    /* premultiply by 1/w  (v[0][3] is always 1/w):
 107     */
 108    float a0 = info->v0[vert_attr][i] * info->v0[0][3];
 109    float a1 = info->v1[vert_attr][i] * info->v1[0][3];
 110    float a2 = info->v2[vert_attr][i] * info->v2[0][3];
 111    float da01 = a0 - a1;
 112    float da20 = a2 - a0;
 113    float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20;
 114    float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01;
 115
 116    inputs->dadx[slot][i] = dadx;
 117    inputs->dady[slot][i] = dady;
 118    inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
 119                                    dady * info->y0_center);
 120 }
 121
 122
 123 /**
 124  * Special coefficient setup for gl_FragCoord.
 125  * X and Y are trivial
 126  * Z and W are copied from position_coef which should have already been computed.
 127  * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
 128  */
 129 static void
 130 setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs,
 131                      const struct lp_tri_info *info,
 132                      unsigned slot,
 133                      unsigned usage_mask)
 134 {
 135    /*X*/
 136    if (usage_mask & TGSI_WRITEMASK_X) {
 137       inputs->a0[slot][0] = 0.0;
 138       inputs->dadx[slot][0] = 1.0;
 139       inputs->dady[slot][0] = 0.0;
 140    }
 141
 142    /*Y*/
 143    if (usage_mask & TGSI_WRITEMASK_Y) {
 144       inputs->a0[slot][1] = 0.0;
 145       inputs->dadx[slot][1] = 0.0;
 146       inputs->dady[slot][1] = 1.0;
 147    }
 148
 149    /*Z*/
 150    if (usage_mask & TGSI_WRITEMASK_Z) {
 151       linear_coef(inputs, info, slot, 0, 2);
 152    }
 153
 154    /*W*/
 155    if (usage_mask & TGSI_WRITEMASK_W) {
 156       linear_coef(inputs, info, slot, 0, 3);
 157    }
 158 }
 159
 160
 161 /**
 162  * Setup the fragment input attribute with the front-facing value.
 163  * \param frontface  is the triangle front facing?
 164  */
 165 static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
 166                                unsigned slot,
 167                                boolean frontface,
 168                                unsigned usage_mask)
 169 {
 170    /* convert TRUE to 1.0 and FALSE to -1.0 */
 171    if (usage_mask & TGSI_WRITEMASK_X)
 172       constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 );
 173
 174    if (usage_mask & TGSI_WRITEMASK_Y)
 175       constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */
 176
 177    if (usage_mask & TGSI_WRITEMASK_Z)
 178       constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */
 179
 180    if (usage_mask & TGSI_WRITEMASK_W)
 181       constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */
 182 }
 183
 184
 185 /**
 186  * Compute the tri->coef[] array dadx, dady, a0 values.
 187  */
 188 void lp_setup_tri_coef( struct lp_setup_context *setup,
 189                         struct lp_rast_shader_inputs *inputs,
 190                         const struct lp_tri_info *info)
 191 {
 192    unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
 193    unsigned slot;
 194    unsigned i;
 195
 196    /* setup interpolation for all the remaining attributes:
 197     */
 198    for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
 199       unsigned vert_attr = setup->fs.input[slot].src_index;
 200       unsigned usage_mask = setup->fs.input[slot].usage_mask;
 201
 202       switch (setup->fs.input[slot].interp) {
 203       case LP_INTERP_CONSTANT:
 204          if (setup->flatshade_first) {
 205             for (i = 0; i < NUM_CHANNELS; i++)
 206                if (usage_mask & (1 << i))
 207                   constant_coef(inputs, slot+1, info->v0[vert_attr][i], i);
 208          }
 209          else {
 210             for (i = 0; i < NUM_CHANNELS; i++)
 211                if (usage_mask & (1 << i))
 212                   constant_coef(inputs, slot+1, info->v2[vert_attr][i], i);
 213          }
 214          break;
 215
 216       case LP_INTERP_LINEAR:
 217          for (i = 0; i < NUM_CHANNELS; i++)
 218             if (usage_mask & (1 << i))
 219                linear_coef(inputs, info, slot+1, vert_attr, i);
 220          break;
 221
 222       case LP_INTERP_PERSPECTIVE:
 223          for (i = 0; i < NUM_CHANNELS; i++)
 224             if (usage_mask & (1 << i))
 225                perspective_coef(inputs, info, slot+1, vert_attr, i);
 226          fragcoord_usage_mask |= TGSI_WRITEMASK_W;
 227          break;
 228
 229       case LP_INTERP_POSITION:
 230          /*
 231           * The generated pixel interpolators will pick up the coeffs from
 232           * slot 0, so all need to ensure that the usage mask is covers all
 233           * usages.
 234           */
 235          fragcoord_usage_mask |= usage_mask;
 236          break;
 237
 238       case LP_INTERP_FACING:
 239          setup_facing_coef(inputs, slot+1, info->frontfacing, usage_mask);
 240          break;
 241
 242       default:
 243          assert(0);
 244       }
 245    }
 246
 247    /* The internal position input is in slot zero:
 248     */
 249    setup_fragcoord_coef(inputs, info, 0, fragcoord_usage_mask);
 250 }
 251
 252 #else
 253 extern void lp_setup_coef_dummy(void);
 254 void lp_setup_coef_dummy(void)
 255 {
 256 }
 257
 258 #endif