src/gallium/drivers/llvmpipe/lp_setup_coef.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2010, VMware.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
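/*
 * Setup of the attribute interpolation coefficients (a0, dadx, dady) for
 * triangles.  Plain C implementation, compiled when PIPE_ARCH_SSE is not
 * defined.
 */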
  31
  32 #include "util/u_math.h"
  33 #include "util/u_memory.h"
  34 #include "lp_perf.h"
  35 #include "lp_setup_context.h"
  36 #include "lp_setup_coef.h"
  37 #include "lp_rast.h"
  38 #include "lp_state_fs.h"
  39
  40 #if !defined(PIPE_ARCH_SSE)
  41
  42 /**
  43  * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
  44  */
  45 static void constant_coef( struct lp_rast_shader_inputs *inputs,
  46                            unsigned slot,
  47                            const float value,
  48                            unsigned i )
  49 {
  50    inputs->a0[slot][i] = value;
  51    inputs->dadx[slot][i] = 0.0f;
  52    inputs->dady[slot][i] = 0.0f;
  53 }
  54
  55
  56
  57 static void linear_coef( struct lp_rast_shader_inputs *inputs,
  58                          const struct lp_tri_info *info,
  59                          unsigned slot,
  60                          unsigned vert_attr,
  61                          unsigned i)
  62 {
  63    float a0 = info->v0[vert_attr][i];
  64    float a1 = info->v1[vert_attr][i];
  65    float a2 = info->v2[vert_attr][i];
  66
  67    float da01 = a0 - a1;
  68    float da20 = a2 - a0;
  69    float dadx = (da01 * info->dy20_ooa - info->dy01_ooa * da20);
  70    float dady = (da20 * info->dx01_ooa - info->dx20_ooa * da01);
  71
  72    inputs->dadx[slot][i] = dadx;
  73    inputs->dady[slot][i] = dady;
  74
  75    /* calculate a0 as the value which would be sampled for the
  76     * fragment at (0,0), taking into account that we want to sample at
  77     * pixel centers, in other words (0.5, 0.5).
  78     *
  79     * this is neat but unfortunately not a good way to do things for
  80     * triangles with very large values of dadx or dady as it will
  81     * result in the subtraction and re-addition from a0 of a very
  82     * large number, which means we'll end up loosing a lot of the
  83     * fractional bits and precision from a0.  the way to fix this is
  84     * to define a0 as the sample at a pixel center somewhere near vmin
  85     * instead - i'll switch to this later.
  86     */
  87    inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
  88                                    dady * info->y0_center);
  89 }
  90
  91
  92 /**
  93  * Compute a0, dadx and dady for a perspective-corrected interpolant,
  94  * for a triangle.
  95  * We basically multiply the vertex value by 1/w before computing
  96  * the plane coefficients (a0, dadx, dady).
  97  * Later, when we compute the value at a particular fragment position we'll
  98  * divide the interpolated value by the interpolated W at that fragment.
  99  */
 100 static void perspective_coef( struct lp_rast_shader_inputs *inputs,
 101                               const struct lp_tri_info *info,
 102                               unsigned slot,
 103                               unsigned vert_attr,
 104                               unsigned i)
 105 {
 106    /* premultiply by 1/w  (v[0][3] is always 1/w):
 107     */
 108    float a0 = info->v0[vert_attr][i] * info->v0[0][3];
 109    float a1 = info->v1[vert_attr][i] * info->v1[0][3];
 110    float a2 = info->v2[vert_attr][i] * info->v2[0][3];
 111    float da01 = a0 - a1;
 112    float da20 = a2 - a0;
 113    float dadx = da01 * info->dy20_ooa - info->dy01_ooa * da20;
 114    float dady = da20 * info->dx01_ooa - info->dx20_ooa * da01;
 115
 116    inputs->dadx[slot][i] = dadx;
 117    inputs->dady[slot][i] = dady;
 118    inputs->a0[slot][i] = a0 - (dadx * info->x0_center +
 119                                    dady * info->y0_center);
 120 }
 121
 122
 123 /**
 124  * Special coefficient setup for gl_FragCoord.
 125  * X and Y are trivial
 126  * Z and W are copied from position_coef which should have already been computed.
 127  * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask.
 128  */
 129 static void
 130 setup_fragcoord_coef(struct lp_rast_shader_inputs *inputs,
 131                      const struct lp_tri_info *info,
 132                      unsigned slot,
 133                      unsigned usage_mask)
 134 {
 135    /*X*/
 136    if (usage_mask & TGSI_WRITEMASK_X) {
 137       inputs->a0[slot][0] = 0.0;
 138       inputs->dadx[slot][0] = 1.0;
 139       inputs->dady[slot][0] = 0.0;
 140    }
 141
 142    /*Y*/
 143    if (usage_mask & TGSI_WRITEMASK_Y) {
 144       inputs->a0[slot][1] = 0.0;
 145       inputs->dadx[slot][1] = 0.0;
 146       inputs->dady[slot][1] = 1.0;
 147    }
 148
 149    /*Z*/
 150    if (usage_mask & TGSI_WRITEMASK_Z) {
 151       linear_coef(inputs, info, slot, 0, 2);
 152    }
 153
 154    /*W*/
 155    if (usage_mask & TGSI_WRITEMASK_W) {
 156       linear_coef(inputs, info, slot, 0, 3);
 157    }
 158 }
 159
 160
 161 /**
 162  * Setup the fragment input attribute with the front-facing value.
 163  * \param frontface  is the triangle front facing?
 164  */
 165 static void setup_facing_coef( struct lp_rast_shader_inputs *inputs,
 166                                unsigned slot,
 167                                boolean frontface,
 168                                unsigned usage_mask)
 169 {
 170    /* convert TRUE to 1.0 and FALSE to -1.0 */
 171    if (usage_mask & TGSI_WRITEMASK_X)
 172       constant_coef( inputs, slot, 2.0f * frontface - 1.0f, 0 );
 173
 174    if (usage_mask & TGSI_WRITEMASK_Y)
 175       constant_coef( inputs, slot, 0.0f, 1 ); /* wasted */
 176
 177    if (usage_mask & TGSI_WRITEMASK_Z)
 178       constant_coef( inputs, slot, 0.0f, 2 ); /* wasted */
 179
 180    if (usage_mask & TGSI_WRITEMASK_W)
 181       constant_coef( inputs, slot, 0.0f, 3 ); /* wasted */
 182 }
 183
 184
 185 /**
 186  * Compute the tri->coef[] array dadx, dady, a0 values.
 187  */
 188 void lp_setup_tri_coef( struct lp_setup_context *setup,
 189                         struct lp_rast_shader_inputs *inputs,
 190                         const float (*v0)[4],
 191                         const float (*v1)[4],
 192                         const float (*v2)[4],
 193                         boolean frontfacing)
 194 {
 195    unsigned fragcoord_usage_mask = TGSI_WRITEMASK_XYZ;
 196    unsigned slot;
 197    unsigned i;
 198    struct lp_tri_info info;
 199    float dx01 = v0[0][0] - v1[0][0];
 200    float dy01 = v0[0][1] - v1[0][1];
 201    float dx20 = v2[0][0] - v0[0][0];
 202    float dy20 = v2[0][1] - v0[0][1];
 203    float oneoverarea = 1.0f / (dx01 * dy20 - dx20 * dy01);
 204
 205    info.v0 = v0;
 206    info.v1 = v1;
 207    info.v2 = v2;
 208    info.frontfacing = frontfacing;
 209    info.x0_center = v0[0][0] - setup->pixel_offset;
 210    info.y0_center = v0[0][1] - setup->pixel_offset;
 211    info.dx01_ooa  = dx01 * oneoverarea;
 212    info.dx20_ooa  = dx20 * oneoverarea;
 213    info.dy01_ooa  = dy01 * oneoverarea;
 214    info.dy20_ooa  = dy20 * oneoverarea;
 215
 216
 217    /* setup interpolation for all the remaining attributes:
 218     */
 219    for (slot = 0; slot < setup->fs.nr_inputs; slot++) {
 220       unsigned vert_attr = setup->fs.input[slot].src_index;
 221       unsigned usage_mask = setup->fs.input[slot].usage_mask;
 222
 223       switch (setup->fs.input[slot].interp) {
 224       case LP_INTERP_CONSTANT:
 225          if (setup->flatshade_first) {
 226             for (i = 0; i < NUM_CHANNELS; i++)
 227                if (usage_mask & (1 << i))
 228                   constant_coef(inputs, slot+1, info.v0[vert_attr][i], i);
 229          }
 230          else {
 231             for (i = 0; i < NUM_CHANNELS; i++)
 232                if (usage_mask & (1 << i))
 233                   constant_coef(inputs, slot+1, info.v2[vert_attr][i], i);
 234          }
 235          break;
 236
 237       case LP_INTERP_LINEAR:
 238          for (i = 0; i < NUM_CHANNELS; i++)
 239             if (usage_mask & (1 << i))
 240                linear_coef(inputs, &info, slot+1, vert_attr, i);
 241          break;
 242
 243       case LP_INTERP_PERSPECTIVE:
 244          for (i = 0; i < NUM_CHANNELS; i++)
 245             if (usage_mask & (1 << i))
 246                perspective_coef(inputs, &info, slot+1, vert_attr, i);
 247          fragcoord_usage_mask |= TGSI_WRITEMASK_W;
 248          break;
 249
 250       case LP_INTERP_POSITION:
 251          /*
 252           * The generated pixel interpolators will pick up the coeffs from
 253           * slot 0, so all need to ensure that the usage mask is covers all
 254           * usages.
 255           */
 256          fragcoord_usage_mask |= usage_mask;
 257          break;
 258
 259       case LP_INTERP_FACING:
 260          setup_facing_coef(inputs, slot+1, info.frontfacing, usage_mask);
 261          break;
 262
 263       default:
 264          assert(0);
 265       }
 266    }
 267
 268    /* The internal position input is in slot zero:
 269     */
 270    setup_fragcoord_coef(inputs, &info, 0, fragcoord_usage_mask);
 271 }
 272
 273 #else
 274 extern void lp_setup_coef_dummy(void);
 275 void lp_setup_coef_dummy(void)
 276 {
 277 }
 278
 279 #endif