include surface.offset in address calculations
[mesa.git] / src / mesa / pipe / softpipe / sp_quad_fs.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5
4 *
5 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /* Vertices are just an array of floats, with all the attributes
26 * packed. We currently assume a layout like:
27 *
28 * attr[0][0..3] - window position
29 * attr[1..n][0..3] - remaining attributes.
30 *
31 * Attributes are assumed to be 4 floats wide but are packed so that
32 * all the enabled attributes run contiguously.
33 */
34
35 #include "glheader.h"
36 #include "imports.h"
37 #include "sp_context.h"
38 #include "sp_headers.h"
39 #include "sp_quad.h"
40 #include "sp_tex_sample.h"
41 #include "tgsi/core/tgsi_core.h"
42
43 #if 0
44 #if defined __GNUC__
45 #define ALIGNED_ATTRIBS 1
46 #else
47 #define ALIGNED_ATTRIBS 0
48 #endif
49 #else
50 #define ALIGNED_ATTRIBS 0
51 #endif
52
53
/**
 * Quad fragment-shading stage: runs the fragment shader on each quad.
 * Subclass of quad_stage.
 */
struct quad_shade_stage
{
   struct quad_stage stage;   /**< base class; must be first so the quad_shade_stage() cast works */
   struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS];   /**< one texture sampler per unit */
};
59
60
61 /** cast wrapper */
62 static INLINE struct quad_shade_stage *
63 quad_shade_stage(struct quad_stage *qs)
64 {
65 return (struct quad_shade_stage *) qs;
66 }
67
68
69
/**
 * Per-quad interpolation scratch state used while shading.
 * attr[][][] holds one value per attribute, per channel, per fragment.
 */
struct exec_machine {
   const struct setup_coefficient *coef; /**< will point to quad->coef */

#if ALIGNED_ATTRIBS
   /* 16-byte aligned so the TGSI exec machine can read it in place
    * (see the ALIGNED_ATTRIBS path in shade_quad()). */
   GLfloat attr[FRAG_ATTRIB_MAX][NUM_CHANNELS][QUAD_SIZE] __attribute__(( aligned( 16 ) ));
#else
   GLfloat attr[FRAG_ATTRIB_MAX][NUM_CHANNELS][QUAD_SIZE];
#endif
};
79
80
81 /**
82 * Compute quad's attributes values, as constants (GL_FLAT shading).
83 */
84 static INLINE void cinterp( struct exec_machine *exec,
85 GLuint attrib,
86 GLuint i )
87 {
88 GLuint j;
89
90 for (j = 0; j < QUAD_SIZE; j++) {
91 exec->attr[attrib][i][j] = exec->coef[attrib].a0[i];
92 }
93 }
94
95
96 /**
97 * Compute quad's attribute values by linear interpolation.
98 *
99 * Push into the fp:
100 *
101 * INPUT[attr] = MAD COEF_A0[attr], COEF_DADX[attr], INPUT_WPOS.xxxx
102 * INPUT[attr] = MAD INPUT[attr], COEF_DADY[attr], INPUT_WPOS.yyyy
103 */
104 static INLINE void linterp( struct exec_machine *exec,
105 GLuint attrib,
106 GLuint i )
107 {
108 GLuint j;
109
110 for (j = 0; j < QUAD_SIZE; j++) {
111 const GLfloat x = exec->attr[FRAG_ATTRIB_WPOS][0][j];
112 const GLfloat y = exec->attr[FRAG_ATTRIB_WPOS][1][j];
113 exec->attr[attrib][i][j] = (exec->coef[attrib].a0[i] +
114 exec->coef[attrib].dadx[i] * x +
115 exec->coef[attrib].dady[i] * y);
116 }
117 }
118
119
120 /**
121 * Compute quad's attribute values by linear interpolation with
122 * perspective correction.
123 *
124 * Push into the fp:
125 *
126 * INPUT[attr] = MAD COEF_DADX[attr], INPUT_WPOS.xxxx, COEF_A0[attr]
127 * INPUT[attr] = MAD COEF_DADY[attr], INPUT_WPOS.yyyy, INPUT[attr]
128 * TMP = RCP INPUT_WPOS.w
129 * INPUT[attr] = MUL INPUT[attr], TMP.xxxx
130 *
131 */
132 static INLINE void pinterp( struct exec_machine *exec,
133 GLuint attrib,
134 GLuint i )
135 {
136 GLuint j;
137
138 for (j = 0; j < QUAD_SIZE; j++) {
139 const GLfloat x = exec->attr[FRAG_ATTRIB_WPOS][0][j];
140 const GLfloat y = exec->attr[FRAG_ATTRIB_WPOS][1][j];
141 /* FRAG_ATTRIB_WPOS.w here is really 1/w */
142 const GLfloat w = 1.0 / exec->attr[FRAG_ATTRIB_WPOS][3][j];
143 exec->attr[attrib][i][j] = ((exec->coef[attrib].a0[i] +
144 exec->coef[attrib].dadx[i] * x +
145 exec->coef[attrib].dady[i] * y) * w);
146 }
147 }
148
149
/* Interpolate the quad's attribute values, then run the fragment
 * shader (TGSI exec machine) on the quad and pass it down the
 * pipeline if any fragments survive.
 *
 * This should be done by the fragment shader execution unit (code
 * generated from the decl instructions).  Do it here for now.
 */
static void
shade_quad( struct quad_stage *qs, struct quad_header *quad )
{
   struct quad_shade_stage *qss = quad_shade_stage(qs);
   struct softpipe_context *softpipe = qs->softpipe;
   struct exec_machine exec;
   const GLfloat fx = quad->x0;
   const GLfloat fy = quad->y0;
   GLuint attr, i;

   exec.coef = quad->coef;

   /* Position: fragment layout within the 2x2 quad is
    *   [0]=(x,y)  [1]=(x+1,y)  [2]=(x,y+1)  [3]=(x+1,y+1)
    */
   exec.attr[FRAG_ATTRIB_WPOS][0][0] = fx;
   exec.attr[FRAG_ATTRIB_WPOS][0][1] = fx + 1.0;
   exec.attr[FRAG_ATTRIB_WPOS][0][2] = fx;
   exec.attr[FRAG_ATTRIB_WPOS][0][3] = fx + 1.0;

   exec.attr[FRAG_ATTRIB_WPOS][1][0] = fy;
   exec.attr[FRAG_ATTRIB_WPOS][1][1] = fy;
   exec.attr[FRAG_ATTRIB_WPOS][1][2] = fy + 1.0;
   exec.attr[FRAG_ATTRIB_WPOS][1][3] = fy + 1.0;

   /* Z and W are done by linear interpolation */
   if (softpipe->need_z) {
      linterp(&exec, 0, 2);   /* attr[0].z */
   }

   if (softpipe->need_w) {
      linterp(&exec, 0, 3);   /* attr[0].w (actually 1/w, see pinterp) */
      /*invert(&exec, 0, 3);*/
   }

   /* Interpolate all the remaining attributes.  This will get pushed
    * into the fragment program's responsibilities at some point.
    * Start at 1 to skip fragment position attribute (computed above).
    */
   for (attr = 1; attr < quad->nr_attrs; attr++) {
      switch (softpipe->interp[attr]) {
      case INTERP_CONSTANT:
         for (i = 0; i < NUM_CHANNELS; i++)
            cinterp(&exec, attr, i);
         break;

      case INTERP_LINEAR:
         for (i = 0; i < NUM_CHANNELS; i++)
            linterp(&exec, attr, i);
         break;

      case INTERP_PERSPECTIVE:
         for (i = 0; i < NUM_CHANNELS; i++)
            pinterp(&exec, attr, i);
         break;
      }
   }

#if 1
   /*softpipe->run_fs( tri->fp, quad, &tri->outputs );*/

   {
      struct tgsi_exec_machine machine;
      /* over-allocate by one vector so tgsi_align_128bit() can round up */
      struct tgsi_exec_vector outputs[FRAG_ATTRIB_MAX + 1];
      struct tgsi_exec_vector *aoutputs;
      GLuint i;

#if !ALIGNED_ATTRIBS
      struct tgsi_exec_vector inputs[FRAG_ATTRIB_MAX + 1];
      struct tgsi_exec_vector *ainputs;
#endif

#ifdef DEBUG
      memset(&machine, 0, sizeof(machine));
#endif

      /* init machine state */
      tgsi_exec_machine_init(
         &machine,
         softpipe->fs.tokens,
         PIPE_MAX_SAMPLERS, qss->samplers);

      /* Consts does not require 16 byte alignment. */
      machine.Consts = softpipe->fs.constants->constant;

      aoutputs = (struct tgsi_exec_vector *) tgsi_align_128bit( outputs );
      machine.Outputs = aoutputs;

      assert( sizeof( struct tgsi_exec_vector ) == sizeof( exec.attr[0] ) );

#if ALIGNED_ATTRIBS
      /* exec.attr is already 16-byte aligned: feed it to the machine
       * directly, no copy needed. */
      machine.Inputs = (struct tgsi_exec_vector *) exec.attr;

      for (i = 0; i < softpipe->nr_attrs; i++) {
         /* Make sure fp_attr_to_slot[] is an identity transform. */
         assert( softpipe->fp_attr_to_slot[i] == i );
      }
#else
      ainputs = (struct tgsi_exec_vector *) tgsi_align_128bit( inputs );
      machine.Inputs = ainputs;

      /* load input registers */
      for (i = 0; i < softpipe->nr_attrs; i++) {
#if 01
         /* Make sure fp_attr_to_slot[] is an identity transform. */
         /*
         assert( softpipe->fp_attr_to_slot[i] == i );
         */
         memcpy(
            &ainputs[i],
            exec.attr[i],
            sizeof( ainputs[0] ) );
#else
         /* disabled alternative: remap via fp_attr_to_slot[] */
         memcpy(
            &ainputs[i],
            exec.attr[softpipe->fp_attr_to_slot[i]],
            sizeof( ainputs[0] ) );
#endif
      }
#endif

      /* run shader */
      tgsi_exec_machine_run( &machine );

      /* store result color */
      memcpy(quad->outputs.color,
             &aoutputs[FRAG_ATTRIB_COL0].xyzw[0].f[0],
             sizeof(quad->outputs.color));
      if (softpipe->need_z) {
         /* XXX temporary: depth comes from the interpolated attr,
          * not from the shader outputs. */
         quad->outputs.depth[0] = exec.attr[0][2][0];
         quad->outputs.depth[1] = exec.attr[0][2][1];
         quad->outputs.depth[2] = exec.attr[0][2][2];
         quad->outputs.depth[3] = exec.attr[0][2][3];
      }
   }
#else
   /* disabled fallback path: copy interpolated color straight through
    * without running the shader. */
   {
      GLuint attr = softpipe->fp_attr_to_slot[FRAG_ATTRIB_COL0];
      assert(attr);

      memcpy(quad->outputs.color,
             exec.attr[attr],
             sizeof(quad->outputs.color));

      if (softpipe->need_z) {
         quad->outputs.depth[0] = exec.attr[0][2][0];
         quad->outputs.depth[1] = exec.attr[0][2][1];
         quad->outputs.depth[2] = exec.attr[0][2][2];
         quad->outputs.depth[3] = exec.attr[0][2][3];
      }
   }
#endif

   /* shader may cull fragments */
   if (quad->mask)
      qs->next->run(qs->next, quad);
}
310
311
312 /**
313 * Per-primitive (or per-begin?) setup
314 */
315 static void shade_begin(struct quad_stage *qs)
316 {
317 struct quad_shade_stage *qss = quad_shade_stage(qs);
318 struct softpipe_context *softpipe = qs->softpipe;
319 GLuint i;
320 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
321 qss->samplers[i].state = &softpipe->sampler[i];
322 qss->samplers[i].texture = softpipe->texture[i];
323 qss->samplers[i].get_sample = sp_get_sample;
324 qss->samplers[i].pipe = &softpipe->pipe;
325 /* init cache info here */
326 qss->samplers[i].cache_x =
327 qss->samplers[i].cache_y = -1;
328 qss->samplers[i].cache_level = -1;
329 }
330
331 if (qs->next)
332 qs->next->begin(qs->next);
333 }
334
335
336 struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe )
337 {
338 struct quad_shade_stage *stage = CALLOC_STRUCT(quad_shade_stage);
339
340 stage->stage.softpipe = softpipe;
341 stage->stage.begin = shade_begin;
342 stage->stage.run = shade_quad;
343
344 return &stage->stage;
345 }