src/gallium/drivers/llvmpipe/lp_quad_fs.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2008-2009 VMware, Inc.
   4  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   5  * All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /* Vertices are just an array of floats, with all the attributes
  30  * packed.  We currently assume a layout like:
  31  *
  32  * attr[0][0..3] - window position
  33  * attr[1..n][0..3] - remaining attributes.
  34  *
  35  * Attributes are assumed to be 4 floats wide but are packed so that
  36  * all the enabled attributes run contiguously.
  37  */
  38
  39 #include "util/u_math.h"
  40 #include "util/u_memory.h"
  41 #include "pipe/p_defines.h"
  42 #include "pipe/p_shader_tokens.h"
  43
  44 #include "lp_context.h"
  45 #include "lp_state.h"
  46 #include "lp_quad.h"
  47 #include "lp_quad_pipe.h"
  48 #include "lp_texture.h"
  49 #include "lp_tex_sample.h"
  50
  51
  52 struct quad_shade_stage
  53 {
  54    struct quad_stage stage;  /**< base class */
  55    struct tgsi_exec_machine *machine;
  56    struct tgsi_exec_vector *inputs, *outputs;
  57 };
  58
  59
  60 /** cast wrapper */
  61 static INLINE struct quad_shade_stage *
  62 quad_shade_stage(struct quad_stage *qs)
  63 {
  64    return (struct quad_shade_stage *) qs;
  65 }
  66
  67
  68 static void
  69 shader_prepare( const struct lp_fragment_shader *shader,
  70                 struct tgsi_exec_machine *machine,
  71                 struct tgsi_sampler **samplers )
  72 {
  73    /*
  74     * Bind tokens/shader to the interpreter's machine state.
  75     * Avoid redundant binding.
  76     */
  77    if (machine->Tokens != shader->base.tokens) {
  78       tgsi_exec_machine_bind_shader( machine,
  79                                      shader->base.tokens,
  80                                      PIPE_MAX_SAMPLERS,
  81                                      samplers );
  82    }
  83 }
  84
  85
  86
  87 static void
  88 setup_pos_vector(struct lp_fragment_shader *shader,
  89                  const struct tgsi_interp_coef *coef,
  90                  float x, float y)
  91 {
  92    uint chan;
  93
  94    /* do X */
  95    shader->pos[0].f[0] = x;
  96    shader->pos[0].f[1] = x + 1;
  97    shader->pos[0].f[2] = x;
  98    shader->pos[0].f[3] = x + 1;
  99
 100    /* do Y */
 101    shader->pos[1].f[0] = y;
 102    shader->pos[1].f[1] = y;
 103    shader->pos[1].f[2] = y + 1;
 104    shader->pos[1].f[3] = y + 1;
 105
 106    /* do Z and W for all fragments in the quad */
 107    for (chan = 2; chan < 4; chan++) {
 108       const float dadx = coef->dadx[chan];
 109       const float dady = coef->dady[chan];
 110       const float a0 = coef->a0[chan] + dadx * x + dady * y;
 111       shader->pos[chan].f[0] = a0;
 112       shader->pos[chan].f[1] = a0 + dadx;
 113       shader->pos[chan].f[2] = a0 + dady;
 114       shader->pos[chan].f[3] = a0 + dadx + dady;
 115    }
 116 }
 117
 118
 119 static void
 120 setup_coef_vector(struct lp_fragment_shader *shader,
 121                   const struct tgsi_interp_coef *coef)
 122 {
 123    unsigned attrib, chan, i;
 124
 125    for (attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; ++attrib) {
 126       for (chan = 0; chan < NUM_CHANNELS; ++chan) {
 127          for( i = 0; i < QUAD_SIZE; ++i ) {
 128             shader->a0[attrib][chan].f[i] = coef[attrib].a0[chan];
 129             shader->dadx[attrib][chan].f[i] = coef[attrib].dadx[chan];
 130             shader->dady[attrib][chan].f[i] = coef[attrib].dady[chan];
 131          }
 132       }
 133    }
 134 }
 135
 136
 137 /* TODO: codegenerate the whole run function, skip this wrapper.
 138  * TODO: break dependency on tgsi_exec_machine struct
 139  * TODO: push Position calculation into the generated shader
 140  * TODO: process >1 quad at a time
 141  */
 142 static unsigned
 143 shader_run( struct lp_fragment_shader *shader,
 144             struct tgsi_exec_machine *machine,
 145             struct quad_header *quad )
 146 {
 147    unsigned mask;
 148
 149    /* Compute X, Y, Z, W vals for this quad */
 150    setup_pos_vector(shader,
 151                     quad->posCoef,
 152                     (float)quad->input.x0, (float)quad->input.y0);
 153
 154    setup_coef_vector(shader,
 155                      quad->coef);
 156
 157    /* init kill mask */
 158    tgsi_set_kill_mask(machine, 0x0);
 159    tgsi_set_exec_mask(machine, 1, 1, 1, 1);
 160
 161    memset(machine->Outputs, 0, sizeof machine->Outputs);
 162
 163    shader->jit_function( shader->pos,
 164                          shader->a0, shader->dadx, shader->dady,
 165                          machine->Consts,
 166                          machine->Outputs,
 167                          machine->Samplers);
 168
 169    /* FIXME */
 170    mask = ~0;
 171
 172    return mask;
 173 }
 174
 175
 176 /**
 177  * Execute fragment shader for the four fragments in the quad.
 178  */
 179 static boolean
 180 shade_quad(struct quad_stage *qs, struct quad_header *quad)
 181 {
 182    struct quad_shade_stage *qss = quad_shade_stage( qs );
 183    struct llvmpipe_context *llvmpipe = qs->llvmpipe;
 184    struct tgsi_exec_machine *machine = qss->machine;
 185    boolean z_written;
 186
 187    /* run shader */
 188    quad->inout.mask &= shader_run( llvmpipe->fs, machine, quad );
 189    if (quad->inout.mask == 0)
 190       return FALSE;
 191
 192    /* store outputs */
 193    z_written = FALSE;
 194    {
 195       const ubyte *sem_name = llvmpipe->fs->info.output_semantic_name;
 196       const ubyte *sem_index = llvmpipe->fs->info.output_semantic_index;
 197       const uint n = qss->stage.llvmpipe->fs->info.num_outputs;
 198       uint i;
 199       for (i = 0; i < n; i++) {
 200          switch (sem_name[i]) {
 201          case TGSI_SEMANTIC_COLOR:
 202             {
 203                uint cbuf = sem_index[i];
 204                memcpy(quad->output.color[cbuf],
 205                       &machine->Outputs[i].xyzw[0].f[0],
 206                       sizeof(quad->output.color[0]) );
 207             }
 208             break;
 209          case TGSI_SEMANTIC_POSITION:
 210             {
 211                uint j;
 212                for (j = 0; j < 4; j++) {
 213                   quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j];
 214                }
 215                z_written = TRUE;
 216             }
 217             break;
 218          }
 219       }
 220    }
 221
 222    return TRUE;
 223 }
 224
 225
 226
 227 static void
 228 coverage_quad(struct quad_stage *qs, struct quad_header *quad)
 229 {
 230    struct llvmpipe_context *llvmpipe = qs->llvmpipe;
 231    uint cbuf;
 232
 233    /* loop over colorbuffer outputs */
 234    for (cbuf = 0; cbuf < llvmpipe->framebuffer.nr_cbufs; cbuf++) {
 235       float (*quadColor)[4] = quad->output.color[cbuf];
 236       unsigned j;
 237       for (j = 0; j < QUAD_SIZE; j++) {
 238          assert(quad->input.coverage[j] >= 0.0);
 239          assert(quad->input.coverage[j] <= 1.0);
 240          quadColor[3][j] *= quad->input.coverage[j];
 241       }
 242    }
 243 }
 244
 245
 246
 247 static void
 248 shade_quads(struct quad_stage *qs,
 249                  struct quad_header *quads[],
 250                  unsigned nr)
 251 {
 252    struct quad_shade_stage *qss = quad_shade_stage( qs );
 253    struct llvmpipe_context *llvmpipe = qs->llvmpipe;
 254    struct tgsi_exec_machine *machine = qss->machine;
 255
 256    unsigned i, pass = 0;
 257
 258    machine->Consts = llvmpipe->mapped_constants[PIPE_SHADER_FRAGMENT];
 259    machine->InterpCoefs = quads[0]->coef;
 260
 261    for (i = 0; i < nr; i++) {
 262       if (!shade_quad(qs, quads[i]))
 263          continue;
 264
 265       if (/*do_coverage*/ 0)
 266          coverage_quad( qs, quads[i] );
 267
 268       quads[pass++] = quads[i];
 269    }
 270
 271    if (pass)
 272       qs->next->run(qs->next, quads, pass);
 273 }
 274
 275
 276
 277
 278
 279 /**
 280  * Per-primitive (or per-begin?) setup
 281  */
 282 static void
 283 shade_begin(struct quad_stage *qs)
 284 {
 285    struct quad_shade_stage *qss = quad_shade_stage(qs);
 286    struct llvmpipe_context *llvmpipe = qs->llvmpipe;
 287
 288    shader_prepare( llvmpipe->fs,
 289                    qss->machine,
 290                    (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list );
 291
 292    qs->next->begin(qs->next);
 293 }
 294
 295
 296 static void
 297 shade_destroy(struct quad_stage *qs)
 298 {
 299    struct quad_shade_stage *qss = (struct quad_shade_stage *) qs;
 300
 301    tgsi_exec_machine_destroy(qss->machine);
 302
 303    FREE( qs );
 304 }
 305
 306
 307 struct quad_stage *
 308 lp_quad_shade_stage( struct llvmpipe_context *llvmpipe )
 309 {
 310    struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage);
 311    if (!qss)
 312       goto fail;
 313
 314    qss->stage.llvmpipe = llvmpipe;
 315    qss->stage.begin = shade_begin;
 316    qss->stage.run = shade_quads;
 317    qss->stage.destroy = shade_destroy;
 318
 319    qss->machine = tgsi_exec_machine_create();
 320    if (!qss->machine)
 321       goto fail;
 322
 323    return &qss->stage;
 324
 325 fail:
 326    if (qss && qss->machine)
 327       tgsi_exec_machine_destroy(qss->machine);
 328
 329    FREE(qss);
 330    return NULL;
 331 }