src/gallium/auxiliary/draw/draw_vs_varient.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28  /*
  29   * Authors:
  30   *   Keith Whitwell <keith@tungstengraphics.com>
  31   */
  32
  33 #include "pipe/p_util.h"
  34 #include "draw/draw_context.h"
  35 #include "draw/draw_private.h"
  36 #include "draw/draw_vbuf.h"
  37 #include "draw/draw_vertex.h"
  38 #include "draw/draw_vs.h"
  39 #include "translate/translate.h"
  40 #include "translate/translate_cache.h"
  41
  42 /* A first pass at incorporating vertex fetch/emit functionality into
  43  */
  44 struct draw_vs_varient_generic {
  45    struct draw_vs_varient base;
  46
  47    struct draw_vertex_shader *shader;
  48    struct draw_context *draw;
  49
  50    /* Basic plan is to run these two translate functions before/after
  51     * the vertex shader's existing run_linear() routine to simulate
  52     * the inclusion of this functionality into the shader...
  53     *
  54     * Next will look at actually including it.
  55     */
  56    struct translate *fetch;
  57    struct translate *emit;
  58
  59    unsigned temp_vertex_stride;
  60 };
  61
  62
  63
  64
  65
  66 static void vsvg_set_input( struct draw_vs_varient *varient,
  67                             unsigned buffer,
  68                             const void *ptr,
  69                             unsigned stride )
  70 {
  71    struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
  72
  73    vsvg->fetch->set_buffer(vsvg->fetch,
  74                            buffer,
  75                            ptr,
  76                            stride);
  77 }
  78
  79
  80 /* Mainly for debug at this stage:
  81  */
  82 static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg,
  83                              unsigned count,
  84                              void *output_buffer )
  85 {
  86    char *ptr = (char *)output_buffer;
  87    const float *scale = vsvg->base.vs->draw->viewport.scale;
  88    const float *trans = vsvg->base.vs->draw->viewport.translate;
  89    unsigned stride = vsvg->temp_vertex_stride;
  90    unsigned j;
  91
  92    for (j = 0; j < count; j++, ptr += stride) {
  93       float *data = (float *)ptr;
  94       float w = 1.0f / data[3];
  95
  96       data[0] = data[0] * w * scale[0] + trans[0];
  97       data[1] = data[1] * w * scale[1] + trans[1];
  98       data[2] = data[2] * w * scale[2] + trans[2];
  99       data[3] = w;
 100    }
 101 }
 102
 103 static void do_viewport( struct draw_vs_varient_generic *vsvg,
 104                          unsigned count,
 105                          void *output_buffer )
 106 {
 107    char *ptr = (char *)output_buffer;
 108    const float *scale = vsvg->base.vs->draw->viewport.scale;
 109    const float *trans = vsvg->base.vs->draw->viewport.translate;
 110    unsigned stride = vsvg->temp_vertex_stride;
 111    unsigned j;
 112
 113    for (j = 0; j < count; j++, ptr += stride) {
 114       float *data = (float *)ptr;
 115
 116       data[0] = data[0] * scale[0] + trans[0];
 117       data[1] = data[1] * scale[1] + trans[1];
 118       data[2] = data[2] * scale[2] + trans[2];
 119    }
 120 }
 121
 122
 123 static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
 124                                       const unsigned *elts,
 125                                       unsigned count,
 126                                       void *output_buffer)
 127 {
 128    struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
 129    unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
 130    void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride );
 131
 132    if (0) debug_printf("%s %d \n", __FUNCTION__,  count);
 133
 134    /* Want to do this in small batches for cache locality?
 135     */
 136
 137    vsvg->fetch->run_elts( vsvg->fetch,
 138                           elts,
 139                           count,
 140                           temp_buffer );
 141
 142    vsvg->base.vs->run_linear( vsvg->base.vs,
 143                               temp_buffer,
 144                               temp_buffer,
 145                               (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
 146                               count,
 147                               temp_vertex_stride,
 148                               temp_vertex_stride);
 149
 150
 151    if (vsvg->base.key.clip) {
 152       /* not really handling clipping, just do the rhw so we can
 153        * see the results...
 154        */
 155       do_rhw_viewport( vsvg,
 156                        count,
 157                        temp_buffer );
 158    }
 159    else if (vsvg->base.key.viewport) {
 160       do_viewport( vsvg,
 161                    count,
 162                    temp_buffer );
 163    }
 164
 165
 166    vsvg->emit->set_buffer( vsvg->emit,
 167                            0,
 168                            temp_buffer,
 169                            temp_vertex_stride );
 170
 171    vsvg->emit->set_buffer( vsvg->emit,
 172                            1,
 173                            &vsvg->draw->rasterizer->point_size,
 174                            0);
 175
 176    vsvg->emit->run( vsvg->emit,
 177                     0, count,
 178                     output_buffer );
 179
 180    FREE(temp_buffer);
 181 }
 182
 183
 184 static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
 185                                         unsigned start,
 186                                         unsigned count,
 187                                         void *output_buffer )
 188 {
 189    struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient;
 190    unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
 191    void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride );
 192
 193    if (0) debug_printf("%s %d %d (sz %d, %d)\n", __FUNCTION__, start, count,
 194                        vsvg->base.key.output_stride,
 195                        temp_vertex_stride);
 196
 197    vsvg->fetch->run( vsvg->fetch,
 198                      start,
 199                      count,
 200                      temp_buffer );
 201
 202    vsvg->base.vs->run_linear( vsvg->base.vs,
 203                               temp_buffer,
 204                               temp_buffer,
 205                               (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
 206                               count,
 207                               temp_vertex_stride,
 208                               temp_vertex_stride);
 209
 210    if (vsvg->base.key.clip) {
 211       /* not really handling clipping, just do the rhw so we can
 212        * see the results...
 213        */
 214       do_rhw_viewport( vsvg,
 215                        count,
 216                        temp_buffer );
 217    }
 218    else if (vsvg->base.key.viewport) {
 219       do_viewport( vsvg,
 220                    count,
 221                    temp_buffer );
 222    }
 223
 224    vsvg->emit->set_buffer( vsvg->emit,
 225                            0,
 226                            temp_buffer,
 227                            temp_vertex_stride );
 228
 229    vsvg->emit->set_buffer( vsvg->emit,
 230                            1,
 231                            &vsvg->draw->rasterizer->point_size,
 232                            0);
 233
 234    vsvg->emit->run( vsvg->emit,
 235                     0, count,
 236                     output_buffer );
 237
 238    FREE(temp_buffer);
 239 }
 240
 241
 242
 243
 244
 245 static void vsvg_destroy( struct draw_vs_varient *varient )
 246 {
 247    FREE(varient);
 248 }
 249
 250
 251 struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs,
 252                                                  const struct draw_vs_varient_key *key )
 253 {
 254    unsigned i;
 255    struct translate_key fetch, emit;
 256
 257    struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic );
 258    if (vsvg == NULL)
 259       return NULL;
 260
 261    vsvg->base.key = *key;
 262    vsvg->base.vs = vs;
 263    vsvg->base.set_input     = vsvg_set_input;
 264    vsvg->base.run_elts      = vsvg_run_elts;
 265    vsvg->base.run_linear    = vsvg_run_linear;
 266    vsvg->base.destroy       = vsvg_destroy;
 267
 268    vsvg->draw = vs->draw;
 269
 270    vsvg->temp_vertex_stride = MAX2(key->nr_inputs,
 271                                    vsvg->base.vs->info.num_outputs) * 4 * sizeof(float);
 272
 273    /* Build free-standing fetch and emit functions:
 274     */
 275    fetch.nr_elements = key->nr_inputs;
 276    fetch.output_stride = vsvg->temp_vertex_stride;
 277    for (i = 0; i < key->nr_inputs; i++) {
 278       fetch.element[i].input_format = key->element[i].in.format;
 279       fetch.element[i].input_buffer = key->element[i].in.buffer;
 280       fetch.element[i].input_offset = key->element[i].in.offset;
 281       fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
 282       fetch.element[i].output_offset = i * 4 * sizeof(float);
 283       assert(fetch.element[i].output_offset < fetch.output_stride);
 284    }
 285
 286
 287    emit.nr_elements = key->nr_outputs;
 288    emit.output_stride = key->output_stride;
 289    for (i = 0; i < key->nr_outputs; i++) {
 290       if (key->element[i].out.format != EMIT_1F_PSIZE)
 291       {
 292          emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
 293          emit.element[i].input_buffer = 0;
 294          emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
 295          emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
 296          emit.element[i].output_offset = key->element[i].out.offset;
 297          assert(emit.element[i].input_offset < fetch.output_stride);
 298       }
 299       else {
 300          emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
 301          emit.element[i].input_buffer = 1;
 302          emit.element[i].input_offset = 0;
 303          emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
 304          emit.element[i].output_offset = key->element[i].out.offset;
 305       }
 306    }
 307
 308    vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch );
 309    vsvg->emit = draw_vs_get_emit( vs->draw, &emit );
 310
 311    return &vsvg->base;
 312 }
 313
 314
 315
 316
 317