src/gallium/drivers/cell/spu/spu_vertex_fetch.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * (C) Copyright IBM Corporation 2008
   5  * All Rights Reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29  /*
  30   * Authors:
  31   *   Keith Whitwell <keith@tungstengraphics.com>
  32   *   Ian Romanick <idr@us.ibm.com>
  33   */
  34
  35 #include <spu_mfcio.h>
  36
  37 #include "pipe/p_util.h"
  38 #include "pipe/p_state.h"
  39 #include "pipe/p_shader_tokens.h"
  40 #include "spu_exec.h"
  41 #include "spu_vertex_shader.h"
  42 #include "spu_main.h"
  43
  44 #define CACHE_NAME            attribute
  45 #define CACHED_TYPE           qword
  46 #define CACHE_TYPE            CACHE_TYPE_RO
  47 #define CACHE_SET_TAGID(set)  TAG_VERTEX_BUFFER
  48 #define CACHE_LOG2NNWAY       2
  49 #define CACHE_LOG2NSETS       6
  50 #include <cache-api.h>
  51
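/* Yes folks, this is ugly.  The cache geometry macros are re-declared by
 * hand after including cache-api.h so that the invalidation loop in
 * spu_update_vertex_fetch() can use CACHE_NWAY and CACHE_NSETS.  The values
 * mirror CACHE_LOG2NNWAY (2) and CACHE_LOG2NSETS (6) above.
 */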
  54 #undef CACHE_NWAY
  55 #undef CACHE_NSETS
  56 #define CACHE_NAME            attribute
  57 #define CACHE_NWAY            4
  58 #define CACHE_NSETS           (1U << 6)
  59
  60
  61 #define DRAW_DBG 0
  62
  63 typedef void (*spu_fetch_func)(qword *out, const qword *in,
  64                                const qword *shuffle_data);
  65
  66
  67 static const qword fetch_shuffle_data[] = {
  68    /* Shuffle used by CVT_64_FLOAT
  69     */
  70    {
  71       0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
  72       0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
  73    },
  74
  75    /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED
  76     */
  77    {
  78       0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80,
  79       0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80,
  80    },
  81
  82    /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED
  83     */
  84    {
  85       0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80,
  86       0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80,
  87    },
  88
  89    /* High value shuffle used by trans4x4.
  90     */
  91    {
  92       0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
  93       0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17
  94    },
  95
  96    /* Low value shuffle used by trans4x4.
  97     */
  98    {
  99       0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
 100       0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F
 101    }
 102 };
 103
 104
 105 /**
 106  * Fetch between 1 and 32 bytes from an unaligned address
 107  */
 108 static INLINE void
 109 fetch_unaligned(qword *dst, unsigned ea, unsigned size)
 110 {
 111    qword tmp[4];
 112    const int shift = ea & 0x0f;
 113    const unsigned aligned_start_ea = ea & ~0x0f;
 114    const unsigned aligned_end_ea = (ea + size) & ~0x0f;
 115    const unsigned num_entries = ((aligned_end_ea - aligned_start_ea) / 16) + 1;
 116    unsigned i;
 117
 118
 119    if (shift == 0) {
 120       /* Data is already aligned.  Fetch directly into the destination buffer.
 121        */
 122       for (i = 0; i < num_entries; i++) {
 123          dst[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16));
 124       }
 125    } else {
 126       /* Fetch data from the cache to the local buffer.
 127        */
 128       for (i = 0; i < num_entries; i++) {
 129          tmp[i] = cache_rd(attribute, (ea & ~0x0f) + (i * 16));
 130       }
 131
 132
 133       /* Fix the alignment of the data and write to the destination buffer.
 134        */
 135       for (i = 0; i < ((size + 15) / 16); i++) {
 136          dst[i] = si_or((qword) spu_slqwbyte(tmp[i], shift),
 137                         (qword) spu_rlmaskqwbyte(tmp[i + 1], shift - 16));
 138       }
 139    }
 140 }
 141
 142
 143 /**
 144  * Fetch vertex attributes for 'count' vertices.
 145  */
 146 static void generic_vertex_fetch(struct spu_vs_context *draw,
 147                                  struct spu_exec_machine *machine,
 148                                  const unsigned *elts,
 149                                  unsigned count)
 150 {
 151    unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
 152    unsigned attr;
 153
 154    assert(count <= 4);
 155
 156 #if DRAW_DBG
 157    printf("SPU: %s count = %u, nr_attrs = %u\n",
 158           __FUNCTION__, count, nr_attrs);
 159 #endif
 160
 161    /* loop over vertex attributes (vertex shader inputs)
 162     */
 163    for (attr = 0; attr < nr_attrs; attr++) {
 164       const unsigned pitch = draw->vertex_fetch.pitch[attr];
 165       const uint64_t src = draw->vertex_fetch.src_ptr[attr];
 166       const spu_fetch_func fetch = (spu_fetch_func)
 167           (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]);
 168       unsigned i;
 169       unsigned idx;
 170       const unsigned bytes_per_entry = draw->vertex_fetch.size[attr];
 171       const unsigned quads_per_entry = (bytes_per_entry + 15) / 16;
 172       qword in[2 * 4];
 173
 174
 175       /* Fetch four attributes for four vertices.
 176        */
 177       idx = 0;
 178       for (i = 0; i < count; i++) {
 179          const uint64_t addr = src + (elts[i] * pitch);
 180
 181 #if DRAW_DBG
 182          printf("SPU: fetching = 0x%llx\n", addr);
 183 #endif
 184
 185          fetch_unaligned(& in[idx], addr, bytes_per_entry);
 186          idx += quads_per_entry;
 187       }
 188
 189       /* Be nice and zero out any missing vertices.
 190        */
 191       (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword));
 192
 193
 194       /* Convert all 4 vertices to vectors of float.
 195        */
 196       (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data);
 197    }
 198 }
 199
 200
 201 void spu_update_vertex_fetch( struct spu_vs_context *draw )
 202 {
 203    unsigned i;
 204
 205
 206    /* Invalidate the vertex cache.
 207     */
 208    for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) {
 209       CACHELINE_CLEARVALID(i);
 210    }
 211
 212
 213    draw->vertex_fetch.fetch_func = generic_vertex_fetch;
 214 }