From: Ian Romanick Date: Fri, 15 Feb 2008 19:03:54 +0000 (-0800) Subject: Cell: Enable code gen for SPE attribute fetch X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3320b1874e810583f95b93a89697b2955987b84f;p=mesa.git Cell: Enable code gen for SPE attribute fetch Doubles are still unsupported. --- diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 4de514c3586..74b131fbefc 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -90,6 +90,7 @@ #define CELL_CMD_STATE_VS_ARRAY_INFO 16 #define CELL_CMD_STATE_BLEND 17 #define CELL_CMD_VS_EXECUTE 18 +#define CELL_CMD_STATE_ATTRIB_FETCH 19 #define CELL_NUM_BUFFERS 4 @@ -128,13 +129,19 @@ struct cell_command_clear_surface */ struct cell_array_info { - uint64_t base; /**< Base address of the 0th element. */ - uint attr; /**< Attribute that this state is for. */ - uint pitch; /**< Byte pitch from one entry to the next. */ - uint format; /**< Pipe format of each entry. */ + uint64_t base; /**< Base address of the 0th element. */ + uint attr; /**< Attribute that this state is for. */ + uint pitch; /**< Byte pitch from one entry to the next. */ + uint size; + uint function_offset; } ALIGN16_ATTRIB; +struct cell_attribute_fetch_code { + uint64_t base; + uint size; +}; + struct cell_shader_info { unsigned num_outputs; diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index a4c3f29e8a4..196ab777f54 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -34,6 +34,7 @@ SOURCES = \ cell_surface.c \ cell_texture.c \ cell_vbuf.c \ + cell_vertex_fetch.c \ cell_vertex_shader.c \ cell_winsys.c diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 6196c0c72f9..91f8e542a25 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -36,6 +36,7 @@ #include "draw/draw_vbuf.h" #include "cell_winsys.h" #include "cell/common.h" +#include "ppc/rtasm/spe_asm.h" struct cell_vbuf_render; @@ -111,6 +112,9 @@ struct cell_context /** [4] to ensure 16-byte alignment for each status word */ uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + + struct spe_function attrib_fetch; + unsigned attrib_fetch_offsets[PIPE_ATTRIB_MAX]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index f2432f4ff52..f10689a959e 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -27,10 +27,10 @@ #include "pipe/p_context.h" #include "pipe/p_format.h" -#include "pipe/draw/draw_context.h" -#include "pipe/draw/draw_private.h" +#include "../auxiliary/draw/draw_context.h" +#include "../auxiliary/draw/draw_private.h" -#include "pipe/cell/ppu/cell_context.h" +#include "cell_context.h" #include "ppc/rtasm/spe_asm.h" typedef uint64_t register_mask; @@ -380,13 +380,4 @@ void cell_update_vertex_fetch(struct draw_context *draw) cell->attrib_fetch_offsets[function_index[i]]; } } - - static first_time = 1; - if (first_time) { - first_time = 0; - const unsigned instructions = p->csr - p->store; - for (i = 0; i < instructions; i++) { - printf("\t.long\t0x%08x\n", p->store[i]); - } - } } diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 0ba4506505e..6a1d3bc20a1 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -55,14 +55,32 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) uint64_t *batch; struct cell_array_info *array_info; unsigned i, j; + struct cell_attribute_fetch_code *cf; assert(draw->vs.queue_nr != 0); /* XXX: do this on statechange: */ draw_update_vertex_fetch(draw); + cell_update_vertex_fetch(draw); + + + batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf)); + batch[0] = CELL_CMD_STATE_ATTRIB_FETCH; + cf = (struct cell_attribute_fetch_code *) (&batch[1]); + cf->base = cell->attrib_fetch.store; + cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr + - (void *) cell->attrib_fetch.store)); + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + const enum pipe_format format = draw->vertex_element[i].src_format; + const unsigned count = ((pf_size_x(format) != 0) + + (pf_size_y(format) != 0) + + (pf_size_z(format) != 0) + + (pf_size_w(format) != 0)); + const unsigned size = pf_size_x(format) * count; + batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO; @@ -72,7 +90,8 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; array_info->attr = i; array_info->pitch = draw->vertex_fetch.pitch[i]; - array_info->format = draw->vertex_element[i].src_format; + array_info->size = size; + array_info->function_offset = cell->attrib_fetch_offsets[i]; } batch = cell_batch_alloc(cell, sizeof(batch[0]) diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 1e7243b8639..fcbf0f841e6 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -54,6 +54,9 @@ struct spu_global spu; struct spu_vs_context draw; +static unsigned char attribute_fetch_code_buffer[136 * PIPE_ATTRIB_MAX] + ALIGN16_ATTRIB; + /** * Tell the PPU that this SPU has finished copying a buffer to * local store and that it may be reused by the PPU. @@ -306,7 +309,8 @@ cmd_state_vs_array_info(const struct cell_array_info *vs_info) ASSERT(attr < PIPE_ATTRIB_MAX); draw.vertex_fetch.src_ptr[attr] = vs_info->base; draw.vertex_fetch.pitch[attr] = vs_info->pitch; - draw.vertex_fetch.format[attr] = vs_info->format; + draw.vertex_fetch.size[attr] = vs_info->size; + draw.vertex_fetch.code_offset[attr] = vs_info->function_offset; draw.vertex_fetch.dirty = 1; } @@ -433,6 +437,22 @@ cmd_batch(uint opcode) cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); break; + case CELL_CMD_STATE_ATTRIB_FETCH: { + struct cell_attribute_fetch_code *code = + (struct cell_attribute_fetch_code *) &buffer[pos+1]; + + mfc_get(attribute_fetch_code_buffer, + (unsigned int) code->base, /* src */ + code->size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + draw.vertex_fetch.code = attribute_fetch_code_buffer; + pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); + break; + } default: printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); ASSERT(0); diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 45e3c26c001..55c6c287175 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * (C) Copyright IBM Corporation 2008 * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -28,10 +29,10 @@ /* * Authors: * Keith Whitwell + * Ian Romanick */ #include -#include #include "pipe/p_util.h" #include "pipe/p_state.h" @@ -59,6 +60,10 @@ #define DRAW_DBG 0 +typedef void (*spu_fetch_func)(qword *out, const qword *in, + const qword *shuffle_data); + + static const qword fetch_shuffle_data[] = { /* Shuffle used by CVT_64_FLOAT */ @@ -97,22 +102,6 @@ static const qword fetch_shuffle_data[] = { }; -static INLINE void -trans4x4(qword row0, qword row1, qword row2, qword row3, qword *out, - const qword *shuffle) -{ - qword t1 = si_shufb(row0, row2, shuffle[3]); - qword t2 = si_shufb(row0, row2, shuffle[4]); - qword t3 = si_shufb(row1, row3, shuffle[3]); - qword t4 = si_shufb(row1, row3, shuffle[4]); - - out[0] = si_shufb(t1, t3, shuffle[3]); - out[1] = si_shufb(t1, t3, shuffle[4]); - out[2] = si_shufb(t2, t4, shuffle[3]); - out[3] = si_shufb(t2, t4, shuffle[4]); -} - - /** * Fetch between 1 and 32 bytes from an unaligned address */ @@ -151,446 +140,6 @@ fetch_unaligned(qword *dst, unsigned ea, unsigned size) } -#define CVT_32_FLOAT(q, s) (*(q)) - -static INLINE qword -CVT_64_FLOAT(const qword *qw, const qword *shuffle) -{ - qword a = si_frds(qw[0]); - qword b = si_frds(si_rotqbyi(qw[0], 8)); - qword c = si_frds(qw[1]); - qword d = si_frds(si_rotqbyi(qw[1], 8)); - - qword ab = si_shufb(a, b, shuffle[0]); - qword cd = si_shufb(c, d, si_rotqbyi(shuffle[0], 8)); - - return si_or(ab, cd); -} - - -static INLINE qword -CVT_8_USCALED(const qword *qw, const qword *shuffle) -{ - return si_cuflt(si_shufb(*qw, *qw, shuffle[1]), 0); -} - - -static INLINE qword -CVT_16_USCALED(const qword *qw, const qword *shuffle) -{ - return si_cuflt(si_shufb(*qw, *qw, shuffle[2]), 0); -} - - -static INLINE qword -CVT_32_USCALED(const qword *qw, const qword *shuffle) -{ - (void) shuffle; - return si_cuflt(*qw, 0); -} - -static INLINE qword -CVT_8_SSCALED(const qword *qw, const qword *shuffle) -{ - return si_csflt(si_shufb(*qw, *qw, shuffle[1]), 0); -} - - -static INLINE qword -CVT_16_SSCALED(const qword *qw, const qword *shuffle) -{ - return si_csflt(si_shufb(*qw, *qw, shuffle[2]), 0); -} - - -static INLINE qword -CVT_32_SSCALED(const qword *qw, const qword *shuffle) -{ - (void) shuffle; - return si_csflt(*qw, 0); -} - - -static INLINE qword -CVT_8_UNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 255.0f); - return si_fm(CVT_8_USCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_16_UNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 65535.0f); - return si_fm(CVT_16_USCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_32_UNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 4294967295.0f); - return si_fm(CVT_32_USCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_8_SNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 127.0f); - return si_fm(CVT_8_SSCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_16_SNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 32767.0f); - return si_fm(CVT_16_SSCALED(qw, shuffle), scale); -} - - -static INLINE qword -CVT_32_SNORM(const qword *qw, const qword *shuffle) -{ - const qword scale = (qword) spu_splats(1.0f / 2147483647.0f); - return si_fm(CVT_32_SSCALED(qw, shuffle), scale); -} - -#define SZ_4 si_il(0U) -#define SZ_3 si_fsmbi(0x000f) -#define SZ_2 si_fsmbi(0x00ff) -#define SZ_1 si_fsmbi(0x0fff) - -/** - * Fetch a float[4] vertex attribute from memory, doing format/type - * conversion as needed. - * - * This is probably needed/dupliocated elsewhere, eg format - * conversion, texture sampling etc. - */ -#define FETCH_ATTRIB( NAME, SZ, CVT, N ) \ -static void \ -fetch_##NAME(qword *out, const qword *in, qword defaults, \ - const qword *shuffle) \ -{ \ - qword tmp[4]; \ - \ - tmp[0] = si_selb(CVT(in + (0 * N), shuffle), defaults, SZ); \ - tmp[1] = si_selb(CVT(in + (1 * N), shuffle), defaults, SZ); \ - tmp[2] = si_selb(CVT(in + (2 * N), shuffle), defaults, SZ); \ - tmp[3] = si_selb(CVT(in + (3 * N), shuffle), defaults, SZ); \ - trans4x4(tmp[0], tmp[1], tmp[2], tmp[3], out, shuffle); \ -} - - -FETCH_ATTRIB( R64G64B64A64_FLOAT, SZ_4, CVT_64_FLOAT, 2 ) -FETCH_ATTRIB( R64G64B64_FLOAT, SZ_3, CVT_64_FLOAT, 2 ) -FETCH_ATTRIB( R64G64_FLOAT, SZ_2, CVT_64_FLOAT, 2 ) -FETCH_ATTRIB( R64_FLOAT, SZ_1, CVT_64_FLOAT, 2 ) - -FETCH_ATTRIB( R32G32B32A32_FLOAT, SZ_4, CVT_32_FLOAT, 1 ) -FETCH_ATTRIB( R32G32B32_FLOAT, SZ_3, CVT_32_FLOAT, 1 ) -FETCH_ATTRIB( R32G32_FLOAT, SZ_2, CVT_32_FLOAT, 1 ) -FETCH_ATTRIB( R32_FLOAT, SZ_1, CVT_32_FLOAT, 1 ) - -FETCH_ATTRIB( R32G32B32A32_USCALED, SZ_4, CVT_32_USCALED, 1 ) -FETCH_ATTRIB( R32G32B32_USCALED, SZ_3, CVT_32_USCALED, 1 ) -FETCH_ATTRIB( R32G32_USCALED, SZ_2, CVT_32_USCALED, 1 ) -FETCH_ATTRIB( R32_USCALED, SZ_1, CVT_32_USCALED, 1 ) - -FETCH_ATTRIB( R32G32B32A32_SSCALED, SZ_4, CVT_32_SSCALED, 1 ) -FETCH_ATTRIB( R32G32B32_SSCALED, SZ_3, CVT_32_SSCALED, 1 ) -FETCH_ATTRIB( R32G32_SSCALED, SZ_2, CVT_32_SSCALED, 1 ) -FETCH_ATTRIB( R32_SSCALED, SZ_1, CVT_32_SSCALED, 1 ) - -FETCH_ATTRIB( R32G32B32A32_UNORM, SZ_4, CVT_32_UNORM, 1 ) -FETCH_ATTRIB( R32G32B32_UNORM, SZ_3, CVT_32_UNORM, 1 ) -FETCH_ATTRIB( R32G32_UNORM, SZ_2, CVT_32_UNORM, 1 ) -FETCH_ATTRIB( R32_UNORM, SZ_1, CVT_32_UNORM, 1 ) - -FETCH_ATTRIB( R32G32B32A32_SNORM, SZ_4, CVT_32_SNORM, 1 ) -FETCH_ATTRIB( R32G32B32_SNORM, SZ_3, CVT_32_SNORM, 1 ) -FETCH_ATTRIB( R32G32_SNORM, SZ_2, CVT_32_SNORM, 1 ) -FETCH_ATTRIB( R32_SNORM, SZ_1, CVT_32_SNORM, 1 ) - -FETCH_ATTRIB( R16G16B16A16_USCALED, SZ_4, CVT_16_USCALED, 1 ) -FETCH_ATTRIB( R16G16B16_USCALED, SZ_3, CVT_16_USCALED, 1 ) -FETCH_ATTRIB( R16G16_USCALED, SZ_2, CVT_16_USCALED, 1 ) -FETCH_ATTRIB( R16_USCALED, SZ_1, CVT_16_USCALED, 1 ) - -FETCH_ATTRIB( R16G16B16A16_SSCALED, SZ_4, CVT_16_SSCALED, 1 ) -FETCH_ATTRIB( R16G16B16_SSCALED, SZ_3, CVT_16_SSCALED, 1 ) -FETCH_ATTRIB( R16G16_SSCALED, SZ_2, CVT_16_SSCALED, 1 ) -FETCH_ATTRIB( R16_SSCALED, SZ_1, CVT_16_SSCALED, 1 ) - -FETCH_ATTRIB( R16G16B16A16_UNORM, SZ_4, CVT_16_UNORM, 1 ) -FETCH_ATTRIB( R16G16B16_UNORM, SZ_3, CVT_16_UNORM, 1 ) -FETCH_ATTRIB( R16G16_UNORM, SZ_2, CVT_16_UNORM, 1 ) -FETCH_ATTRIB( R16_UNORM, SZ_1, CVT_16_UNORM, 1 ) - -FETCH_ATTRIB( R16G16B16A16_SNORM, SZ_4, CVT_16_SNORM, 1 ) -FETCH_ATTRIB( R16G16B16_SNORM, SZ_3, CVT_16_SNORM, 1 ) -FETCH_ATTRIB( R16G16_SNORM, SZ_2, CVT_16_SNORM, 1 ) -FETCH_ATTRIB( R16_SNORM, SZ_1, CVT_16_SNORM, 1 ) - -FETCH_ATTRIB( R8G8B8A8_USCALED, SZ_4, CVT_8_USCALED, 1 ) -FETCH_ATTRIB( R8G8B8_USCALED, SZ_3, CVT_8_USCALED, 1 ) -FETCH_ATTRIB( R8G8_USCALED, SZ_2, CVT_8_USCALED, 1 ) -FETCH_ATTRIB( R8_USCALED, SZ_1, CVT_8_USCALED, 1 ) - -FETCH_ATTRIB( R8G8B8A8_SSCALED, SZ_4, CVT_8_SSCALED, 1 ) -FETCH_ATTRIB( R8G8B8_SSCALED, SZ_3, CVT_8_SSCALED, 1 ) -FETCH_ATTRIB( R8G8_SSCALED, SZ_2, CVT_8_SSCALED, 1 ) -FETCH_ATTRIB( R8_SSCALED, SZ_1, CVT_8_SSCALED, 1 ) - -FETCH_ATTRIB( R8G8B8A8_UNORM, SZ_4, CVT_8_UNORM, 1 ) -FETCH_ATTRIB( R8G8B8_UNORM, SZ_3, CVT_8_UNORM, 1 ) -FETCH_ATTRIB( R8G8_UNORM, SZ_2, CVT_8_UNORM, 1 ) -FETCH_ATTRIB( R8_UNORM, SZ_1, CVT_8_UNORM, 1 ) - -FETCH_ATTRIB( R8G8B8A8_SNORM, SZ_4, CVT_8_SNORM, 1 ) -FETCH_ATTRIB( R8G8B8_SNORM, SZ_3, CVT_8_SNORM, 1 ) -FETCH_ATTRIB( R8G8_SNORM, SZ_2, CVT_8_SNORM, 1 ) -FETCH_ATTRIB( R8_SNORM, SZ_1, CVT_8_SNORM, 1 ) - -FETCH_ATTRIB( A8R8G8B8_UNORM, SZ_4, CVT_8_UNORM, 1 ) - - - -static spu_fetch_func get_fetch_func( enum pipe_format format ) -{ - switch (format) { - case PIPE_FORMAT_R64_FLOAT: - return fetch_R64_FLOAT; - case PIPE_FORMAT_R64G64_FLOAT: - return fetch_R64G64_FLOAT; - case PIPE_FORMAT_R64G64B64_FLOAT: - return fetch_R64G64B64_FLOAT; - case PIPE_FORMAT_R64G64B64A64_FLOAT: - return fetch_R64G64B64A64_FLOAT; - - case PIPE_FORMAT_R32_FLOAT: - return fetch_R32_FLOAT; - case PIPE_FORMAT_R32G32_FLOAT: - return fetch_R32G32_FLOAT; - case PIPE_FORMAT_R32G32B32_FLOAT: - return fetch_R32G32B32_FLOAT; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return fetch_R32G32B32A32_FLOAT; - - case PIPE_FORMAT_R32_UNORM: - return fetch_R32_UNORM; - case PIPE_FORMAT_R32G32_UNORM: - return fetch_R32G32_UNORM; - case PIPE_FORMAT_R32G32B32_UNORM: - return fetch_R32G32B32_UNORM; - case PIPE_FORMAT_R32G32B32A32_UNORM: - return fetch_R32G32B32A32_UNORM; - - case PIPE_FORMAT_R32_USCALED: - return fetch_R32_USCALED; - case PIPE_FORMAT_R32G32_USCALED: - return fetch_R32G32_USCALED; - case PIPE_FORMAT_R32G32B32_USCALED: - return fetch_R32G32B32_USCALED; - case PIPE_FORMAT_R32G32B32A32_USCALED: - return fetch_R32G32B32A32_USCALED; - - case PIPE_FORMAT_R32_SNORM: - return fetch_R32_SNORM; - case PIPE_FORMAT_R32G32_SNORM: - return fetch_R32G32_SNORM; - case PIPE_FORMAT_R32G32B32_SNORM: - return fetch_R32G32B32_SNORM; - case PIPE_FORMAT_R32G32B32A32_SNORM: - return fetch_R32G32B32A32_SNORM; - - case PIPE_FORMAT_R32_SSCALED: - return fetch_R32_SSCALED; - case PIPE_FORMAT_R32G32_SSCALED: - return fetch_R32G32_SSCALED; - case PIPE_FORMAT_R32G32B32_SSCALED: - return fetch_R32G32B32_SSCALED; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - return fetch_R32G32B32A32_SSCALED; - - case PIPE_FORMAT_R16_UNORM: - return fetch_R16_UNORM; - case PIPE_FORMAT_R16G16_UNORM: - return fetch_R16G16_UNORM; - case PIPE_FORMAT_R16G16B16_UNORM: - return fetch_R16G16B16_UNORM; - case PIPE_FORMAT_R16G16B16A16_UNORM: - return fetch_R16G16B16A16_UNORM; - - case PIPE_FORMAT_R16_USCALED: - return fetch_R16_USCALED; - case PIPE_FORMAT_R16G16_USCALED: - return fetch_R16G16_USCALED; - case PIPE_FORMAT_R16G16B16_USCALED: - return fetch_R16G16B16_USCALED; - case PIPE_FORMAT_R16G16B16A16_USCALED: - return fetch_R16G16B16A16_USCALED; - - case PIPE_FORMAT_R16_SNORM: - return fetch_R16_SNORM; - case PIPE_FORMAT_R16G16_SNORM: - return fetch_R16G16_SNORM; - case PIPE_FORMAT_R16G16B16_SNORM: - return fetch_R16G16B16_SNORM; - case PIPE_FORMAT_R16G16B16A16_SNORM: - return fetch_R16G16B16A16_SNORM; - - case PIPE_FORMAT_R16_SSCALED: - return fetch_R16_SSCALED; - case PIPE_FORMAT_R16G16_SSCALED: - return fetch_R16G16_SSCALED; - case PIPE_FORMAT_R16G16B16_SSCALED: - return fetch_R16G16B16_SSCALED; - case PIPE_FORMAT_R16G16B16A16_SSCALED: - return fetch_R16G16B16A16_SSCALED; - - case PIPE_FORMAT_R8_UNORM: - return fetch_R8_UNORM; - case PIPE_FORMAT_R8G8_UNORM: - return fetch_R8G8_UNORM; - case PIPE_FORMAT_R8G8B8_UNORM: - return fetch_R8G8B8_UNORM; - case PIPE_FORMAT_R8G8B8A8_UNORM: - return fetch_R8G8B8A8_UNORM; - - case PIPE_FORMAT_R8_USCALED: - return fetch_R8_USCALED; - case PIPE_FORMAT_R8G8_USCALED: - return fetch_R8G8_USCALED; - case PIPE_FORMAT_R8G8B8_USCALED: - return fetch_R8G8B8_USCALED; - case PIPE_FORMAT_R8G8B8A8_USCALED: - return fetch_R8G8B8A8_USCALED; - - case PIPE_FORMAT_R8_SNORM: - return fetch_R8_SNORM; - case PIPE_FORMAT_R8G8_SNORM: - return fetch_R8G8_SNORM; - case PIPE_FORMAT_R8G8B8_SNORM: - return fetch_R8G8B8_SNORM; - case PIPE_FORMAT_R8G8B8A8_SNORM: - return fetch_R8G8B8A8_SNORM; - - case PIPE_FORMAT_R8_SSCALED: - return fetch_R8_SSCALED; - case PIPE_FORMAT_R8G8_SSCALED: - return fetch_R8G8_SSCALED; - case PIPE_FORMAT_R8G8B8_SSCALED: - return fetch_R8G8B8_SSCALED; - case PIPE_FORMAT_R8G8B8A8_SSCALED: - return fetch_R8G8B8A8_SSCALED; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - return fetch_A8R8G8B8_UNORM; - - case 0: - return NULL; /* not sure why this is needed */ - - default: - assert(0); - return NULL; - } -} - - -static unsigned get_vertex_size( enum pipe_format format ) -{ - switch (format) { - case PIPE_FORMAT_R64_FLOAT: - return 8; - case PIPE_FORMAT_R64G64_FLOAT: - return 2 * 8; - case PIPE_FORMAT_R64G64B64_FLOAT: - return 3 * 8; - case PIPE_FORMAT_R64G64B64A64_FLOAT: - return 4 * 8; - - case PIPE_FORMAT_R32_SSCALED: - case PIPE_FORMAT_R32_SNORM: - case PIPE_FORMAT_R32_USCALED: - case PIPE_FORMAT_R32_UNORM: - case PIPE_FORMAT_R32_FLOAT: - return 4; - case PIPE_FORMAT_R32G32_SSCALED: - case PIPE_FORMAT_R32G32_SNORM: - case PIPE_FORMAT_R32G32_USCALED: - case PIPE_FORMAT_R32G32_UNORM: - case PIPE_FORMAT_R32G32_FLOAT: - return 2 * 4; - case PIPE_FORMAT_R32G32B32_SSCALED: - case PIPE_FORMAT_R32G32B32_SNORM: - case PIPE_FORMAT_R32G32B32_USCALED: - case PIPE_FORMAT_R32G32B32_UNORM: - case PIPE_FORMAT_R32G32B32_FLOAT: - return 3 * 4; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - case PIPE_FORMAT_R32G32B32A32_SNORM: - case PIPE_FORMAT_R32G32B32A32_USCALED: - case PIPE_FORMAT_R32G32B32A32_UNORM: - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return 4 * 4; - - case PIPE_FORMAT_R16_SSCALED: - case PIPE_FORMAT_R16_SNORM: - case PIPE_FORMAT_R16_UNORM: - case PIPE_FORMAT_R16_USCALED: - return 2; - case PIPE_FORMAT_R16G16_SSCALED: - case PIPE_FORMAT_R16G16_SNORM: - case PIPE_FORMAT_R16G16_USCALED: - case PIPE_FORMAT_R16G16_UNORM: - return 2 * 2; - case PIPE_FORMAT_R16G16B16_SSCALED: - case PIPE_FORMAT_R16G16B16_SNORM: - case PIPE_FORMAT_R16G16B16_USCALED: - case PIPE_FORMAT_R16G16B16_UNORM: - return 3 * 2; - case PIPE_FORMAT_R16G16B16A16_SSCALED: - case PIPE_FORMAT_R16G16B16A16_SNORM: - case PIPE_FORMAT_R16G16B16A16_USCALED: - case PIPE_FORMAT_R16G16B16A16_UNORM: - return 4 * 2; - - case PIPE_FORMAT_R8_SSCALED: - case PIPE_FORMAT_R8_SNORM: - case PIPE_FORMAT_R8_USCALED: - case PIPE_FORMAT_R8_UNORM: - return 1; - case PIPE_FORMAT_R8G8_SSCALED: - case PIPE_FORMAT_R8G8_SNORM: - case PIPE_FORMAT_R8G8_USCALED: - case PIPE_FORMAT_R8G8_UNORM: - return 2 * 1; - case PIPE_FORMAT_R8G8B8_SSCALED: - case PIPE_FORMAT_R8G8B8_SNORM: - case PIPE_FORMAT_R8G8B8_USCALED: - case PIPE_FORMAT_R8G8B8_UNORM: - return 3 * 1; - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R8G8B8A8_SSCALED: - case PIPE_FORMAT_R8G8B8A8_SNORM: - case PIPE_FORMAT_R8G8B8A8_USCALED: - case PIPE_FORMAT_R8G8B8A8_UNORM: - return 4 * 1; - - case 0: - return 0; /* not sure why this is needed */ - - default: - assert(0); - return 0; - } -} - - /** * Fetch vertex attributes for 'count' vertices. */ @@ -612,10 +161,10 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, /* loop over vertex attributes (vertex shader inputs) */ for (attr = 0; attr < nr_attrs; attr++) { - const qword default_values = (qword)(vec_float4){ 0.0, 0.0, 0.0, 1.0 }; const unsigned pitch = draw->vertex_fetch.pitch[attr]; const uint64_t src = draw->vertex_fetch.src_ptr[attr]; - const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr]; + const spu_fetch_func fetch = (spu_fetch_func) + (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]); unsigned i; unsigned idx; const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; @@ -644,8 +193,7 @@ static void generic_vertex_fetch(struct spu_vs_context *draw, /* Convert all 4 vertices to vectors of float. */ - (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, default_values, - fetch_shuffle_data); + (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data); } } @@ -662,12 +210,5 @@ void spu_update_vertex_fetch( struct spu_vs_context *draw ) } - for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { - draw->vertex_fetch.fetch[i] = - get_fetch_func(draw->vertex_fetch.format[i]); - draw->vertex_fetch.size[i] = - get_vertex_size(draw->vertex_fetch.format[i]); - } - draw->vertex_fetch.fetch_func = generic_vertex_fetch; } diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h index b5bf31e67db..0fb0bc28d03 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.h +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h @@ -6,8 +6,6 @@ struct spu_vs_context; -typedef void (*spu_fetch_func)(qword *out, const qword *in, qword defaults, - const qword *shuffle_data); typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, struct spu_exec_machine *machine, const unsigned *elts, @@ -20,12 +18,12 @@ struct spu_vs_context { uint64_t src_ptr[PIPE_ATTRIB_MAX]; unsigned pitch[PIPE_ATTRIB_MAX]; unsigned size[PIPE_ATTRIB_MAX]; - enum pipe_format format[PIPE_ATTRIB_MAX]; + unsigned code_offset[PIPE_ATTRIB_MAX]; unsigned nr_attrs; boolean dirty; - spu_fetch_func fetch[PIPE_ATTRIB_MAX]; spu_full_fetch_func fetch_func; + void *code; } vertex_fetch; /* Clip derived state: