From: Kai Wasserbäch Date: Tue, 29 Nov 2011 17:17:47 +0000 (+0100) Subject: gallium/cell: Remove the driver. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ccd4d4367f2b4e5aebfc59b832599812a4a1c7d8;p=mesa.git gallium/cell: Remove the driver. Complicates Gallium3D development and doesn't seem to have active users. Signed-off-by: Kai Wasserbäch Signed-off-by: José Fonseca --- diff --git a/Makefile b/Makefile index d65d4c4968e..cf6555c782d 100644 --- a/Makefile +++ b/Makefile @@ -112,8 +112,6 @@ linux \ linux-i965 \ linux-alpha \ linux-alpha-static \ -linux-cell \ -linux-cell-debug \ linux-debug \ linux-dri \ linux-dri-debug \ diff --git a/common.py b/common.py index 5e2967fc59b..5578f72af1f 100644 --- a/common.py +++ b/common.py @@ -83,7 +83,7 @@ def AddOptions(opts): opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine, allowed_values=('generic', 'ppc', 'x86', 'x86_64'))) opts.Add(EnumOption('platform', 'target platform', host_platform, - allowed_values=('linux', 'cell', 'windows', 'darwin', 'cygwin', 'sunos', 'freebsd8'))) + allowed_values=('linux', 'windows', 'darwin', 'cygwin', 'sunos', 'freebsd8'))) opts.Add(BoolOption('embedded', 'embedded build', 'no')) opts.Add('toolchain', 'compiler toolchain', default_toolchain) opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no')) diff --git a/configs/linux-cell b/configs/linux-cell deleted file mode 100644 index 7f38da971d1..00000000000 --- a/configs/linux-cell +++ /dev/null @@ -1,71 +0,0 @@ -# linux-cell (non-debug build) - -include $(TOP)/configs/linux - -CONFIG_NAME = linux-cell - - -# Omiting other gallium drivers: -GALLIUM_DRIVERS_DIRS = cell softpipe trace rbug identity - - -# Compiler and flags -CC = ppu32-gcc -CXX = ppu32-g++ -HOST_CC = gcc -APP_CC = gcc -APP_CXX = g++ - -OPT_FLAGS = -O3 - -# Cell SDK location -## For SDK 2.1: (plus, remove -DSPU_MAIN_PARAM_LONG_LONG below) -#SDK = /opt/ibm/cell-sdk/prototype/sysroot/usr -## For SDK 3.0: -SDK = 
/opt/cell/sdk/usr - - - -COMMON_C_CPP_FLAGS = $(OPT_FLAGS) -Wall -Winline \ - -fPIC -m32 -mabi=altivec -maltivec \ - -I. -I$(SDK)/include \ - -DGALLIUM_CELL $(DEFINES) - -CFLAGS = $(COMMON_C_CPP_FLAGS) -Wmissing-prototypes -std=c99 - -CXXFLAGS = $(COMMON_C_CPP_FLAGS) - - -SRC_DIRS = glsl mapi/glapi mapi/vgapi mesa \ - gallium gallium/winsys gallium/targets glu - -# Build no traditional Mesa drivers: -DRIVER_DIRS = - - -MKDEP_OPTIONS = -fdepend -Y - - -GL_LIB_DEPS = $(EXTRA_LIB_PATH) -lX11 -lXext -lm -lpthread \ - -L$(SDK)/lib -m32 -Wl,-m,elf32ppc -R$(SDK)/lib -lspe2 - - -CELL_SPU_LIB = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a - - -### SPU stuff - -SPU_CC = spu-gcc - -SPU_CFLAGS = $(OPT_FLAGS) -W -Wall -Winline -Wmissing-prototypes -Wno-main \ - -I. -I$(SDK)/spu/include -I$(TOP)/src/mesa/ $(INCLUDE_DIRS) \ - -DSPU_MAIN_PARAM_LONG_LONG \ - -include spu_intrinsics.h - -SPU_LFLAGS = -L$(SDK)/spu/lib -Wl,-N -lmisc -lm - -SPU_AR = ppu-ar -SPU_AR_FLAGS = -qcs - -SPU_EMBED = ppu32-embedspu -SPU_EMBED_FLAGS = -m32 diff --git a/configs/linux-cell-debug b/configs/linux-cell-debug deleted file mode 100644 index 42f3245edc9..00000000000 --- a/configs/linux-cell-debug +++ /dev/null @@ -1,10 +0,0 @@ -# linux-cell-debug - -include $(TOP)/configs/linux-cell - -# just override name and OPT_FLAGS here: - -CONFIG_NAME = linux-cell-debug - -OPT_FLAGS = -g -DDEBUG - diff --git a/docs/cell.html b/docs/cell.html deleted file mode 100644 index 30626b60b42..00000000000 --- a/docs/cell.html +++ /dev/null @@ -1,138 +0,0 @@ - - -Cell Driver - - - - - -

Mesa/Gallium Cell Driver

- -

-The Mesa -Cell -driver is part of the -Gallium3D -architecture. -Tungsten Graphics did the original implementation of the Cell driver. -

- - -

Source Code

- -

-The latest Cell driver source code is on the master branch of the Mesa -git repository. -

-To build the driver you'll need the IBM Cell SDK (version 2.1 or 3.0). -To use the driver you'll need a Cell system, such as a PS3 running Linux, -or the Cell Simulator (untested, though). -

- -

-If using Cell SDK 2.1, see the configs/linux-cell file for some -special changes. -

- -

-To compile the code, run make linux-cell. -Or to build in debug mode, run make linux-cell-debug. -

- -

-To use the library, make sure your current directory is the top of the -Mesa tree, then set LD_LIBRARY_PATH like this: -

-  export LD_LIBRARY_PATH=$PWD/lib/gallium:$PWD/lib/
-

- -

-Verify that the Cell driver is being used by running -progs/xdemos/glxinfo and looking for: -

-  OpenGL renderer string: Gallium 0.3, Cell on Xlib
-

- - -

Driver Implementation Summary

- -

-Rasterization is parallelized across the SPUs in a tile-based manner. -Batches of transformed triangles are sent to the SPUs (actually, pulled from -main memory by the SPUs). -Each SPU loops over a set of 32x32-pixel screen tiles, rendering the triangles -into each tile. -Because of the limited SPU memory, framebuffer tiles are paged in/out of -SPU local store as needed. -Similarly, textures are tiled and brought into local store as needed. -

- - -

Status

- -

-As of October 2008, the driver runs quite a few OpenGL demos. -Features that work include: -

Point/line/triangle rendering, glDrawPixels -
2D, NPOT and cube texture maps with nearest/linear/mipmap filtering -
Dynamic SPU code generation for fragment shaders, but not complete -
Dynamic SPU code generation for fragment ops (blend, Z-test, etc), but not complete -
Dynamic PPU/PPC code generation for vertex shaders, but not complete -

-Performance has recently improved with the addition of PPC code generation -for vertex shaders, but the code quality isn't too great yet. -

-Another bottleneck is SwapBuffers. It may be the limiting factor for -many simple GL tests. -

- - - -

Debug Options

- -

-The CELL_DEBUG env var can be set to a comma-separated list of one or -more of the following debug options: -

checker - use a different background clear color for each SPU. - This lets you see which SPU is rendering which screen tiles. -
sync - wait/synchronize after each DMA transfer -
asm - print generated SPU assembly code to stdout -
fragops - emit fragment ops debug messages -
fragopfallback - don't use codegen for fragment ops -
cmd - print SPU commands as they're received -
cache - print texture cache statistics when program exits -

-Note that some of these options may only work for linux-cell-debug builds. -

- -

-If the GALLIUM_NOPPC env var is set, PPC code generation will not be used -and vertex shaders will be run with the TGSI interpreter. -

-If the GALLIUM_NOCELL env var is set, the softpipe driver will be used -instead of the Cell driver. -This is useful for comparison/validation. -

- - - -

Contributing

- -

-If you're interested in contributing to the effort, familiarize yourself -with the code, join the mesa3d-dev mailing list, -and describe what you'd like to do. -

- - - - diff --git a/docs/contents.html b/docs/contents.html index 8882e731879..e3cea2a7ce3 100644 --- a/docs/contents.html +++ b/docs/contents.html @@ -78,8 +78,7 @@ a:visited {

GL Dispatch Links diff --git a/docs/news.html b/docs/news.html index 91284922880..d6a2aa8e6dd 100644 --- a/docs/news.html +++ b/docs/news.html @@ -217,7 +217,7 @@ This is a bug-fix release.

January 24, 2008

-Added a new page describing the Mesa Cell driver. +Added a new page describing the Mesa Cell driver.

diff --git a/docs/relnotes-7.12.html b/docs/relnotes-7.12.html index 0d2211358cf..393b1124e58 100644 --- a/docs/relnotes-7.12.html +++ b/docs/relnotes-7.12.html @@ -74,6 +74,8 @@ tbd by the gallium drivers for this hardware.

Removed the i965g driver, which was broken and with nobody in sight to fix the situation

Removed the Gallium cell driver, it was just a burden on Gallium + development and nobody seems to use it.

diff --git a/docs/relnotes-7.5.html b/docs/relnotes-7.5.html index 56deca6a86c..a25ca8efc11 100644 --- a/docs/relnotes-7.5.html +++ b/docs/relnotes-7.5.html @@ -61,7 +61,7 @@ baa7a1e850b6e39bae58868fd0684004 MesaGLUT-7.5.tar.bz2

softpipe - a software/reference driver
i915 - Intel 915/945 driver -
Cell - IBM/Sony/Toshiba Cell processor driver +
Cell - IBM/Sony/Toshiba Cell processor driver
nouveau (for NVIDIA GPUs) and R300 for (AMD/ATI R300). PLEASE NOTE: these drivers are incomplete and still under development. It's probably NOT worthwhile to report any bugs unless you have patches. diff --git a/docs/sourcetree.html b/docs/sourcetree.html index 3f100df49e1..e26c653abbe 100644 --- a/docs/sourcetree.html +++ b/docs/sourcetree.html @@ -86,7 +86,6 @@ each directory. interfaces
drivers - Gallium3D device drivers
-
cell - Driver for Cell processor.
i915 - Driver for Intel i915/i945.
llvmpipe - Software driver using LLVM for runtime code generation.
nv* - Drivers for NVIDIA GPUs. diff --git a/doxygen/gallium.doc b/doxygen/gallium.doc index f0ff36075a5..e81b02e1aa6 100644 --- a/doxygen/gallium.doc +++ b/doxygen/gallium.doc @@ -34,7 +34,6 @@ - Pipe drivers: - \ref softpipe - \ref i915g - - Cell driver (cell_context.h, cell_winsys.h) - \ref failover - Winsys drivers: diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources index 15de20cb3a3..5c65533308c 100644 --- a/src/gallium/auxiliary/Makefile.sources +++ b/src/gallium/auxiliary/Makefile.sources @@ -63,7 +63,6 @@ C_SOURCES := \ rtasm/rtasm_cpu.c \ rtasm/rtasm_execmem.c \ rtasm/rtasm_ppc.c \ - rtasm/rtasm_ppc_spe.c \ rtasm/rtasm_x86sse.c \ tgsi/tgsi_build.c \ tgsi/tgsi_dump.c \ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c deleted file mode 100644 index 53a0e722cff..00000000000 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ /dev/null @@ -1,1067 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file - * Real-time assembly generation interface for Cell B.E. SPEs. - * - * \author Ian Romanick - * \author Brian Paul - */ - - -#include -#include "pipe/p_compiler.h" -#include "util/u_memory.h" -#include "rtasm_ppc_spe.h" - - -#ifdef GALLIUM_CELL -/** - * SPE instruction types - * - * There are 6 primary instruction encodings used on the Cell's SPEs. Each of - * the following unions encodes one type. - * - * \bug - * If, at some point, we start generating SPE code from a little-endian host - * these unions will not work. - */ -/*@{*/ -/** - * Encode one output register with two input registers - */ -union spe_inst_RR { - uint32_t bits; - struct { - unsigned op:11; - unsigned rB:7; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with three input registers - */ -union spe_inst_RRR { - uint32_t bits; - struct { - unsigned op:4; - unsigned rT:7; - unsigned rB:7; - unsigned rA:7; - unsigned rC:7; - } inst; -}; - - -/** - * Encode one output register with one input reg. and a 7-bit signed immed - */ -union spe_inst_RI7 { - uint32_t bits; - struct { - unsigned op:11; - unsigned i7:7; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with one input reg. and an 8-bit signed immed - */ -union spe_inst_RI8 { - uint32_t bits; - struct { - unsigned op:10; - unsigned i8:8; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with one input reg. 
and a 10-bit signed immed - */ -union spe_inst_RI10 { - uint32_t bits; - struct { - unsigned op:8; - unsigned i10:10; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with a 16-bit signed immediate - */ -union spe_inst_RI16 { - uint32_t bits; - struct { - unsigned op:9; - unsigned i16:16; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with a 18-bit signed immediate - */ -union spe_inst_RI18 { - uint32_t bits; - struct { - unsigned op:7; - unsigned i18:18; - unsigned rT:7; - } inst; -}; -/*@}*/ - - -static void -indent(const struct spe_function *p) -{ - int i; - for (i = 0; i < p->indent; i++) { - putchar(' '); - } -} - - -static const char * -rem_prefix(const char *longname) -{ - return longname + 4; -} - - -static const char * -reg_name(int reg) -{ - switch (reg) { - case SPE_REG_SP: - return "$sp"; - case SPE_REG_RA: - return "$lr"; - default: - { - /* cycle through four buffers to handle multiple calls per printf */ - static char buf[4][10]; - static int b = 0; - b = (b + 1) % 4; - sprintf(buf[b], "$%d", reg); - return buf[b]; - } - } -} - - -static void -emit_instruction(struct spe_function *p, uint32_t inst_bits) -{ - if (!p->store) - return; /* out of memory, drop the instruction */ - - if (p->num_inst == p->max_inst) { - /* allocate larger buffer */ - uint32_t *newbuf; - p->max_inst *= 2; /* 2x larger */ - newbuf = align_malloc(p->max_inst * SPE_INST_SIZE, 16); - if (newbuf) { - memcpy(newbuf, p->store, p->num_inst * SPE_INST_SIZE); - } - align_free(p->store); - p->store = newbuf; - if (!p->store) { - /* out of memory */ - p->num_inst = 0; - return; - } - } - - p->store[p->num_inst++] = inst_bits; -} - - - -static void emit_RR(struct spe_function *p, unsigned op, int rT, - int rA, int rB, const char *name) -{ - union spe_inst_RR inst; - inst.inst.op = op; - inst.inst.rB = rB; - inst.inst.rA = rA; - inst.inst.rT = rT; - emit_instruction(p, inst.bits); - if (p->print) { - indent(p); - 
printf("%s\t%s, %s, %s\n", - rem_prefix(name), reg_name(rT), reg_name(rA), reg_name(rB)); - } -} - - -static void emit_RRR(struct spe_function *p, unsigned op, int rT, - int rA, int rB, int rC, const char *name) -{ - union spe_inst_RRR inst; - inst.inst.op = op; - inst.inst.rT = rT; - inst.inst.rB = rB; - inst.inst.rA = rA; - inst.inst.rC = rC; - emit_instruction(p, inst.bits); - if (p->print) { - indent(p); - printf("%s\t%s, %s, %s, %s\n", rem_prefix(name), reg_name(rT), - reg_name(rA), reg_name(rB), reg_name(rC)); - } -} - - -static void emit_RI7(struct spe_function *p, unsigned op, int rT, - int rA, int imm, const char *name) -{ - union spe_inst_RI7 inst; - inst.inst.op = op; - inst.inst.i7 = imm; - inst.inst.rA = rA; - inst.inst.rT = rT; - emit_instruction(p, inst.bits); - if (p->print) { - indent(p); - printf("%s\t%s, %s, 0x%x\n", - rem_prefix(name), reg_name(rT), reg_name(rA), imm); - } -} - - - -static void emit_RI8(struct spe_function *p, unsigned op, int rT, - int rA, int imm, const char *name) -{ - union spe_inst_RI8 inst; - inst.inst.op = op; - inst.inst.i8 = imm; - inst.inst.rA = rA; - inst.inst.rT = rT; - emit_instruction(p, inst.bits); - if (p->print) { - indent(p); - printf("%s\t%s, %s, 0x%x\n", - rem_prefix(name), reg_name(rT), reg_name(rA), imm); - } -} - - - -static void emit_RI10(struct spe_function *p, unsigned op, int rT, - int rA, int imm, const char *name) -{ - union spe_inst_RI10 inst; - inst.inst.op = op; - inst.inst.i10 = imm; - inst.inst.rA = rA; - inst.inst.rT = rT; - emit_instruction(p, inst.bits); - if (p->print) { - indent(p); - printf("%s\t%s, %s, 0x%x\n", - rem_prefix(name), reg_name(rT), reg_name(rA), imm); - } -} - - -/** As above, but do range checking on signed immediate value */ -static void emit_RI10s(struct spe_function *p, unsigned op, int rT, - int rA, int imm, const char *name) -{ - assert(imm <= 511); - assert(imm >= -512); - emit_RI10(p, op, rT, rA, imm, name); -} - - -static void emit_RI16(struct spe_function *p, 
unsigned op, int rT, - int imm, const char *name) -{ - union spe_inst_RI16 inst; - inst.inst.op = op; - inst.inst.i16 = imm; - inst.inst.rT = rT; - emit_instruction(p, inst.bits); - if (p->print) { - indent(p); - printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); - } -} - - -static void emit_RI18(struct spe_function *p, unsigned op, int rT, - int imm, const char *name) -{ - union spe_inst_RI18 inst; - inst.inst.op = op; - inst.inst.i18 = imm; - inst.inst.rT = rT; - emit_instruction(p, inst.bits); - if (p->print) { - indent(p); - printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); - } -} - - -#define EMIT(_name, _op) \ -void _name (struct spe_function *p) \ -{ \ - emit_RR(p, _op, 0, 0, 0, __FUNCTION__); \ -} - -#define EMIT_(_name, _op) \ -void _name (struct spe_function *p, int rT) \ -{ \ - emit_RR(p, _op, rT, 0, 0, __FUNCTION__); \ -} - -#define EMIT_R(_name, _op) \ -void _name (struct spe_function *p, int rT, int rA) \ -{ \ - emit_RR(p, _op, rT, rA, 0, __FUNCTION__); \ -} - -#define EMIT_RR(_name, _op) \ -void _name (struct spe_function *p, int rT, int rA, int rB) \ -{ \ - emit_RR(p, _op, rT, rA, rB, __FUNCTION__); \ -} - -#define EMIT_RRR(_name, _op) \ -void _name (struct spe_function *p, int rT, int rA, int rB, int rC) \ -{ \ - emit_RRR(p, _op, rT, rA, rB, rC, __FUNCTION__); \ -} - -#define EMIT_RI7(_name, _op) \ -void _name (struct spe_function *p, int rT, int rA, int imm) \ -{ \ - emit_RI7(p, _op, rT, rA, imm, __FUNCTION__); \ -} - -#define EMIT_RI8(_name, _op, bias) \ -void _name (struct spe_function *p, int rT, int rA, int imm) \ -{ \ - emit_RI8(p, _op, rT, rA, bias - imm, __FUNCTION__); \ -} - -#define EMIT_RI10(_name, _op) \ -void _name (struct spe_function *p, int rT, int rA, int imm) \ -{ \ - emit_RI10(p, _op, rT, rA, imm, __FUNCTION__); \ -} - -#define EMIT_RI10s(_name, _op) \ -void _name (struct spe_function *p, int rT, int rA, int imm) \ -{ \ - emit_RI10s(p, _op, rT, rA, imm, __FUNCTION__); \ -} - -#define EMIT_RI16(_name, _op) 
\ -void _name (struct spe_function *p, int rT, int imm) \ -{ \ - emit_RI16(p, _op, rT, imm, __FUNCTION__); \ -} - -#define EMIT_RI18(_name, _op) \ -void _name (struct spe_function *p, int rT, int imm) \ -{ \ - emit_RI18(p, _op, rT, imm, __FUNCTION__); \ -} - -#define EMIT_I16(_name, _op) \ -void _name (struct spe_function *p, int imm) \ -{ \ - emit_RI16(p, _op, 0, imm, __FUNCTION__); \ -} - -#include "rtasm_ppc_spe.h" - - - -/** - * Initialize an spe_function. - * \param code_size initial size of instruction buffer to allocate, in bytes. - * If zero, use a default. - */ -void spe_init_func(struct spe_function *p, unsigned code_size) -{ - uint i; - - if (!code_size) - code_size = 64; - - p->num_inst = 0; - p->max_inst = code_size / SPE_INST_SIZE; - p->store = align_malloc(code_size, 16); - - p->set_count = 0; - memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0])); - - /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. - */ - p->regs[0] = p->regs[1] = p->regs[2] = 1; - for (i = 80; i <= 127; i++) { - p->regs[i] = 1; - } - - p->print = FALSE; - p->indent = 0; -} - - -void spe_release_func(struct spe_function *p) -{ - assert(p->num_inst <= p->max_inst); - if (p->store != NULL) { - align_free(p->store); - } - p->store = NULL; -} - - -/** Return current code size in bytes. */ -unsigned spe_code_size(const struct spe_function *p) -{ - return p->num_inst * SPE_INST_SIZE; -} - - -/** - * Allocate a SPE register. - * \return register index or -1 if none left. - */ -int spe_allocate_available_register(struct spe_function *p) -{ - unsigned i; - for (i = 0; i < SPE_NUM_REGS; i++) { - if (p->regs[i] == 0) { - p->regs[i] = 1; - return i; - } - } - - return -1; -} - - -/** - * Mark the given SPE register as "allocated". - */ -int spe_allocate_register(struct spe_function *p, int reg) -{ - assert(reg < SPE_NUM_REGS); - assert(p->regs[reg] == 0); - p->regs[reg] = 1; - return reg; -} - - -/** - * Mark the given SPE register as "unallocated". 
Note that this should - * only be used on registers allocated in the current register set; an - * assertion will fail if an attempt is made to deallocate a register - * allocated in an earlier register set. - */ -void spe_release_register(struct spe_function *p, int reg) -{ - assert(reg >= 0); - assert(reg < SPE_NUM_REGS); - assert(p->regs[reg] == 1); - - p->regs[reg] = 0; -} - -/** - * Start a new set of registers. This can be called if - * it will be difficult later to determine exactly what - * registers were actually allocated during a code generation - * sequence, and you really just want to deallocate all of them. - */ -void spe_allocate_register_set(struct spe_function *p) -{ - uint i; - - /* Keep track of the set count. If it ever wraps around to 0, - * we're in trouble. - */ - p->set_count++; - assert(p->set_count > 0); - - /* Increment the allocation count of all registers currently - * allocated. Then any registers that are allocated in this set - * will be the only ones with a count of 1; they'll all be released - * when the register set is released. - */ - for (i = 0; i < SPE_NUM_REGS; i++) { - if (p->regs[i] > 0) - p->regs[i]++; - } -} - -void spe_release_register_set(struct spe_function *p) -{ - uint i; - - /* If the set count drops below zero, we're in trouble. */ - assert(p->set_count > 0); - p->set_count--; - - /* Drop the allocation level of all registers. Any allocated - * during this register set will drop to 0 and then become - * available. 
- */ - for (i = 0; i < SPE_NUM_REGS; i++) { - if (p->regs[i] > 0) - p->regs[i]--; - } -} - - -unsigned -spe_get_registers_used(const struct spe_function *p, ubyte used[]) -{ - unsigned i, num = 0; - /* only count registers in the range available to callers */ - for (i = 2; i < 80; i++) { - if (p->regs[i]) { - used[num++] = i; - } - } - return num; -} - - -void -spe_print_code(struct spe_function *p, boolean enable) -{ - p->print = enable; -} - - -void -spe_indent(struct spe_function *p, int spaces) -{ - p->indent += spaces; -} - - -void -spe_comment(struct spe_function *p, int rel_indent, const char *s) -{ - if (p->print) { - p->indent += rel_indent; - indent(p); - p->indent -= rel_indent; - printf("# %s\n", s); - } -} - - -/** - * Load quad word. - * NOTE: offset is in bytes and the least significant 4 bits must be zero! - */ -void spe_lqd(struct spe_function *p, int rT, int rA, int offset) -{ - const boolean pSave = p->print; - - /* offset must be a multiple of 16 */ - assert(offset % 16 == 0); - /* offset must fit in 10-bit signed int field, after shifting */ - assert((offset >> 4) <= 511); - assert((offset >> 4) >= -512); - - p->print = FALSE; - emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd"); - p->print = pSave; - - if (p->print) { - indent(p); - printf("lqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); - } -} - - -/** - * Store quad word. - * NOTE: offset is in bytes and the least significant 4 bits must be zero! 
- */ -void spe_stqd(struct spe_function *p, int rT, int rA, int offset) -{ - const boolean pSave = p->print; - - /* offset must be a multiple of 16 */ - assert(offset % 16 == 0); - /* offset must fit in 10-bit signed int field, after shifting */ - assert((offset >> 4) <= 511); - assert((offset >> 4) >= -512); - - p->print = FALSE; - emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd"); - p->print = pSave; - - if (p->print) { - indent(p); - printf("stqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); - } -} - - -/** - * For branch instructions: - * \param d if 1, disable interupts if branch is taken - * \param e if 1, enable interupts if branch is taken - * If d and e are both zero, don't change interupt status (right?) - */ - -/** Branch Indirect to address in rA */ -void spe_bi(struct spe_function *p, int rA, int d, int e) -{ - emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4), __FUNCTION__); -} - -/** Interupt Return */ -void spe_iret(struct spe_function *p, int rA, int d, int e) -{ - emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4), __FUNCTION__); -} - -/** Branch indirect and set link on external data */ -void spe_bisled(struct spe_function *p, int rT, int rA, int d, - int e) -{ - emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4), __FUNCTION__); -} - -/** Branch indirect and set link. Save PC in rT, jump to rA. */ -void spe_bisl(struct spe_function *p, int rT, int rA, int d, - int e) -{ - emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4), __FUNCTION__); -} - -/** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */ -void spe_biz(struct spe_function *p, int rT, int rA, int d, int e) -{ - emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4), __FUNCTION__); -} - -/** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */ -void spe_binz(struct spe_function *p, int rT, int rA, int d, int e) -{ - emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4), __FUNCTION__); -} - -/** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. 
*/ -void spe_bihz(struct spe_function *p, int rT, int rA, int d, int e) -{ - emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4), __FUNCTION__); -} - -/** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */ -void spe_bihnz(struct spe_function *p, int rT, int rA, int d, int e) -{ - emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4), __FUNCTION__); -} - - -/* Hint-for-branch instructions - */ -#if 0 -hbr; -hbra; -hbrr; -#endif - - -/* Control instructions - */ -#if 0 -stop; -EMIT_RR (spe_stopd, 0x140); -EMIT_ (spe_nop, 0x201); -sync; -EMIT_ (spe_dsync, 0x003); -EMIT_R (spe_mfspr, 0x00c); -EMIT_R (spe_mtspr, 0x10c); -#endif - - -/** - ** Helper / "macro" instructions. - ** Use somewhat verbose names as a reminder that these aren't native - ** SPE instructions. - **/ - - -void -spe_load_float(struct spe_function *p, int rT, float x) -{ - if (x == 0.0f) { - spe_il(p, rT, 0x0); - } - else if (x == 0.5f) { - spe_ilhu(p, rT, 0x3f00); - } - else if (x == 1.0f) { - spe_ilhu(p, rT, 0x3f80); - } - else if (x == -1.0f) { - spe_ilhu(p, rT, 0xbf80); - } - else { - union { - float f; - unsigned u; - } bits; - bits.f = x; - spe_ilhu(p, rT, bits.u >> 16); - spe_iohl(p, rT, bits.u & 0xffff); - } -} - - -void -spe_load_int(struct spe_function *p, int rT, int i) -{ - if (-32768 <= i && i <= 32767) { - spe_il(p, rT, i); - } - else { - spe_ilhu(p, rT, i >> 16); - if (i & 0xffff) - spe_iohl(p, rT, i & 0xffff); - } -} - -void spe_load_uint(struct spe_function *p, int rT, uint ui) -{ - /* If the whole value is in the lower 18 bits, use ila, which - * doesn't sign-extend. Otherwise, if the two halfwords of - * the constant are identical, use ilh. Otherwise, if every byte of - * the desired value is 0x00 or 0xff, we can use Form Select Mask for - * Bytes Immediate (fsmbi) to load the value in a single instruction. - * Otherwise, in the general case, we have to use ilhu followed by iohl. 
- */ - if ((ui & 0x0003ffff) == ui) { - spe_ila(p, rT, ui); - } - else if ((ui >> 16) == (ui & 0xffff)) { - spe_ilh(p, rT, ui & 0xffff); - } - else if ( - ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) && - ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) && - ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) && - ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000) - ) { - uint mask = 0; - /* fsmbi duplicates each bit in the given mask eight times, - * using a 16-bit value to initialize a 16-byte quadword. - * Each 4-bit nybble of the mask corresponds to a full word - * of the result; look at the value and figure out the mask - * (replicated for each word in the quadword), and then - * form the "select mask" to get the value. - */ - if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111; - if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222; - if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444; - if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888; - spe_fsmbi(p, rT, mask); - } - else { - /* The general case: this usually uses two instructions, but - * may use only one if the low-order 16 bits of each word are 0. - */ - spe_ilhu(p, rT, ui >> 16); - if (ui & 0xffff) - spe_iohl(p, rT, ui & 0xffff); - } -} - -/** - * This function is constructed identically to spe_xor_uint() below. - * Changes to one should be made in the other. - */ -void -spe_and_uint(struct spe_function *p, int rT, int rA, uint ui) -{ - /* If we can, emit a single instruction, either And Byte Immediate - * (which uses the same constant across each byte), And Halfword Immediate - * (which sign-extends a 10-bit immediate to 16 bits and uses that - * across each halfword), or And Word Immediate (which sign-extends - * a 10-bit immediate to 32 bits). - * - * Otherwise, we'll need to use a temporary register. 
- */ - uint tmp; - - /* If the upper 23 bits are all 0s or all 1s, sign extension - * will work and we can use And Word Immediate - */ - tmp = ui & 0xfffffe00; - if (tmp == 0xfffffe00 || tmp == 0) { - spe_andi(p, rT, rA, ui & 0x000003ff); - return; - } - - /* If the ui field is symmetric along halfword boundaries and - * the upper 7 bits of each halfword are all 0s or 1s, we - * can use And Halfword Immediate - */ - tmp = ui & 0xfe00fe00; - if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { - spe_andhi(p, rT, rA, ui & 0x000003ff); - return; - } - - /* If the ui field is symmetric in each byte, then we can use - * the And Byte Immediate instruction. - */ - tmp = ui & 0x000000ff; - if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { - spe_andbi(p, rT, rA, tmp); - return; - } - - /* Otherwise, we'll have to use a temporary register. */ - int tmp_reg = spe_allocate_available_register(p); - spe_load_uint(p, tmp_reg, ui); - spe_and(p, rT, rA, tmp_reg); - spe_release_register(p, tmp_reg); -} - - -/** - * This function is constructed identically to spe_and_uint() above. - * Changes to one should be made in the other. - */ -void -spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui) -{ - /* If we can, emit a single instruction, either Exclusive Or Byte - * Immediate (which uses the same constant across each byte), Exclusive - * Or Halfword Immediate (which sign-extends a 10-bit immediate to - * 16 bits and uses that across each halfword), or Exclusive Or Word - * Immediate (which sign-extends a 10-bit immediate to 32 bits). - * - * Otherwise, we'll need to use a temporary register. 
- */ - uint tmp; - - /* If the upper 23 bits are all 0s or all 1s, sign extension - * will work and we can use Exclusive Or Word Immediate - */ - tmp = ui & 0xfffffe00; - if (tmp == 0xfffffe00 || tmp == 0) { - spe_xori(p, rT, rA, ui & 0x000003ff); - return; - } - - /* If the ui field is symmetric along halfword boundaries and - * the upper 7 bits of each halfword are all 0s or 1s, we - * can use Exclusive Or Halfword Immediate - */ - tmp = ui & 0xfe00fe00; - if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { - spe_xorhi(p, rT, rA, ui & 0x000003ff); - return; - } - - /* If the ui field is symmetric in each byte, then we can use - * the Exclusive Or Byte Immediate instruction. - */ - tmp = ui & 0x000000ff; - if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { - spe_xorbi(p, rT, rA, tmp); - return; - } - - /* Otherwise, we'll have to use a temporary register. */ - int tmp_reg = spe_allocate_available_register(p); - spe_load_uint(p, tmp_reg, ui); - spe_xor(p, rT, rA, tmp_reg); - spe_release_register(p, tmp_reg); -} - -void -spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui) -{ - /* If the comparison value is 9 bits or less, it fits inside a - * Compare Equal Word Immediate instruction. - */ - if ((ui & 0x000001ff) == ui) { - spe_ceqi(p, rT, rA, ui); - } - /* Otherwise, we're going to have to load a word first. */ - else { - int tmp_reg = spe_allocate_available_register(p); - spe_load_uint(p, tmp_reg, ui); - spe_ceq(p, rT, rA, tmp_reg); - spe_release_register(p, tmp_reg); - } -} - -void -spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui) -{ - /* If the comparison value is 10 bits or less, it fits inside a - * Compare Logical Greater Than Word Immediate instruction. - */ - if ((ui & 0x000003ff) == ui) { - spe_clgti(p, rT, rA, ui); - } - /* Otherwise, we're going to have to load a word first. 
*/ - else { - int tmp_reg = spe_allocate_available_register(p); - spe_load_uint(p, tmp_reg, ui); - spe_clgt(p, rT, rA, tmp_reg); - spe_release_register(p, tmp_reg); - } -} - -void -spe_splat(struct spe_function *p, int rT, int rA) -{ - /* Use a temporary, just in case rT == rA */ - int tmp_reg = spe_allocate_available_register(p); - /* Duplicate bytes 0, 1, 2, and 3 across the whole register */ - spe_ila(p, tmp_reg, 0x00010203); - spe_shufb(p, rT, rA, rA, tmp_reg); - spe_release_register(p, tmp_reg); -} - - -void -spe_complement(struct spe_function *p, int rT, int rA) -{ - spe_nor(p, rT, rA, rA); -} - - -void -spe_move(struct spe_function *p, int rT, int rA) -{ - /* Use different instructions depending on the instruction address - * to take advantage of the dual pipelines. - */ - if (p->num_inst & 1) - spe_shlqbyi(p, rT, rA, 0); /* odd pipe */ - else - spe_ori(p, rT, rA, 0); /* even pipe */ -} - - -void -spe_zero(struct spe_function *p, int rT) -{ - spe_xor(p, rT, rT, rT); -} - - -void -spe_splat_word(struct spe_function *p, int rT, int rA, int word) -{ - assert(word >= 0); - assert(word <= 3); - - if (word == 0) { - int tmp1 = rT; - spe_ila(p, tmp1, 66051); - spe_shufb(p, rT, rA, rA, tmp1); - } - else { - /* XXX review this, we may not need the rotqbyi instruction */ - int tmp1 = rT; - int tmp2 = spe_allocate_available_register(p); - - spe_ila(p, tmp1, 66051); - spe_rotqbyi(p, tmp2, rA, 4 * word); - spe_shufb(p, rT, tmp2, tmp2, tmp1); - - spe_release_register(p, tmp2); - } -} - -/** - * For each 32-bit float element of rA and rB, choose the smaller of the - * two, compositing them into the rT register. - * - * The Float Compare Greater Than (fcgt) instruction will put 1s into - * compare_reg where rA > rB, and 0s where rA <= rB. - * - * Then the Select Bits (selb) instruction will take bits from rA where - * compare_reg is 0, and from rB where compare_reg is 1; i.e., from rA - * where rA <= rB and from rB where rB > rA, which is exactly the - * "min" operation. 
- * - * The compare_reg could in many cases be the same as rT, unless - * rT == rA || rt == rB. But since this is common in constructions - * like "x = min(x, a)", we always allocate a new register to be safe. - */ -void -spe_float_min(struct spe_function *p, int rT, int rA, int rB) -{ - int compare_reg = spe_allocate_available_register(p); - spe_fcgt(p, compare_reg, rA, rB); - spe_selb(p, rT, rA, rB, compare_reg); - spe_release_register(p, compare_reg); -} - -/** - * For each 32-bit float element of rA and rB, choose the greater of the - * two, compositing them into the rT register. - * - * The logic is similar to that of spe_float_min() above; the only - * difference is that the registers on spe_selb() have been reversed, - * so that the larger of the two is selected instead of the smaller. - */ -void -spe_float_max(struct spe_function *p, int rT, int rA, int rB) -{ - int compare_reg = spe_allocate_available_register(p); - spe_fcgt(p, compare_reg, rA, rB); - spe_selb(p, rT, rB, rA, compare_reg); - spe_release_register(p, compare_reg); -} - -#endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h deleted file mode 100644 index 65d9c774154..00000000000 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ /dev/null @@ -1,433 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file - * Real-time assembly generation interface for Cell B.E. SPEs. - * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf - * - * \author Ian Romanick - * \author Brian Paul - */ - -#ifndef RTASM_PPC_SPE_H -#define RTASM_PPC_SPE_H - -/** 4 bytes per instruction */ -#define SPE_INST_SIZE 4 - -/** number of general-purpose SIMD registers */ -#define SPE_NUM_REGS 128 - -/** Return Address register (aka $lr / Link Register) */ -#define SPE_REG_RA 0 - -/** Stack Pointer register (aka $sp) */ -#define SPE_REG_SP 1 - - -struct spe_function -{ - uint32_t *store; /**< instruction buffer */ - uint num_inst; - uint max_inst; - - /** - * The "set count" reflects the number of nested register sets - * are allowed. 
In the unlikely case that we exceed the set count, - * register allocation will start to be confused, which is critical - * enough that we check for it. - */ - unsigned char set_count; - - /** - * Flags for used and unused registers. Each byte corresponds to a - * register; a 0 in that byte means that the register is available. - * A value of 1 means that the register was allocated in the current - * register set. Any other value N means that the register was allocated - * N register sets ago. - * - * \sa - * spe_allocate_register, spe_allocate_available_register, - * spe_allocate_register_set, spe_release_register_set, spe_release_register, - */ - unsigned char regs[SPE_NUM_REGS]; - - boolean print; /**< print/dump instructions as they're emitted? */ - int indent; /**< number of spaces to indent */ -}; - - -extern void spe_init_func(struct spe_function *p, uint code_size); -extern void spe_release_func(struct spe_function *p); -extern uint spe_code_size(const struct spe_function *p); - -extern int spe_allocate_available_register(struct spe_function *p); -extern int spe_allocate_register(struct spe_function *p, int reg); -extern void spe_release_register(struct spe_function *p, int reg); -extern void spe_allocate_register_set(struct spe_function *p); -extern void spe_release_register_set(struct spe_function *p); - -extern uint spe_get_registers_used(const struct spe_function *p, ubyte used[]); - -extern void spe_print_code(struct spe_function *p, boolean enable); -extern void spe_indent(struct spe_function *p, int spaces); -extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); - - -#endif /* RTASM_PPC_SPE_H */ - -#ifndef EMIT -#define EMIT(_name, _op) \ - extern void _name (struct spe_function *p); -#define EMIT_(_name, _op) \ - extern void _name (struct spe_function *p, int rT); -#define EMIT_R(_name, _op) \ - extern void _name (struct spe_function *p, int rT, int rA); -#define EMIT_RR(_name, _op) \ - extern void _name (struct 
spe_function *p, int rT, int rA, int rB); -#define EMIT_RRR(_name, _op) \ - extern void _name (struct spe_function *p, int rT, int rA, int rB, int rC); -#define EMIT_RI7(_name, _op) \ - extern void _name (struct spe_function *p, int rT, int rA, int imm); -#define EMIT_RI8(_name, _op, bias) \ - extern void _name (struct spe_function *p, int rT, int rA, int imm); -#define EMIT_RI10(_name, _op) \ - extern void _name (struct spe_function *p, int rT, int rA, int imm); -#define EMIT_RI10s(_name, _op) \ - extern void _name (struct spe_function *p, int rT, int rA, int imm); -#define EMIT_RI16(_name, _op) \ - extern void _name (struct spe_function *p, int rT, int imm); -#define EMIT_RI18(_name, _op) \ - extern void _name (struct spe_function *p, int rT, int imm); -#define EMIT_I16(_name, _op) \ - extern void _name (struct spe_function *p, int imm); -#define UNDEF_EMIT_MACROS -#endif /* EMIT */ - - -/* Memory load / store instructions - */ -EMIT_RR (spe_lqx, 0x1c4) -EMIT_RI16(spe_lqa, 0x061) -EMIT_RI16(spe_lqr, 0x067) -EMIT_RR (spe_stqx, 0x144) -EMIT_RI16(spe_stqa, 0x041) -EMIT_RI16(spe_stqr, 0x047) -EMIT_RI7 (spe_cbd, 0x1f4) -EMIT_RR (spe_cbx, 0x1d4) -EMIT_RI7 (spe_chd, 0x1f5) -EMIT_RI7 (spe_chx, 0x1d5) -EMIT_RI7 (spe_cwd, 0x1f6) -EMIT_RI7 (spe_cwx, 0x1d6) -EMIT_RI7 (spe_cdd, 0x1f7) -EMIT_RI7 (spe_cdx, 0x1d7) - - -/* Constant formation instructions - */ -EMIT_RI16(spe_ilh, 0x083) -EMIT_RI16(spe_ilhu, 0x082) -EMIT_RI16(spe_il, 0x081) -EMIT_RI18(spe_ila, 0x021) -EMIT_RI16(spe_iohl, 0x0c1) -EMIT_RI16(spe_fsmbi, 0x065) - - - -/* Integer and logical instructions - */ -EMIT_RR (spe_ah, 0x0c8) -EMIT_RI10(spe_ahi, 0x01d) -EMIT_RR (spe_a, 0x0c0) -EMIT_RI10s(spe_ai, 0x01c) -EMIT_RR (spe_sfh, 0x048) -EMIT_RI10(spe_sfhi, 0x00d) -EMIT_RR (spe_sf, 0x040) -EMIT_RI10(spe_sfi, 0x00c) -EMIT_RR (spe_addx, 0x340) -EMIT_RR (spe_cg, 0x0c2) -EMIT_RR (spe_cgx, 0x342) -EMIT_RR (spe_sfx, 0x341) -EMIT_RR (spe_bg, 0x042) -EMIT_RR (spe_bgx, 0x343) -EMIT_RR (spe_mpy, 0x3c4) -EMIT_RR (spe_mpyu, 0x3cc) 
-EMIT_RI10(spe_mpyi, 0x074) -EMIT_RI10(spe_mpyui, 0x075) -EMIT_RRR (spe_mpya, 0x00c) -EMIT_RR (spe_mpyh, 0x3c5) -EMIT_RR (spe_mpys, 0x3c7) -EMIT_RR (spe_mpyhh, 0x3c6) -EMIT_RR (spe_mpyhha, 0x346) -EMIT_RR (spe_mpyhhu, 0x3ce) -EMIT_RR (spe_mpyhhau, 0x34e) -EMIT_R (spe_clz, 0x2a5) -EMIT_R (spe_cntb, 0x2b4) -EMIT_R (spe_fsmb, 0x1b6) -EMIT_R (spe_fsmh, 0x1b5) -EMIT_R (spe_fsm, 0x1b4) -EMIT_R (spe_gbb, 0x1b2) -EMIT_R (spe_gbh, 0x1b1) -EMIT_R (spe_gb, 0x1b0) -EMIT_RR (spe_avgb, 0x0d3) -EMIT_RR (spe_absdb, 0x053) -EMIT_RR (spe_sumb, 0x253) -EMIT_R (spe_xsbh, 0x2b6) -EMIT_R (spe_xshw, 0x2ae) -EMIT_R (spe_xswd, 0x2a6) -EMIT_RR (spe_and, 0x0c1) -EMIT_RR (spe_andc, 0x2c1) -EMIT_RI10s(spe_andbi, 0x016) -EMIT_RI10s(spe_andhi, 0x015) -EMIT_RI10s(spe_andi, 0x014) -EMIT_RR (spe_or, 0x041) -EMIT_RR (spe_orc, 0x2c9) -EMIT_RI10s(spe_orbi, 0x006) -EMIT_RI10s(spe_orhi, 0x005) -EMIT_RI10s(spe_ori, 0x004) -EMIT_R (spe_orx, 0x1f0) -EMIT_RR (spe_xor, 0x241) -EMIT_RI10s(spe_xorbi, 0x046) -EMIT_RI10s(spe_xorhi, 0x045) -EMIT_RI10s(spe_xori, 0x044) -EMIT_RR (spe_nand, 0x0c9) -EMIT_RR (spe_nor, 0x049) -EMIT_RR (spe_eqv, 0x249) -EMIT_RRR (spe_selb, 0x008) -EMIT_RRR (spe_shufb, 0x00b) - - -/* Shift and rotate instructions - */ -EMIT_RR (spe_shlh, 0x05f) -EMIT_RI7 (spe_shlhi, 0x07f) -EMIT_RR (spe_shl, 0x05b) -EMIT_RI7 (spe_shli, 0x07b) -EMIT_RR (spe_shlqbi, 0x1db) -EMIT_RI7 (spe_shlqbii, 0x1fb) -EMIT_RR (spe_shlqby, 0x1df) -EMIT_RI7 (spe_shlqbyi, 0x1ff) -EMIT_RR (spe_shlqbybi, 0x1cf) -EMIT_RR (spe_roth, 0x05c) -EMIT_RI7 (spe_rothi, 0x07c) -EMIT_RR (spe_rot, 0x058) -EMIT_RI7 (spe_roti, 0x078) -EMIT_RR (spe_rotqby, 0x1dc) -EMIT_RI7 (spe_rotqbyi, 0x1fc) -EMIT_RR (spe_rotqbybi, 0x1cc) -EMIT_RR (spe_rotqbi, 0x1d8) -EMIT_RI7 (spe_rotqbii, 0x1f8) -EMIT_RR (spe_rothm, 0x05d) -EMIT_RI7 (spe_rothmi, 0x07d) -EMIT_RR (spe_rotm, 0x059) -EMIT_RI7 (spe_rotmi, 0x079) -EMIT_RR (spe_rotqmby, 0x1dd) -EMIT_RI7 (spe_rotqmbyi, 0x1fd) -EMIT_RR (spe_rotqmbybi, 0x1cd) -EMIT_RR (spe_rotqmbi, 0x1c9) -EMIT_RI7 (spe_rotqmbii, 
0x1f9) -EMIT_RR (spe_rotmah, 0x05e) -EMIT_RI7 (spe_rotmahi, 0x07e) -EMIT_RR (spe_rotma, 0x05a) -EMIT_RI7 (spe_rotmai, 0x07a) - - -/* Compare, branch, and halt instructions - */ -EMIT_RR (spe_heq, 0x3d8) -EMIT_RI10(spe_heqi, 0x07f) -EMIT_RR (spe_hgt, 0x258) -EMIT_RI10(spe_hgti, 0x04f) -EMIT_RR (spe_hlgt, 0x2d8) -EMIT_RI10(spe_hlgti, 0x05f) -EMIT_RR (spe_ceqb, 0x3d0) -EMIT_RI10(spe_ceqbi, 0x07e) -EMIT_RR (spe_ceqh, 0x3c8) -EMIT_RI10(spe_ceqhi, 0x07d) -EMIT_RR (spe_ceq, 0x3c0) -EMIT_RI10(spe_ceqi, 0x07c) -EMIT_RR (spe_cgtb, 0x250) -EMIT_RI10(spe_cgtbi, 0x04e) -EMIT_RR (spe_cgth, 0x248) -EMIT_RI10(spe_cgthi, 0x04d) -EMIT_RR (spe_cgt, 0x240) -EMIT_RI10(spe_cgti, 0x04c) -EMIT_RR (spe_clgtb, 0x2d0) -EMIT_RI10(spe_clgtbi, 0x05e) -EMIT_RR (spe_clgth, 0x2c8) -EMIT_RI10(spe_clgthi, 0x05d) -EMIT_RR (spe_clgt, 0x2c0) -EMIT_RI10(spe_clgti, 0x05c) -EMIT_I16 (spe_br, 0x064) -EMIT_I16 (spe_bra, 0x060) -EMIT_RI16(spe_brsl, 0x066) -EMIT_RI16(spe_brasl, 0x062) -EMIT_RI16(spe_brnz, 0x042) -EMIT_RI16(spe_brz, 0x040) -EMIT_RI16(spe_brhnz, 0x046) -EMIT_RI16(spe_brhz, 0x044) - -/* Control instructions - */ -EMIT (spe_lnop, 0x001) - -extern void -spe_lqd(struct spe_function *p, int rT, int rA, int offset); - -extern void -spe_stqd(struct spe_function *p, int rT, int rA, int offset); - -extern void spe_bi(struct spe_function *p, int rA, int d, int e); -extern void spe_iret(struct spe_function *p, int rA, int d, int e); -extern void spe_bisled(struct spe_function *p, int rT, int rA, - int d, int e); -extern void spe_bisl(struct spe_function *p, int rT, int rA, - int d, int e); -extern void spe_biz(struct spe_function *p, int rT, int rA, - int d, int e); -extern void spe_binz(struct spe_function *p, int rT, int rA, - int d, int e); -extern void spe_bihz(struct spe_function *p, int rT, int rA, - int d, int e); -extern void spe_bihnz(struct spe_function *p, int rT, int rA, - int d, int e); - - -/** Load/splat immediate float into rT. 
*/ -extern void -spe_load_float(struct spe_function *p, int rT, float x); - -/** Load/splat immediate int into rT. */ -extern void -spe_load_int(struct spe_function *p, int rT, int i); - -/** Load/splat immediate unsigned int into rT. */ -extern void -spe_load_uint(struct spe_function *p, int rT, uint ui); - -/** And immediate value into rT. */ -extern void -spe_and_uint(struct spe_function *p, int rT, int rA, uint ui); - -/** Xor immediate value into rT. */ -extern void -spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui); - -/** Compare equal with immediate value. */ -extern void -spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui); - -/** Compare greater with immediate value. */ -extern void -spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui); - -/** Replicate word 0 of rA across rT. */ -extern void -spe_splat(struct spe_function *p, int rT, int rA); - -/** rT = complement_all_bits(rA). */ -extern void -spe_complement(struct spe_function *p, int rT, int rA); - -/** rT = rA. */ -extern void -spe_move(struct spe_function *p, int rT, int rA); - -/** rT = {0,0,0,0}. 
*/ -extern void -spe_zero(struct spe_function *p, int rT); - -/** rT = splat(rA, word) */ -extern void -spe_splat_word(struct spe_function *p, int rT, int rA, int word); - -/** rT = float min(rA, rB) */ -extern void -spe_float_min(struct spe_function *p, int rT, int rA, int rB); - -/** rT = float max(rA, rB) */ -extern void -spe_float_max(struct spe_function *p, int rT, int rA, int rB); - - -/* Floating-point instructions - */ -EMIT_RR (spe_fa, 0x2c4) -EMIT_RR (spe_dfa, 0x2cc) -EMIT_RR (spe_fs, 0x2c5) -EMIT_RR (spe_dfs, 0x2cd) -EMIT_RR (spe_fm, 0x2c6) -EMIT_RR (spe_dfm, 0x2ce) -EMIT_RRR (spe_fma, 0x00e) -EMIT_RR (spe_dfma, 0x35c) -EMIT_RRR (spe_fnms, 0x00d) -EMIT_RR (spe_dfnms, 0x35e) -EMIT_RRR (spe_fms, 0x00f) -EMIT_RR (spe_dfms, 0x35d) -EMIT_RR (spe_dfnma, 0x35f) -EMIT_R (spe_frest, 0x1b8) -EMIT_R (spe_frsqest, 0x1b9) -EMIT_RR (spe_fi, 0x3d4) -EMIT_RI8 (spe_csflt, 0x1da, 155) -EMIT_RI8 (spe_cflts, 0x1d8, 173) -EMIT_RI8 (spe_cuflt, 0x1db, 155) -EMIT_RI8 (spe_cfltu, 0x1d9, 173) -EMIT_R (spe_frds, 0x3b9) -EMIT_R (spe_fesd, 0x3b8) -EMIT_RR (spe_dfceq, 0x3c3) -EMIT_RR (spe_dfcmeq, 0x3cb) -EMIT_RR (spe_dfcgt, 0x2c3) -EMIT_RR (spe_dfcmgt, 0x2cb) -EMIT_RI7 (spe_dftsv, 0x3bf) -EMIT_RR (spe_fceq, 0x3c2) -EMIT_RR (spe_fcmeq, 0x3ca) -EMIT_RR (spe_fcgt, 0x2c2) -EMIT_RR (spe_fcmgt, 0x2ca) -EMIT_R (spe_fscrwr, 0x3ba) -EMIT_ (spe_fscrrd, 0x398) - - -/* Channel instructions - */ -EMIT_R (spe_rdch, 0x00d) -EMIT_R (spe_rdchcnt, 0x00f) -EMIT_R (spe_wrch, 0x10d) - - -#ifdef UNDEF_EMIT_MACROS -#undef EMIT -#undef EMIT_ -#undef EMIT_R -#undef EMIT_RR -#undef EMIT_RRR -#undef EMIT_RI7 -#undef EMIT_RI8 -#undef EMIT_RI10 -#undef EMIT_RI10s -#undef EMIT_RI16 -#undef EMIT_RI18 -#undef EMIT_I16 -#undef UNDEF_EMIT_MACROS -#endif /* EMIT_ */ diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h index 34bfa527db0..596c691e9c1 100644 --- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h +++ 
b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h @@ -8,7 +8,7 @@ /* Helper function to choose and instantiate one of the software rasterizers: - * cell, llvmpipe, softpipe. + * llvmpipe, softpipe. */ #ifdef GALLIUM_SOFTPIPE @@ -19,21 +19,12 @@ #include "llvmpipe/lp_public.h" #endif -#ifdef GALLIUM_CELL -#include "cell/ppu/cell_public.h" -#endif - static INLINE struct pipe_screen * sw_screen_create_named(struct sw_winsys *winsys, const char *driver) { struct pipe_screen *screen = NULL; -#if defined(GALLIUM_CELL) - if (screen == NULL && strcmp(driver, "cell") == 0) - screen = cell_create_screen(winsys); -#endif - #if defined(GALLIUM_LLVMPIPE) if (screen == NULL && strcmp(driver, "llvmpipe") == 0) screen = llvmpipe_create_screen(winsys); @@ -54,9 +45,7 @@ sw_screen_create(struct sw_winsys *winsys) const char *default_driver; const char *driver; -#if defined(GALLIUM_CELL) - default_driver = "cell"; -#elif defined(GALLIUM_LLVMPIPE) +#if defined(GALLIUM_LLVMPIPE) default_driver = "llvmpipe"; #elif defined(GALLIUM_SOFTPIPE) default_driver = "softpipe"; diff --git a/src/gallium/drivers/cell/Makefile b/src/gallium/drivers/cell/Makefile deleted file mode 100644 index 47aef7b05f6..00000000000 --- a/src/gallium/drivers/cell/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# Cell Gallium driver Makefile - - -default: - ( cd spu ; make ) - ( cd ppu ; make ) - - - -clean: - ( cd spu ; make clean ) - ( cd ppu ; make clean ) diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h deleted file mode 100644 index a8cdde34aa7..00000000000 --- a/src/gallium/drivers/cell/common.h +++ /dev/null @@ -1,377 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Types and tokens which are common to the SPU and PPU code. - */ - - -#ifndef CELL_COMMON_H -#define CELL_COMMON_H - -#include "pipe/p_compiler.h" -#include "pipe/p_format.h" -#include "pipe/p_state.h" -#include - -/** The standard assert macro doesn't seem to work reliably */ -#define ASSERT(x) \ - if (!(x)) { \ - ubyte *p = NULL; \ - fprintf(stderr, "%s:%d: %s(): assertion %s failed.\n", \ - __FILE__, __LINE__, __FUNCTION__, #x); \ - *p = 0; \ - exit(1); \ - } - - -#define JOIN(x, y) JOIN_AGAIN(x, y) -#define JOIN_AGAIN(x, y) x ## y - -#define STATIC_ASSERT(e) \ -{typedef char JOIN(assertion_failed_at_line_, __LINE__) [(e) ? 
1 : -1];} - - - -/** for sanity checking */ -#define ASSERT_ALIGN16(ptr) \ - ASSERT((((unsigned long) (ptr)) & 0xf) == 0); - - -/** round up value to next multiple of 4 */ -#define ROUNDUP4(k) (((k) + 0x3) & ~0x3) - -/** round up value to next multiple of 8 */ -#define ROUNDUP8(k) (((k) + 0x7) & ~0x7) - -/** round up value to next multiple of 16 */ -#define ROUNDUP16(k) (((k) + 0xf) & ~0xf) - - -#define CELL_MAX_SPUS 8 - -#define CELL_MAX_SAMPLERS 4 -#define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */ -#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */ -#define CELL_MAX_WIDTH 1024 /**< max framebuffer width */ -#define CELL_MAX_HEIGHT 1024 /**< max framebuffer width */ - -#define TILE_SIZE 32 - - -/** - * The low byte of a mailbox word contains the command opcode. - * Remaining higher bytes are command specific. - */ -#define CELL_CMD_OPCODE_MASK 0xff - -#define CELL_CMD_EXIT 1 -#define CELL_CMD_CLEAR_SURFACE 2 -#define CELL_CMD_FINISH 3 -#define CELL_CMD_RENDER 4 -#define CELL_CMD_BATCH 5 -#define CELL_CMD_RELEASE_VERTS 6 -#define CELL_CMD_STATE_FRAMEBUFFER 10 -#define CELL_CMD_STATE_FRAGMENT_OPS 11 -#define CELL_CMD_STATE_SAMPLER 12 -#define CELL_CMD_STATE_TEXTURE 13 -#define CELL_CMD_STATE_VERTEX_INFO 14 -#define CELL_CMD_STATE_VIEWPORT 15 -#define CELL_CMD_STATE_UNIFORMS 16 -#define CELL_CMD_STATE_VS_ARRAY_INFO 17 -#define CELL_CMD_STATE_BIND_VS 18 -#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19 -#define CELL_CMD_STATE_ATTRIB_FETCH 20 -#define CELL_CMD_STATE_FS_CONSTANTS 21 -#define CELL_CMD_STATE_RASTERIZER 22 -#define CELL_CMD_VS_EXECUTE 23 -#define CELL_CMD_FLUSH_BUFFER_RANGE 24 -#define CELL_CMD_FENCE 25 - - -/** Command/batch buffers */ -#define CELL_NUM_BUFFERS 4 -#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */ - -#define CELL_BUFFER_STATUS_FREE 10 -#define CELL_BUFFER_STATUS_USED 20 - -/** Debug flags */ -#define CELL_DEBUG_CHECKER (1 << 0) -#define CELL_DEBUG_ASM (1 << 1) -#define CELL_DEBUG_SYNC (1 << 2) -#define 
CELL_DEBUG_FRAGMENT_OPS (1 << 3) -#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4) -#define CELL_DEBUG_CMD (1 << 5) -#define CELL_DEBUG_CACHE (1 << 6) - -#define CELL_FENCE_IDLE 0 -#define CELL_FENCE_EMITTED 1 -#define CELL_FENCE_SIGNALLED 2 - -#define CELL_FACING_FRONT 0 -#define CELL_FACING_BACK 1 - -struct cell_fence -{ - /** There's a 16-byte status qword per SPU */ - volatile uint status[CELL_MAX_SPUS][4]; -}; - -#ifdef __SPU__ -typedef vector unsigned int opcode_t; -#else -typedef unsigned int opcode_t[4]; -#endif - -/** - * Fence command sent to SPUs. In response, the SPUs will write - * CELL_FENCE_STATUS_SIGNALLED back to the fence status word in main memory. - */ -struct cell_command_fence -{ - opcode_t opcode; /**< CELL_CMD_FENCE */ - struct cell_fence *fence; - uint32_t pad_[3]; -}; - - -/** - * Command to specify per-fragment operations state and generated code. - * Note that this is a variant-length structure, allocated with as - * much memory as needed to hold the generated code; the "code" - * field *must* be the last field in the structure. Also, the entire - * length of the structure (including the variant code field) must be - * a multiple of 8 bytes; we require that this structure itself be - * a multiple of 8 bytes, and that the generated code also be a multiple - * of 8 bytes. - * - * Also note that the dsa, blend, blend_color fields are really only needed - * for the fallback/C per-pixel code. They're not used when we generate - * dynamic SPU fragment code (which is the normal case), and will eventually - * be removed from this structure. 
- */ -struct cell_command_fragment_ops -{ - opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ - - /* Fields for the fallback case */ - struct pipe_depth_stencil_alpha_state dsa; - struct pipe_blend_state blend; - struct pipe_blend_color blend_color; - - /* Fields for the generated SPU code */ - unsigned total_code_size; - unsigned front_code_index; - unsigned back_code_index; - /* this field has variant length, and must be the last field in - * the structure - */ - unsigned code[0]; -}; - - -/** Max instructions for fragment programs */ -#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 512 - -/** - * Command to send a fragment program to SPUs. - */ -struct cell_command_fragment_program -{ - opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */ - uint num_inst; /**< Number of instructions */ - uint32_t pad[3]; - unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; -}; - - -/** - * Tell SPUs about the framebuffer size, location - */ -struct cell_command_framebuffer -{ - opcode_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */ - int width, height; - void *color_start, *depth_start; - enum pipe_format color_format, depth_format; - uint32_t pad_[2]; -}; - - -/** - * Tell SPUs about rasterizer state. - */ -struct cell_command_rasterizer -{ - opcode_t opcode; /**< CELL_CMD_STATE_RASTERIZER */ - struct pipe_rasterizer_state rasterizer; - /*uint32_t pad[1];*/ -}; - - -/** - * Clear framebuffer to the given value/color. - */ -struct cell_command_clear_surface -{ - opcode_t opcode; /**< CELL_CMD_CLEAR_SURFACE */ - uint surface; /**< Temporary: 0=color, 1=Z */ - uint value; - uint32_t pad[2]; -}; - - -/** - * Array info used by the vertex shader's vertex puller. - */ -struct cell_array_info -{ - uint64_t base; /**< Base address of the 0th element. */ - uint attr; /**< Attribute that this state is for. */ - uint pitch; /**< Byte pitch from one entry to the next. 
*/ - uint size; - uint function_offset; -}; - - -struct cell_attribute_fetch_code -{ - uint64_t base; - uint size; -}; - - -struct cell_buffer_range -{ - uint64_t base; - unsigned size; -}; - - -struct cell_shader_info -{ - uint64_t declarations; - uint64_t instructions; - uint64_t immediates; - - unsigned num_outputs; - unsigned num_declarations; - unsigned num_instructions; - unsigned num_immediates; -}; - - -#define SPU_VERTS_PER_BATCH 64 -struct cell_command_vs -{ - opcode_t opcode; /**< CELL_CMD_VS_EXECUTE */ - uint64_t vOut[SPU_VERTS_PER_BATCH]; - unsigned num_elts; - unsigned elts[SPU_VERTS_PER_BATCH]; - float plane[12][4]; - unsigned nr_planes; - unsigned nr_attrs; -}; - - -struct cell_command_render -{ - opcode_t opcode; /**< CELL_CMD_RENDER */ - uint prim_type; /**< PIPE_PRIM_x */ - uint num_verts; - uint vertex_size; /**< bytes per vertex */ - uint num_indexes; - uint vertex_buf; /**< which cell->buffer[] contains the vertex data */ - float xmin, ymin, xmax, ymax; /* XXX another dummy field */ - uint min_index; - boolean inline_verts; - uint32_t pad_[1]; -}; - - -struct cell_command_release_verts -{ - opcode_t opcode; /**< CELL_CMD_RELEASE_VERTS */ - uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ - uint32_t pad_[3]; -}; - - -struct cell_command_sampler -{ - opcode_t opcode; /**< CELL_CMD_STATE_SAMPLER */ - uint unit; - struct pipe_sampler_state state; - uint32_t pad_[3]; -}; - - -struct cell_command_texture -{ - opcode_t opcode; /**< CELL_CMD_STATE_TEXTURE */ - uint target; /**< PIPE_TEXTURE_x */ - uint unit; - void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */ - ushort width[CELL_MAX_TEXTURE_LEVELS]; - ushort height[CELL_MAX_TEXTURE_LEVELS]; - ushort depth[CELL_MAX_TEXTURE_LEVELS]; -}; - - -#define MAX_SPU_FUNCTIONS 12 -/** - * Used to tell the PPU about the address of particular functions in the - * SPU's address space. 
- */ -struct cell_spu_function_info -{ - uint num; - char names[MAX_SPU_FUNCTIONS][16]; - uint addrs[MAX_SPU_FUNCTIONS]; - char pad[12]; /**< Pad struct to multiple of 16 bytes (256 currently) */ -}; - - -/** This is the object passed to spe_create_thread() */ -PIPE_ALIGN_TYPE(16, -struct cell_init_info -{ - unsigned id; - unsigned num_spus; - unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */ - float inv_timebase; /**< 1.0/timebase, for perf measurement */ - - /** Buffers for command batches, vertex/index data */ - ubyte *buffers[CELL_NUM_BUFFERS]; - uint *buffer_status; /**< points at cell_context->buffer_status */ - - struct cell_spu_function_info *spu_functions; -}); - - -#endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile deleted file mode 100644 index c92f8e5cba2..00000000000 --- a/src/gallium/drivers/cell/ppu/Makefile +++ /dev/null @@ -1,86 +0,0 @@ -# Gallium3D Cell driver: PPU code - -# This makefile builds the libcell.a library which gets pulled into -# the main libGL.so library - - -TOP = ../../../../.. -include $(TOP)/configs/current - - -# This is the "top-level" cell PPU driver code, will get pulled into libGL.so -# by the winsys Makefile. -CELL_LIB = ../libcell.a - - -# This is the SPU code. We'd like to be able to put this into the libcell.a -# archive with the PPU code, but nesting .a libs doesn't seem to work. 
-# So, it's pulled into libGL.so in gallium/winsys/xlib/Makefile -SPU_CODE_MODULE = ../spu/g3d_spu.a - - -SOURCES = \ - cell_batch.c \ - cell_clear.c \ - cell_context.c \ - cell_draw_arrays.c \ - cell_fence.c \ - cell_flush.c \ - cell_gen_fragment.c \ - cell_gen_fp.c \ - cell_state_derived.c \ - cell_state_emit.c \ - cell_state_shader.c \ - cell_pipe_state.c \ - cell_screen.c \ - cell_state_vertex.c \ - cell_spu.c \ - cell_surface.c \ - cell_texture.c \ - cell_vbuf.c \ - cell_vertex_fetch.c \ - cell_vertex_shader.c - - -OBJECTS = $(SOURCES:.c=.o) \ - -INCLUDE_DIRS = \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/gallium/drivers - -.c.o: - $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ - - -.c.s: - $(CC) -S $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ - - -default: $(CELL_LIB) - - -$(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE) -# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) # doesn't work - ar -ru $(CELL_LIB) $(OBJECTS) - -#$(PROG): $(PPU_OBJECTS) -# $(CC) -o $(PROG) $(PPU_OBJECTS) $(SPU_CODE_MODULE) $(PPU_LFLAGS) - - - -clean: - rm -f *.o *~ $(CELL_LIB) - - - -depend: $(SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null - -include depend - - - diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c deleted file mode 100644 index fe144f8b849..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_batch.c +++ /dev/null @@ -1,260 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_fence.h" -#include "cell_spu.h" - - - -/** - * Search the buffer pool for an empty/free buffer and return its index. - * Buffers are used for storing vertex data, state and commands which - * will be sent to the SPUs. - * If no empty buffers are available, wait for one. 
- * \return buffer index in [0, CELL_NUM_BUFFERS-1] - */ -uint -cell_get_empty_buffer(struct cell_context *cell) -{ - static uint prev_buffer = 0; - uint buf = (prev_buffer + 1) % CELL_NUM_BUFFERS; - uint tries = 0; - - /* Find a buffer that's marked as free by all SPUs */ - while (1) { - uint spu, num_free = 0; - - for (spu = 0; spu < cell->num_spus; spu++) { - if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) { - num_free++; - - if (num_free == cell->num_spus) { - /* found a free buffer, now mark status as used */ - for (spu = 0; spu < cell->num_spus; spu++) { - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; - } - /* - printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries); - */ - prev_buffer = buf; - - /* release tex buffer associated w/ prev use of this batch buf */ - cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]); - - return buf; - } - } - else { - break; - } - } - - /* try next buf */ - buf = (buf + 1) % CELL_NUM_BUFFERS; - - tries++; - if (tries == 100) { - /* - printf("PPU WAITING for buffer...\n"); - */ - } - } -} - - -/** - * Append a fence command to the current batch buffer. - * Note that we're sure there's always room for this because of the - * adjusted size check in cell_batch_free_space(). 
- */ -static void -emit_fence(struct cell_context *cell) -{ - const uint batch = cell->cur_batch; - const uint size = cell->buffer_size[batch]; - struct cell_command_fence *fence_cmd; - struct cell_fence *fence = &cell->fenced_buffers[batch].fence; - uint i; - - /* set fence status to emitted, not yet signalled */ - for (i = 0; i < cell->num_spus; i++) { - fence->status[i][0] = CELL_FENCE_EMITTED; - } - - STATIC_ASSERT(sizeof(struct cell_command_fence) % 16 == 0); - ASSERT(size % 16 == 0); - ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE); - - fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size); - fence_cmd->opcode[0] = CELL_CMD_FENCE; - fence_cmd->fence = fence; - - /* update batch buffer size */ - cell->buffer_size[batch] = size + sizeof(struct cell_command_fence); -} - - -/** - * Flush the current batch buffer to the SPUs. - * An empty buffer will be found and set as the new current batch buffer - * for subsequent commands/data. - */ -void -cell_batch_flush(struct cell_context *cell) -{ - static boolean flushing = FALSE; - uint batch = cell->cur_batch; - uint size = cell->buffer_size[batch]; - uint spu, cmd_word; - - assert(!flushing); - - if (size == 0) - return; - - /* Before we use this batch buffer, make sure any fenced texture buffers - * are released. - */ - if (cell->fenced_buffers[batch].head) { - emit_fence(cell); - size = cell->buffer_size[batch]; - } - - flushing = TRUE; - - assert(batch < CELL_NUM_BUFFERS); - - /* - printf("cell_batch_dispatch: buf %u at %p, size %u\n", - batch, &cell->buffer[batch][0], size); - */ - - /* - * Build "BATCH" command and send to all SPUs. 
- */ - cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16); - - for (spu = 0; spu < cell->num_spus; spu++) { - assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED); - send_mbox_message(cell_global.spe_contexts[spu], cmd_word); - } - - /* When the SPUs are done copying the buffer into their locals stores - * they'll write a BUFFER_STATUS_FREE message into the buffer_status[] - * array indicating that the PPU can re-use the buffer. - */ - - batch = cell_get_empty_buffer(cell); - - cell->buffer_size[batch] = 0; /* empty */ - cell->cur_batch = batch; - - flushing = FALSE; -} - - -/** - * Return the number of bytes free in the current batch buffer. - */ -uint -cell_batch_free_space(const struct cell_context *cell) -{ - uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch]; - free -= sizeof(struct cell_command_fence); - return free; -} - - -/** - * Allocate space in the current batch buffer for 'bytes' space. - * Bytes must be a multiple of 16 bytes. Allocation will be 16 byte aligned. - * \return address in batch buffer to put data - */ -void * -cell_batch_alloc16(struct cell_context *cell, uint bytes) -{ - void *pos; - uint size; - - ASSERT(bytes % 16 == 0); - ASSERT(bytes <= CELL_BUFFER_SIZE); - ASSERT(cell->cur_batch >= 0); - -#ifdef ASSERT - { - uint spu; - for (spu = 0; spu < cell->num_spus; spu++) { - ASSERT(cell->buffer_status[spu][cell->cur_batch][0] - == CELL_BUFFER_STATUS_USED); - } - } -#endif - - size = cell->buffer_size[cell->cur_batch]; - - if (bytes > cell_batch_free_space(cell)) { - cell_batch_flush(cell); - size = 0; - } - - ASSERT(size % 16 == 0); - ASSERT(size + bytes <= CELL_BUFFER_SIZE); - - pos = (void *) (cell->buffer[cell->cur_batch] + size); - - cell->buffer_size[cell->cur_batch] = size + bytes; - - return pos; -} - - -/** - * One-time init of batch buffers. 
- */ -void -cell_init_batch_buffers(struct cell_context *cell) -{ - uint spu, buf; - - /* init command, vertex/index buffer info */ - for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { - cell->buffer_size[buf] = 0; - - /* init batch buffer status values, - * mark 0th buffer as used, rest as free. - */ - for (spu = 0; spu < cell->num_spus; spu++) { - if (buf == 0) - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; - else - cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE; - } - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h deleted file mode 100644 index 290136031a1..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_batch.h +++ /dev/null @@ -1,54 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef CELL_BATCH_H -#define CELL_BATCH_H - -#include "pipe/p_compiler.h" - - -struct cell_context; - - -extern uint -cell_get_empty_buffer(struct cell_context *cell); - -extern void -cell_batch_flush(struct cell_context *cell); - -extern uint -cell_batch_free_space(const struct cell_context *cell); - -extern void * -cell_batch_alloc16(struct cell_context *cell, uint bytes); - -extern void -cell_init_batch_buffers(struct cell_context *cell); - - -#endif /* CELL_BATCH_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c deleted file mode 100644 index 6a525ef4e41..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ /dev/null @@ -1,93 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Authors - * Brian Paul - */ - -#include -#include -#include -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "util/u_pack_color.h" -#include "cell/common.h" -#include "cell_clear.h" -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_flush.h" -#include "cell_spu.h" -#include "cell_state.h" - - -/** - * Called via pipe->clear() - */ -void -cell_clear(struct pipe_context *pipe, unsigned buffers, - const pipe_color_union *color, - double depth, unsigned stencil) -{ - struct cell_context *cell = cell_context(pipe); - - if (cell->dirty) - cell_update_derived(cell); - - if (buffers & PIPE_CLEAR_COLOR) { - uint surfIndex = 0; - union util_color uc; - - util_pack_color(color->f, cell->framebuffer.cbufs[0]->format, &uc); - - /* Build a CLEAR command and place it in the current batch buffer */ - STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0); - struct cell_command_clear_surface *clr - = (struct cell_command_clear_surface *) - cell_batch_alloc16(cell, sizeof(*clr)); - clr->opcode[0] = CELL_CMD_CLEAR_SURFACE; - clr->surface = surfIndex; - clr->value = uc.ui; - } - - if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { - uint surfIndex = 1; - uint clearValue; - - clearValue = util_pack_z_stencil(cell->framebuffer.zsbuf->format, - depth, stencil); - - /* Build a CLEAR command and place it in the current batch buffer */ - STATIC_ASSERT(sizeof(struct 
cell_command_clear_surface) % 16 == 0); - struct cell_command_clear_surface *clr - = (struct cell_command_clear_surface *) - cell_batch_alloc16(cell, sizeof(*clr)); - clr->opcode[0] = CELL_CMD_CLEAR_SURFACE; - clr->surface = surfIndex; - clr->value = clearValue; - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_clear.h b/src/gallium/drivers/cell/ppu/cell_clear.h deleted file mode 100644 index a365feb0f00..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_clear.h +++ /dev/null @@ -1,42 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_CLEAR_H -#define CELL_CLEAR_H - - -struct pipe_context; - - -extern void -cell_clear(struct pipe_context *pipe, unsigned buffers, - const union pipe_color_union *color, - double depth, unsigned stencil); - - -#endif /* CELL_CLEAR_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c deleted file mode 100644 index 58e647a39fa..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ /dev/null @@ -1,190 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/** - * Authors - * Brian Paul - */ - - -#include - -#include "pipe/p_defines.h" -#include "pipe/p_format.h" -#include "util/u_memory.h" -#include "pipe/p_screen.h" -#include "util/u_inlines.h" - -#include "draw/draw_context.h" -#include "draw/draw_private.h" - -#include "cell/common.h" -#include "cell_batch.h" -#include "cell_clear.h" -#include "cell_context.h" -#include "cell_draw_arrays.h" -#include "cell_fence.h" -#include "cell_flush.h" -#include "cell_state.h" -#include "cell_surface.h" -#include "cell_spu.h" -#include "cell_pipe_state.h" -#include "cell_texture.h" -#include "cell_vbuf.h" - - - -static void -cell_destroy_context( struct pipe_context *pipe ) -{ - struct cell_context *cell = cell_context(pipe); - unsigned i; - - for (i = 0; i < cell->num_vertex_buffers; i++) { - pipe_resource_reference(&cell->vertex_buffer[i].buffer, NULL); - } - - util_delete_keymap(cell->fragment_ops_cache, NULL); - - cell_spu_exit(cell); - - align_free(cell); -} - - -static struct draw_context * -cell_draw_create(struct cell_context *cell) -{ - struct draw_context *draw = draw_create(&cell->pipe); - -#if 0 /* broken */ - if (getenv("GALLIUM_CELL_VS")) { - /* plug in SPU-based vertex transformation code */ - draw->shader_queue_flush = cell_vertex_shader_queue_flush; - draw->driver_private = cell; - } -#endif - - return draw; -} - - -static const struct debug_named_value cell_debug_flags[] = { - {"checker", CELL_DEBUG_CHECKER, NULL},/**< modulate tile clear color by SPU ID */ - {"asm", CELL_DEBUG_ASM, NULL}, /**< dump SPU asm code */ - {"sync", CELL_DEBUG_SYNC, NULL}, /**< SPUs do synchronous DMA */ - {"fragops", CELL_DEBUG_FRAGMENT_OPS, NULL}, /**< SPUs emit fragment ops debug messages*/ - {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK, NULL}, /**< SPUs use reference implementation for fragment ops*/ - {"cmd", CELL_DEBUG_CMD, NULL}, /**< SPUs dump command buffer info */ - {"cache", 
CELL_DEBUG_CACHE, NULL}, /**< report texture cache stats on exit */ - DEBUG_NAMED_VALUE_END -}; - - -struct pipe_context * -cell_create_context(struct pipe_screen *screen, - void *priv ) -{ - struct cell_context *cell; - uint i; - - /* some fields need to be 16-byte aligned, so align the whole object */ - cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); - if (!cell) - return NULL; - - memset(cell, 0, sizeof(*cell)); - - cell->winsys = NULL; /* XXX: fixme - get this from screen? */ - cell->pipe.winsys = NULL; - cell->pipe.screen = screen; - cell->pipe.priv = priv; - cell->pipe.destroy = cell_destroy_context; - - cell->pipe.clear = cell_clear; - cell->pipe.flush = cell_flush; - -#if 0 - cell->pipe.begin_query = cell_begin_query; - cell->pipe.end_query = cell_end_query; - cell->pipe.wait_query = cell_wait_query; -#endif - - cell_init_draw_functions(cell); - cell_init_state_functions(cell); - cell_init_shader_functions(cell); - cell_init_surface_functions(cell); - cell_init_vertex_functions(cell); - cell_init_texture_transfer_funcs(cell); - - cell->draw = cell_draw_create(cell); - - /* Create cache of fragment ops generated code */ - cell->fragment_ops_cache = - util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL); - - cell_init_vbuf(cell); - - draw_set_rasterize_stage(cell->draw, cell->vbuf); - - /* convert all points/lines to tris for the time being */ - draw_wide_point_threshold(cell->draw, 0.0); - draw_wide_line_threshold(cell->draw, 0.0); - - /* get env vars or read config file to get debug flags */ - cell->debug_flags = debug_get_flags_option("CELL_DEBUG", - cell_debug_flags, - 0 ); - - for (i = 0; i < CELL_NUM_BUFFERS; i++) - cell_fence_init(&cell->fenced_buffers[i].fence); - - - /* - * SPU stuff - */ - /* This call only works with SDK 3.0. Anyone still using 2.1??? 
*/ - cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1); - cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1); - if (cell->debug_flags) { - printf("Cell: found %d Cell(s) with %u SPUs\n", - cell->num_cells, cell->num_spus); - } - if (getenv("CELL_NUM_SPUS")) { - cell->num_spus = atoi(getenv("CELL_NUM_SPUS")); - assert(cell->num_spus > 0); - } - - cell_start_spus(cell); - - cell_init_batch_buffers(cell); - - /* make sure SPU initializations are done before proceeding */ - cell_flush_int(cell, CELL_FLUSH_WAIT); - - return &cell->pipe; -} diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h deleted file mode 100644 index d1aee62ba1e..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ /dev/null @@ -1,210 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef CELL_CONTEXT_H -#define CELL_CONTEXT_H - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "draw/draw_vertex.h" -#include "draw/draw_vbuf.h" -/*#include "cell_winsys.h"*/ -#include "cell/common.h" -#include "rtasm/rtasm_ppc_spe.h" -#include "tgsi/tgsi_scan.h" -#include "util/u_keymap.h" - - -struct cell_vbuf_render; - - -/** - * Cell vertex shader state, subclass of pipe_shader_state. - */ -struct cell_vertex_shader_state -{ - struct pipe_shader_state shader; - struct tgsi_shader_info info; - void *draw_data; -}; - - -/** - * Cell fragment shader state, subclass of pipe_shader_state. - */ -struct cell_fragment_shader_state -{ - struct pipe_shader_state shader; - struct tgsi_shader_info info; - struct spe_function code; - void *data; -}; - - -/** - * Key for mapping per-fragment state to cached SPU machine code. - * keymap(cell_fragment_ops_key) => cell_command_fragment_ops - */ -struct cell_fragment_ops_key -{ - struct pipe_blend_state blend; - struct pipe_blend_color blend_color; - struct pipe_depth_stencil_alpha_state dsa; - enum pipe_format color_format; - enum pipe_format zs_format; -}; - - -struct cell_buffer_node; - -/** - * Fenced buffer list. List of buffers which can be unreferenced after - * the fence has been executed/signalled. - */ -struct cell_buffer_list -{ - PIPE_ALIGN_VAR(16) struct cell_fence fence; - struct cell_buffer_node *head; -}; - -struct cell_velems_state -{ - unsigned count; - struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS]; -}; - -/** - * Per-context state, subclass of pipe_context. 
- */ -struct cell_context -{ - struct pipe_context pipe; - - struct cell_winsys *winsys; - - const struct pipe_blend_state *blend; - const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; - uint num_samplers; - const struct pipe_depth_stencil_alpha_state *depth_stencil; - const struct pipe_rasterizer_state *rasterizer; - const struct cell_vertex_shader_state *vs; - const struct cell_fragment_shader_state *fs; - const struct cell_velems_state *velems; - - struct spe_function logic_op; - - struct pipe_blend_color blend_color; - struct pipe_stencil_ref stencil_ref; - struct pipe_clip_state clip; - struct pipe_resource *constants[2]; - struct pipe_framebuffer_state framebuffer; - struct pipe_poly_stipple poly_stipple; - struct pipe_scissor_state scissor; - struct cell_resource *texture[PIPE_MAX_SAMPLERS]; - struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS]; - uint num_textures; - struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - uint num_vertex_buffers; - struct pipe_index_buffer index_buffer; - - ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; - ubyte *zsbuf_map; - - uint dirty; - uint dirty_textures; /* bitmask of texture units */ - uint dirty_samplers; /* bitmask of sampler units */ - - /** Cache of code generated for per-fragment ops */ - struct keymap *fragment_ops_cache; - - /** The primitive drawing context */ - struct draw_context *draw; - struct draw_stage *render_stage; - - /** For post-transformed vertex buffering: */ - struct cell_vbuf_render *vbuf_render; - struct draw_stage *vbuf; - - struct vertex_info vertex_info; - - /** Mapped constant buffers */ - const void *mapped_constants[PIPE_SHADER_TYPES]; - - PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions; - - uint num_cells, num_spus; - - /** Buffers for command batches, vertex/index data */ - uint buffer_size[CELL_NUM_BUFFERS]; - PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE]; - - int cur_batch; /**< which 
buffer is being filled w/ commands */ - - /** [4] to ensure 16-byte alignment for each status word */ - PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4]; - - - /** Associated with each command/batch buffer is a list of pipe_buffers - * that are fenced. When the last command in a buffer is executed, the - * fence will be signalled, indicating that any pipe_buffers preceeding - * that fence can be unreferenced (and probably freed). - */ - struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS]; - - - struct spe_function attrib_fetch; - unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; - - unsigned debug_flags; -}; - - - - -static INLINE struct cell_context * -cell_context(struct pipe_context *pipe) -{ - return (struct cell_context *) pipe; -} - - -struct pipe_context * -cell_create_context(struct pipe_screen *screen, - void *priv ); - -extern void -cell_vertex_shader_queue_flush(struct draw_context *draw); - - -/* XXX find a better home for this */ -extern void cell_update_vertex_fetch(struct draw_context *draw); - - -#endif /* CELL_CONTEXT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c deleted file mode 100644 index a367fa3fe15..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ /dev/null @@ -1,113 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Author: - * Brian Paul - * Keith Whitwell - */ - - -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "util/u_inlines.h" - -#include "cell_context.h" -#include "cell_draw_arrays.h" -#include "cell_state.h" -#include "cell_flush.h" -#include "cell_texture.h" - -#include "draw/draw_context.h" - - - - - - -/** - * Draw vertex arrays, with optional indexing. - * Basically, map the vertex buffers (and drawing surfaces), then hand off - * the drawing to the 'draw' module. - * - * XXX should the element buffer be specified/bound with a separate function? 
- */ -static void -cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) -{ - struct cell_context *cell = cell_context(pipe); - struct draw_context *draw = cell->draw; - void *mapped_indices = NULL; - unsigned i; - - if (cell->dirty) - cell_update_derived( cell ); - -#if 0 - cell_map_surfaces(cell); -#endif - - /* - * Map vertex buffers - */ - for (i = 0; i < cell->num_vertex_buffers; i++) { - void *buf = cell_resource(cell->vertex_buffer[i].buffer)->data; - draw_set_mapped_vertex_buffer(draw, i, buf); - } - /* Map index buffer, if present */ - if (info->indexed && cell->index_buffer.buffer) - mapped_indices = cell_resource(cell->index_buffer.buffer)->data; - - draw_set_mapped_index_buffer(draw, mapped_indices); - - /* draw! */ - draw_vbo(draw, info); - - /* - * unmap vertex/index buffers - will cause draw module to flush - */ - for (i = 0; i < cell->num_vertex_buffers; i++) { - draw_set_mapped_vertex_buffer(draw, i, NULL); - } - if (mapped_indices) { - draw_set_mapped_index_buffer(draw, NULL); - } - - /* - * TODO: Flush only when a user vertex/index buffer is present - * (or even better, modify draw module to do this - * internally when this condition is seen?) - */ - draw_flush(draw); -} - - -void -cell_init_draw_functions(struct cell_context *cell) -{ - cell->pipe.draw_vbo = cell_draw_vbo; -} - diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h deleted file mode 100644 index 148873aa675..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h +++ /dev/null @@ -1,36 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef CELL_DRAW_ARRAYS_H -#define CELL_DRAW_ARRAYS_H - - -extern void -cell_init_draw_functions(struct cell_context *cell); - - -#endif /* CELL_DRAW_ARRAYS_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c deleted file mode 100644 index 181fef44f45..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_fence.c +++ /dev/null @@ -1,172 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#include -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_fence.h" -#include "cell_texture.h" - - -void -cell_fence_init(struct cell_fence *fence) -{ - uint i; - ASSERT_ALIGN16(fence->status); - for (i = 0; i < CELL_MAX_SPUS; i++) { - fence->status[i][0] = CELL_FENCE_IDLE; - } -} - - -boolean -cell_fence_signalled(const struct cell_context *cell, - const struct cell_fence *fence) -{ - uint i; - for (i = 0; i < cell->num_spus; i++) { - if (fence->status[i][0] != CELL_FENCE_SIGNALLED) - return FALSE; - /*assert(fence->status[i][0] == CELL_FENCE_EMITTED);*/ - } - return TRUE; -} - - -boolean -cell_fence_finish(const struct cell_context *cell, - const struct cell_fence *fence, - uint64_t timeout) -{ - while (!cell_fence_signalled(cell, fence)) { - usleep(10); - } - -#ifdef DEBUG - { - uint i; - for (i = 0; i < cell->num_spus; i++) { - assert(fence->status[i][0] == CELL_FENCE_SIGNALLED); - } - } -#endif - return TRUE; -} - - - - -struct cell_buffer_node -{ - struct pipe_resource *buffer; - struct cell_buffer_node *next; -}; - - -#if 0 -static void -cell_add_buffer_to_list(struct cell_context *cell, - struct cell_buffer_list *list, - struct pipe_resource *buffer) -{ - struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node); - /* create new list node which references the buffer, insert at head */ - if (node) { - pipe_resource_reference(&node->buffer, buffer); - node->next = list->head; - list->head = node; - } -} -#endif - - -/** - * Wait for completion of the given fence, then unreference any buffers - * on the list. - * This typically unrefs/frees texture buffers after any rendering which uses - * them has completed. 
- */ -void -cell_free_fenced_buffers(struct cell_context *cell, - struct cell_buffer_list *list) -{ - if (list->head) { - /*struct pipe_screen *ps = cell->pipe.screen;*/ - struct cell_buffer_node *node; - - cell_fence_finish(cell, &list->fence); - - /* traverse the list, unreferencing buffers, freeing nodes */ - node = list->head; - while (node) { - struct cell_buffer_node *next = node->next; - assert(node->buffer); - /* XXX need this? pipe_buffer_unmap(ps, node->buffer);*/ -#if 0 - printf("Unref buffer %p\n", node->buffer); - if (node->buffer->reference.count == 1) - printf(" Delete!\n"); -#endif - pipe_resource_reference(&node->buffer, NULL); - FREE(node); - node = next; - } - list->head = NULL; - } -} - - -/** - * This should be called for each render command. - * Any texture buffers that are current bound will be added to a fenced - * list to be freed later when the fence is executed/signalled. - */ -void -cell_add_fenced_textures(struct cell_context *cell) -{ - /*struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch];*/ - uint i; - - for (i = 0; i < cell->num_textures; i++) { - struct cell_resource *ct = cell->texture[i]; - if (ct) { -#if 0 - printf("Adding texture %p buffer %p to list\n", - ct, ct->tiled_buffer[level]); -#endif -#if 00 - /* XXX this needs to be fixed/restored! - * Maybe keep pointers to textures, not buffers. - */ - if (ct->base.buffer) - cell_add_buffer_to_list(cell, list, ct->buffer); -#endif - } - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h deleted file mode 100644 index 3568230b1c0..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_fence.h +++ /dev/null @@ -1,60 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_FENCE_H -#define CELL_FENCE_H - - -extern void -cell_fence_init(struct cell_fence *fence); - - -extern boolean -cell_fence_signalled(const struct cell_context *cell, - const struct cell_fence *fence, - unsigned flags); - - -extern boolean -cell_fence_finish(const struct cell_context *cell, - const struct cell_fence *fence, - unsigned flags, - uint64_t timeout); - - - -extern void -cell_free_fenced_buffers(struct cell_context *cell, - struct cell_buffer_list *list); - - -extern void -cell_add_fenced_textures(struct cell_context *cell); - - -#endif /* CELL_FENCE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c deleted file mode 100644 index 463f4d03eb9..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ /dev/null @@ -1,109 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_flush.h" -#include "cell_spu.h" -#include "cell_render.h" -#include "draw/draw_context.h" - - -/** - * Called via pipe->flush() - */ -void -cell_flush(struct pipe_context *pipe, - struct pipe_fence_handle **fence) -{ - struct cell_context *cell = cell_context(pipe); - - if (fence) { - *fence = NULL; - } - - flags |= CELL_FLUSH_WAIT; - - draw_flush( cell->draw ); - cell_flush_int(cell, flags); -} - - -/** - * Cell internal flush function. Send the current batch buffer to all SPUs. - * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle. - * \param flags bitmask of flags CELL_FLUSH_WAIT, or zero - */ -void -cell_flush_int(struct cell_context *cell, unsigned flags) -{ - static boolean flushing = FALSE; /* recursion catcher */ - uint i; - - ASSERT(!flushing); - flushing = TRUE; - - if (flags & CELL_FLUSH_WAIT) { - STATIC_ASSERT(sizeof(opcode_t) % 16 == 0); - opcode_t *cmd = (opcode_t*) cell_batch_alloc16(cell, sizeof(opcode_t)); - *cmd[0] = CELL_CMD_FINISH; - } - - cell_batch_flush(cell); - -#if 0 - /* Send CMD_FINISH to all SPUs */ - for (i = 0; i < cell->num_spus; i++) { - send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH); - } -#endif - - if (flags & CELL_FLUSH_WAIT) { - /* Wait for ack */ - for (i = 0; i < cell->num_spus; i++) { - uint k = wait_mbox_message(cell_global.spe_contexts[i]); - assert(k == CELL_CMD_FINISH); - } - } - - flushing = FALSE; -} - - -void -cell_flush_buffer_range(struct cell_context *cell, void *ptr, - unsigned size) -{ - STATIC_ASSERT((sizeof(opcode_t) + sizeof(struct cell_buffer_range)) % 
16 == 0); - uint32_t *batch = (uint32_t*)cell_batch_alloc16(cell, - sizeof(opcode_t) + sizeof(struct cell_buffer_range)); - struct cell_buffer_range *br = (struct cell_buffer_range *) &batch[4]; - batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE; - br->base = (uintptr_t) ptr; - br->size = size; -} diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h deleted file mode 100644 index 509ae6239ac..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_flush.h +++ /dev/null @@ -1,45 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_FLUSH -#define CELL_FLUSH - -#define CELL_FLUSH_WAIT 0x80000000 - -extern void -cell_flush(struct pipe_context *pipe, unsigned flags, - struct pipe_fence_handle **fence); - -extern void -cell_flush_int(struct cell_context *cell, unsigned flags); - -extern void -cell_flush_buffer_range(struct cell_context *cell, void *ptr, - unsigned size); - -#endif diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c deleted file mode 100644 index 1d8a11a4ac9..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ /dev/null @@ -1,2036 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2009 VMware, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - - -/** - * Generate SPU fragment program/shader code. - * - * Note that we generate SOA-style code here. So each TGSI instruction - * operates on four pixels (and is translated into four SPU instructions, - * generally speaking). - * - * \author Brian Paul - */ - -#include -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" -#include "tgsi/tgsi_exec.h" -#include "tgsi/tgsi_dump.h" -#include "rtasm/rtasm_ppc_spe.h" -#include "util/u_memory.h" -#include "cell_context.h" -#include "cell_gen_fp.h" - - -#define MAX_TEMPS 16 -#define MAX_IMMED 8 - -#define CHAN_X 0 -#define CHAN_Y 1 -#define CHAN_Z 2 -#define CHAN_W 3 - -/** - * Context needed during code generation. 
- */ -struct codegen -{ - struct cell_context *cell; - int inputs_reg; /**< 1st function parameter */ - int outputs_reg; /**< 2nd function parameter */ - int constants_reg; /**< 3rd function parameter */ - int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */ - int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */ - - int num_imm; /**< number of immediates */ - - int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */ - - int addr_reg; /**< address register, integer values */ - - /** Per-instruction temps / intermediate temps */ - int num_itemps; - int itemps[12]; - - /** Current IF/ELSE/ENDIF nesting level */ - int if_nesting; - /** Current BGNLOOP/ENDLOOP nesting level */ - int loop_nesting; - /** Location of start of current loop */ - int loop_start; - - /** Index of if/conditional mask register */ - int cond_mask_reg; - /** Index of loop mask register */ - int loop_mask_reg; - - /** Index of master execution mask register */ - int exec_mask_reg; - - /** KIL mask: indicates which fragments have been killed */ - int kill_mask_reg; - - int frame_size; /**< Stack frame size, in words */ - - struct spe_function *f; - boolean error; -}; - - -/** - * Allocate an intermediate temporary register. - */ -static int -get_itemp(struct codegen *gen) -{ - int t = spe_allocate_available_register(gen->f); - assert(gen->num_itemps < Elements(gen->itemps)); - gen->itemps[gen->num_itemps++] = t; - return t; -} - -/** - * Free all intermediate temporary registers. To be called after each - * instruction has been emitted. - */ -static void -free_itemps(struct codegen *gen) -{ - int i; - for (i = 0; i < gen->num_itemps; i++) { - spe_release_register(gen->f, gen->itemps[i]); - } - gen->num_itemps = 0; -} - - -/** - * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}. - * The register is allocated and initialized upon the first call. 
- */ -static int -get_const_one_reg(struct codegen *gen) -{ - if (gen->one_reg <= 0) { - gen->one_reg = spe_allocate_available_register(gen->f); - - spe_indent(gen->f, 4); - spe_comment(gen->f, -4, "init constant reg = 1.0:"); - - /* one = {1.0, 1.0, 1.0, 1.0} */ - spe_load_float(gen->f, gen->one_reg, 1.0f); - - spe_indent(gen->f, -4); - } - - return gen->one_reg; -} - - -/** - * Return index of the address register. - * Used for indirect register loads/stores. - */ -static int -get_address_reg(struct codegen *gen) -{ - if (gen->addr_reg <= 0) { - gen->addr_reg = spe_allocate_available_register(gen->f); - - spe_indent(gen->f, 4); - spe_comment(gen->f, -4, "init address reg = 0:"); - - /* init addr = {0, 0, 0, 0} */ - spe_zero(gen->f, gen->addr_reg); - - spe_indent(gen->f, -4); - } - - return gen->addr_reg; -} - - -/** - * Return index of the master execution mask. - * The register is allocated an initialized upon the first call. - * - * The master execution mask controls which pixels in a quad are - * modified, according to surrounding conditionals, loops, etc. 
- */ -static int -get_exec_mask_reg(struct codegen *gen) -{ - if (gen->exec_mask_reg <= 0) { - gen->exec_mask_reg = spe_allocate_available_register(gen->f); - - /* XXX this may not be needed */ - spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0"); - spe_load_int(gen->f, gen->exec_mask_reg, ~0); - } - - return gen->exec_mask_reg; -} - - -/** Return index of the conditional (if/else) execution mask register */ -static int -get_cond_mask_reg(struct codegen *gen) -{ - if (gen->cond_mask_reg <= 0) { - gen->cond_mask_reg = spe_allocate_available_register(gen->f); - } - - return gen->cond_mask_reg; -} - - -/** Return index of the loop execution mask register */ -static int -get_loop_mask_reg(struct codegen *gen) -{ - if (gen->loop_mask_reg <= 0) { - gen->loop_mask_reg = spe_allocate_available_register(gen->f); - } - - return gen->loop_mask_reg; -} - - - -static boolean -is_register_src(struct codegen *gen, int channel, - const struct tgsi_full_src_register *src) -{ - int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel); - int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); - - if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) { - return FALSE; - } - if (src->Register.File == TGSI_FILE_TEMPORARY || - src->Register.File == TGSI_FILE_IMMEDIATE) { - return TRUE; - } - return FALSE; -} - - -static boolean -is_memory_dst(struct codegen *gen, int channel, - const struct tgsi_full_dst_register *dst) -{ - if (dst->Register.File == TGSI_FILE_OUTPUT) { - return TRUE; - } - else { - return FALSE; - } -} - - -/** - * Return the index of the SPU temporary containing the named TGSI - * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we - * just return the corresponding SPE register. If the TGIS register - * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register - * and emit an SPE load instruction. 
- */ -static int -get_src_reg(struct codegen *gen, - int channel, - const struct tgsi_full_src_register *src) -{ - int reg = -1; - int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel); - boolean reg_is_itemp = FALSE; - uint sign_op; - - assert(swizzle >= TGSI_SWIZZLE_X); - assert(swizzle <= TGSI_SWIZZLE_W); - - { - int index = src->Register.Index; - - assert(swizzle < 4); - - if (src->Register.Indirect) { - /* XXX unfinished */ - } - - switch (src->Register.File) { - case TGSI_FILE_TEMPORARY: - reg = gen->temp_regs[index][swizzle]; - break; - case TGSI_FILE_INPUT: - { - /* offset is measured in quadwords, not bytes */ - int offset = index * 4 + swizzle; - reg = get_itemp(gen); - reg_is_itemp = TRUE; - /* Load: reg = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16); - } - break; - case TGSI_FILE_IMMEDIATE: - reg = gen->imm_regs[index][swizzle]; - break; - case TGSI_FILE_CONSTANT: - { - /* offset is measured in quadwords, not bytes */ - int offset = index * 4 + swizzle; - reg = get_itemp(gen); - reg_is_itemp = TRUE; - /* Load: reg = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, reg, gen->constants_reg, offset * 16); - } - break; - default: - assert(0); - } - } - - /* - * Handle absolute value, negate or set-negative of src register. - */ - sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); - if (sign_op != TGSI_UTIL_SIGN_KEEP) { - /* - * All sign ops are done by manipulating bit 31, the IEEE float sign bit. 
- */ - const int bit31mask_reg = get_itemp(gen); - int result_reg; - - if (reg_is_itemp) { - /* re-use 'reg' for the result */ - result_reg = reg; - } - else { - /* alloc a new reg for the result */ - result_reg = get_itemp(gen); - } - - /* mask with bit 31 set, the rest cleared */ - spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); - - if (sign_op == TGSI_UTIL_SIGN_CLEAR) { - spe_andc(gen->f, result_reg, reg, bit31mask_reg); - } - else if (sign_op == TGSI_UTIL_SIGN_SET) { - spe_and(gen->f, result_reg, reg, bit31mask_reg); - } - else { - assert(sign_op == TGSI_UTIL_SIGN_TOGGLE); - spe_xor(gen->f, result_reg, reg, bit31mask_reg); - } - - reg = result_reg; - } - - return reg; -} - - -/** - * Return the index of an SPE register to use for the given TGSI register. - * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the - * corresponding SPE register is returned. If the TGSI register is - * TGSI_FILE_OUTPUT we allocate an intermediate temporary register. - * See store_dest_reg() below... - */ -static int -get_dst_reg(struct codegen *gen, - int channel, - const struct tgsi_full_dst_register *dest) -{ - int reg = -1; - - switch (dest->Register.File) { - case TGSI_FILE_TEMPORARY: - if (gen->if_nesting > 0 || gen->loop_nesting > 0) - reg = get_itemp(gen); - else - reg = gen->temp_regs[dest->Register.Index][channel]; - break; - case TGSI_FILE_OUTPUT: - reg = get_itemp(gen); - break; - default: - assert(0); - } - - return reg; -} - - -/** - * When a TGSI instruction is writing to an output register, this - * function emits the SPE store instruction to store the value_reg. - * \param value_reg the SPE register containing the value to store. - * This would have been returned by get_dst_reg(). 
- */ -static void -store_dest_reg(struct codegen *gen, - int value_reg, int channel, - const struct tgsi_full_dst_register *dest) -{ - /* - * XXX need to implement dst reg clamping/saturation - */ -#if 0 - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: - break; - case TGSI_SAT_MINUS_PLUS_ONE: - break; - default: - assert( 0 ); - } -#endif - - switch (dest->Register.File) { - case TGSI_FILE_TEMPORARY: - if (gen->if_nesting > 0 || gen->loop_nesting > 0) { - int d_reg = gen->temp_regs[dest->Register.Index][channel]; - int exec_reg = get_exec_mask_reg(gen); - /* Mix d with new value according to exec mask: - * d[i] = mask_reg[i] ? value_reg : d_reg - */ - spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg); - } - else { - /* we're not inside a condition or loop: do nothing special */ - - } - break; - case TGSI_FILE_OUTPUT: - { - /* offset is measured in quadwords, not bytes */ - int offset = dest->Register.Index * 4 + channel; - if (gen->if_nesting > 0 || gen->loop_nesting > 0) { - int exec_reg = get_exec_mask_reg(gen); - int curval_reg = get_itemp(gen); - /* First read the current value from memory: - * Load: curval = memory[(machine_reg) + offset] - */ - spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); - /* Mix curval with newvalue according to exec mask: - * d[i] = mask_reg[i] ? 
value_reg : d_reg - */ - spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg); - /* Store: memory[(machine_reg) + offset] = curval */ - spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); - } - else { - /* Store: memory[(machine_reg) + offset] = reg */ - spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16); - } - } - break; - default: - assert(0); - } -} - - - -static void -emit_prologue(struct codegen *gen) -{ - gen->frame_size = 1024; /* XXX temporary, should be dynamic */ - - spe_comment(gen->f, 0, "Function prologue:"); - - /* save $lr on stack # stqd $lr,16($sp) */ - spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); - - if (gen->frame_size >= 512) { - /* offset is too large for ai instruction */ - int offset_reg = spe_allocate_available_register(gen->f); - int sp_reg = spe_allocate_available_register(gen->f); - /* offset = -framesize */ - spe_load_int(gen->f, offset_reg, -gen->frame_size); - /* sp = $sp */ - spe_move(gen->f, sp_reg, SPE_REG_SP); - /* $sp = $sp + offset_reg */ - spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); - /* save $sp in stack frame */ - spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0); - /* clean up */ - spe_release_register(gen->f, offset_reg); - spe_release_register(gen->f, sp_reg); - } - else { - /* save stack pointer # stqd $sp,-frameSize($sp) */ - spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); - - /* adjust stack pointer # ai $sp,$sp,-frameSize */ - spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); - } -} - - -static void -emit_epilogue(struct codegen *gen) -{ - const int return_reg = 3; - - spe_comment(gen->f, 0, "Function epilogue:"); - - spe_comment(gen->f, 0, "return the killed mask"); - if (gen->kill_mask_reg > 0) { - /* shader called KIL, return the "alive" mask */ - spe_move(gen->f, return_reg, gen->kill_mask_reg); - } - else { - /* return {0,0,0,0} */ - spe_load_uint(gen->f, return_reg, 0); - } - - spe_comment(gen->f, 0, "restore stack and return"); - if (gen->frame_size >= 512) { - /* 
offset is too large for ai instruction */ - int offset_reg = spe_allocate_available_register(gen->f); - /* offset = framesize */ - spe_load_int(gen->f, offset_reg, gen->frame_size); - /* $sp = $sp + offset */ - spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); - /* clean up */ - spe_release_register(gen->f, offset_reg); - } - else { - /* restore stack pointer # ai $sp,$sp,frameSize */ - spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size); - } - - /* restore $lr # lqd $lr,16($sp) */ - spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); - - /* return from function call */ - spe_bi(gen->f, SPE_REG_RA, 0, 0); -} - - -#define FOR_EACH_ENABLED_CHANNEL(inst, ch) \ - for (ch = 0; ch < 4; ch++) \ - if (inst->Dst[0].Register.WriteMask & (1 << ch)) - - -static boolean -emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch = 0, src_reg, addr_reg; - - src_reg = get_src_reg(gen, ch, &inst->Src[0]); - addr_reg = get_address_reg(gen); - - /* convert float to int */ - spe_cflts(gen->f, addr_reg, src_reg, 0); - - free_itemps(gen); - - return TRUE; -} - - -static boolean -emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, src_reg[4], dst_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - src_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - dst_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (is_register_src(gen, ch, &inst->Src[0]) && - is_memory_dst(gen, ch, &inst->Dst[0])) { - /* special-case: register to memory store */ - store_dest_reg(gen, src_reg[ch], ch, &inst->Dst[0]); - } - else { - spe_move(gen->f, dst_reg[ch], src_reg[ch]); - store_dest_reg(gen, dst_reg[ch], ch, &inst->Dst[0]); - } - } - - free_itemps(gen); - - return TRUE; -} - -/** - * Emit binary operation - */ -static boolean -emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], s2_reg[4], d_reg[4]; - - /* Loop over Red/Green/Blue/Alpha channels, fetch src operands 
*/ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - /* Emit actual SPE instruction: d = s1 + s2 */ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ADD: - spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - case TGSI_OPCODE_SUB: - spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - case TGSI_OPCODE_MUL: - spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - default: - ; - } - } - - /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - /* Free any intermediate temps we allocated */ - free_itemps(gen); - - return TRUE; -} - - -/** - * Emit multiply add. See emit_ADD for comments. - */ -static boolean -emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - free_itemps(gen); - return TRUE; -} - - -/** - * Emit linear interpolate. See emit_ADD for comments. 
- */ -static boolean -emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4]; - - /* setup/get src/dst/temp regs */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - /* d = s3 + s1(s2 - s3) */ - /* do all subtracts, then all fma, then all stores to better pipeline */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - free_itemps(gen); - return TRUE; -} - - - -/** - * Emit reciprocal or recip sqrt. - */ -static boolean -emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4], tmp_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) { - /* tmp = 1/s1 */ - spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]); - } - else { - /* tmp = 1/sqrt(s1) */ - spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]); - } - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - /* d = float_interp(s1, tmp) */ - spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit absolute value. See emit_ADD for comments. 
- */ -static boolean -emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4]; - const int bit31mask_reg = get_itemp(gen); - - /* mask with bit 31 set, the rest cleared */ - spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - /* d = sign bit cleared in s1 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - -/** - * Emit 3 component dot product. See emit_ADD for comments. - */ -static boolean -emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - int s1x_reg, s1y_reg, s1z_reg; - int s2x_reg, s2y_reg, s2z_reg; - int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - - s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - s2x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s2y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - s2z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - - /* t0 = x0 * x1 */ - spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg); - - /* t1 = y0 * y1 */ - spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg); - - /* t0 = z0 * z1 + t0 */ - spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg); - - /* t0 = t0 + t1 */ - spe_fa(gen->f, t0_reg, t0_reg, t1_reg); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - spe_move(gen->f, d_reg, t0_reg); - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - -/** - * Emit 4 component dot product. See emit_ADD for comments. 
- */ -static boolean -emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - int s0x_reg, s0y_reg, s0z_reg, s0w_reg; - int s1x_reg, s1y_reg, s1z_reg, s1w_reg; - int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - - s0x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - s0y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - s0z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - s0w_reg = get_src_reg(gen, CHAN_W, &inst->Src[0]); - s1w_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]); - - /* t0 = x0 * x1 */ - spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg); - - /* t1 = y0 * y1 */ - spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg); - - /* t0 = z0 * z1 + t0 */ - spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg); - - /* t1 = w0 * w1 + t1 */ - spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg); - - /* t0 = t0 + t1 */ - spe_fa(gen->f, t0_reg, t0_reg, t1_reg); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - spe_move(gen->f, d_reg, t0_reg); - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - -/** - * Emit homogeneous dot product. See emit_ADD for comments. 
- */ -static boolean -emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - /* XXX rewrite this function to look more like DP3/DP4 */ - int ch; - int s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - int s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - int tmp_reg = get_itemp(gen); - - /* t = x0 * x1 */ - spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - - s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - /* t = y0 * y1 + t */ - spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - /* t = z0 * z1 + t */ - spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - s2_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]); - /* t = w1 + t */ - spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - spe_move(gen->f, d_reg, tmp_reg); - store_dest_reg(gen, tmp_reg, ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - -/** - * Emit 3-component vector normalize. 
- */ -static boolean -emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - int src_reg[3]; - int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); - - src_reg[0] = get_src_reg(gen, CHAN_X, &inst->Src[0]); - src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - - /* t0 = x * x */ - spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]); - - /* t1 = y * y */ - spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]); - - /* t0 = z * z + t0 */ - spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg); - - /* t0 = t0 + t1 */ - spe_fa(gen->f, t0_reg, t0_reg, t1_reg); - - /* t1 = 1.0 / sqrt(t0) */ - spe_frsqest(gen->f, t1_reg, t0_reg); - spe_fi(gen->f, t1_reg, t0_reg, t1_reg); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - /* dst = src[ch] * t1 */ - spe_fm(gen->f, d_reg, src_reg[ch], t1_reg); - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit cross product. See emit_ADD for comments. 
- */ -static boolean -emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - int s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - int tmp_reg = get_itemp(gen); - - /* t = z0 * y1 */ - spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - - s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - /* t = y0 * z1 - t */ - spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - if (inst->Dst[0].Register.WriteMask & (1 << CHAN_X)) { - store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]); - } - - s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]); - /* t = x0 * z1 */ - spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - - s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - /* t = z0 * x1 - t */ - spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Y)) { - store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]); - } - - s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]); - /* t = y0 * x1 */ - spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); - - s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]); - s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]); - /* t = x0 * y1 - t */ - spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); - - if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Z)) { - store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit inequality instruction. - * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as - * the result but OpenGL/TGSI needs 0.0 and 1.0 results. - * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND. 
- */ -static boolean -emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg; - boolean complement = FALSE; - - one_reg = get_const_one_reg(gen); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_SGT: - spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - case TGSI_OPCODE_SLT: - spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); - break; - case TGSI_OPCODE_SGE: - spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); - complement = TRUE; - break; - case TGSI_OPCODE_SLE: - spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - complement = TRUE; - break; - case TGSI_OPCODE_SEQ: - spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - break; - case TGSI_OPCODE_SNE: - spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); - complement = TRUE; - break; - default: - assert(0); - } - } - - /* convert d from 0x0/0xffffffff to 0.0/1.0 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - /* d = d & one_reg */ - if (complement) - spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]); - else - spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit compare. - */ -static boolean -emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int s1_reg = get_src_reg(gen, ch, &inst->Src[0]); - int s2_reg = get_src_reg(gen, ch, &inst->Src[1]); - int s3_reg = get_src_reg(gen, ch, &inst->Src[2]); - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - int zero_reg = get_itemp(gen); - - spe_zero(gen->f, zero_reg); - - /* d = (s1 < 0) ? 
s2 : s3 */ - spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); - spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); - - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - free_itemps(gen); - } - - return TRUE; -} - -/** - * Emit trunc. - * Convert float to signed int - * Convert signed int to float - */ -static boolean -emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - /* Convert float to int */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0); - } - - /* Convert int to float */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit floor. - * If negative int subtract one - * Convert float to signed int - * Convert signed int to float - */ -static boolean -emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; - - zero_reg = get_itemp(gen); - spe_zero(gen->f, zero_reg); - one_reg = get_const_one_reg(gen); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - /* If negative, subtract 1.0 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); - } - - /* Convert float to int */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); - } - - /* Convert int to float */ 
- FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Compute frac = Input - FLR(Input) - */ -static boolean -emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; - - zero_reg = get_itemp(gen); - spe_zero(gen->f, zero_reg); - one_reg = get_const_one_reg(gen); - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - /* If negative, subtract 1.0 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); - } - - /* Convert float to int */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); - } - - /* Convert int to float */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0); - } - - /* d = s1 - FLR(s1) */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); - } - - /* store result */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -#if 0 -static void -print_functions(struct cell_context *cell) -{ - struct cell_spu_function_info *funcs = &cell->spu_functions; - uint i; - for (i = 0; i < funcs->num; i++) { - printf("SPU func %u: %s at %u\n", - i, funcs->names[i], funcs->addrs[i]); - } -} -#endif - - -static uint -lookup_function(struct cell_context *cell, const char *funcname) -{ - const struct cell_spu_function_info *funcs = 
&cell->spu_functions; - uint i, addr = 0; - for (i = 0; i < funcs->num; i++) { - if (strcmp(funcs->names[i], funcname) == 0) { - addr = funcs->addrs[i]; - } - } - assert(addr && "spu function not found"); - return addr / 4; /* discard 2 least significant bits */ -} - - -/** - * Emit code to call a SPU function. - * Used to implement instructions like SIN/COS/POW/TEX/etc. - * If scalar, only the X components of the src regs are used, and the - * result is replicated across the dest register's XYZW components. - */ -static boolean -emit_function_call(struct codegen *gen, - const struct tgsi_full_instruction *inst, - char *funcname, uint num_args, boolean scalar) -{ - const uint addr = lookup_function(gen->cell, funcname); - char comment[100]; - int s_regs[3]; - int func_called = FALSE; - uint a, ch; - int retval_reg = -1; - - assert(num_args <= 3); - - snprintf(comment, sizeof(comment), "CALL %s:", funcname); - spe_comment(gen->f, -4, comment); - - if (scalar) { - for (a = 0; a < num_args; a++) { - s_regs[a] = get_src_reg(gen, CHAN_X, &inst->Src[a]); - } - /* we'll call the function, put the return value in this register, - * then replicate it across all write-enabled components in d_reg. 
- */ - retval_reg = spe_allocate_available_register(gen->f); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int d_reg; - ubyte usedRegs[SPE_NUM_REGS]; - uint i, numUsed; - - if (!scalar) { - for (a = 0; a < num_args; a++) { - s_regs[a] = get_src_reg(gen, ch, &inst->Src[a]); - } - } - - d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - - if (!scalar || !func_called) { - /* for a scalar function, we'll really only call the function once */ - - numUsed = spe_get_registers_used(gen->f, usedRegs); - assert(numUsed < gen->frame_size / 16 - 2); - - /* save registers to stack */ - for (i = 0; i < numUsed; i++) { - uint reg = usedRegs[i]; - int offset = 2 + i; - spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); - } - - /* setup function arguments */ - for (a = 0; a < num_args; a++) { - spe_move(gen->f, 3 + a, s_regs[a]); - } - - /* branch to function, save return addr */ - spe_brasl(gen->f, SPE_REG_RA, addr); - - /* save function's return value */ - if (scalar) - spe_move(gen->f, retval_reg, 3); - else - spe_move(gen->f, d_reg, 3); - - /* restore registers from stack */ - for (i = 0; i < numUsed; i++) { - uint reg = usedRegs[i]; - if (reg != d_reg && reg != retval_reg) { - int offset = 2 + i; - spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); - } - } - - func_called = TRUE; - } - - if (scalar) { - spe_move(gen->f, d_reg, retval_reg); - } - - store_dest_reg(gen, d_reg, ch, &inst->Dst[0]); - free_itemps(gen); - } - - if (scalar) { - spe_release_register(gen->f, retval_reg); - } - - return TRUE; -} - - -static boolean -emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const uint target = inst->Texture.Texture; - const uint unit = inst->Src[1].Register.Index; - uint addr; - int ch; - int coord_regs[4], d_regs[4]; - - switch (target) { - case TGSI_TEXTURE_1D: - case TGSI_TEXTURE_2D: - addr = lookup_function(gen->cell, "spu_tex_2d"); - break; - case TGSI_TEXTURE_3D: - addr = lookup_function(gen->cell, "spu_tex_3d"); - break; - case TGSI_TEXTURE_CUBE: - addr = 
lookup_function(gen->cell, "spu_tex_cube"); - break; - default: - ASSERT(0 && "unsupported texture target"); - return FALSE; - } - - assert(inst->Src[1].Register.File == TGSI_FILE_SAMPLER); - - spe_comment(gen->f, -4, "CALL tex:"); - - /* get src/dst reg info */ - for (ch = 0; ch < 4; ch++) { - coord_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]); - d_regs[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - } - - { - ubyte usedRegs[SPE_NUM_REGS]; - uint i, numUsed; - - numUsed = spe_get_registers_used(gen->f, usedRegs); - assert(numUsed < gen->frame_size / 16 - 2); - - /* save registers to stack */ - for (i = 0; i < numUsed; i++) { - uint reg = usedRegs[i]; - int offset = 2 + i; - spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); - } - - /* setup function arguments (XXX depends on target) */ - for (i = 0; i < 4; i++) { - spe_move(gen->f, 3 + i, coord_regs[i]); - } - spe_load_uint(gen->f, 7, unit); /* sampler unit */ - - /* branch to function, save return addr */ - spe_brasl(gen->f, SPE_REG_RA, addr); - - /* save function's return values (four pixel's colors) */ - for (i = 0; i < 4; i++) { - spe_move(gen->f, d_regs[i], 3 + i); - } - - /* restore registers from stack */ - for (i = 0; i < numUsed; i++) { - uint reg = usedRegs[i]; - if (reg != d_regs[0] && - reg != d_regs[1] && - reg != d_regs[2] && - reg != d_regs[3]) { - int offset = 2 + i; - spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); - } - } - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_regs[ch], ch, &inst->Dst[0]); - free_itemps(gen); - } - - return TRUE; -} - - -/** - * KILL if any of src reg values are less than zero. 
- */ -static boolean -emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch; - int s_regs[4], kil_reg = -1, cmp_reg, zero_reg; - - spe_comment(gen->f, -4, "CALL kil:"); - - /* zero = {0,0,0,0} */ - zero_reg = get_itemp(gen); - spe_zero(gen->f, zero_reg); - - cmp_reg = get_itemp(gen); - - /* get src regs */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]); - } - - /* test if any src regs are < 0 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (kil_reg >= 0) { - /* cmp = 0 > src ? : ~0 : 0 */ - spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]); - /* kil = kil | cmp */ - spe_or(gen->f, kil_reg, kil_reg, cmp_reg); - } - else { - kil_reg = get_itemp(gen); - /* kil = 0 > src ? : ~0 : 0 */ - spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]); - } - } - - if (gen->if_nesting || gen->loop_nesting) { - /* may have been a conditional kil */ - spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg); - } - - /* allocate the kill mask reg if needed */ - if (gen->kill_mask_reg <= 0) { - gen->kill_mask_reg = spe_allocate_available_register(gen->f); - spe_move(gen->f, gen->kill_mask_reg, kil_reg); - } - else { - spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg); - } - - free_itemps(gen); - - return TRUE; -} - - - -/** - * Emit min or max. - */ -static boolean -emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - s0_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]); - s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]); - d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]); - tmp_reg[ch] = get_itemp(gen); - } - - /* d = (s0 > s1) ? 
s0 : s1 */ - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - if (inst->Instruction.Opcode == TGSI_OPCODE_MAX) - spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]); - else - spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]); - } - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]); - } - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]); - } - - free_itemps(gen); - return TRUE; -} - - -/** - * Emit code to update the execution mask. - * This needs to be done whenever the execution status of a conditional - * or loop is changed. - */ -static void -emit_update_exec_mask(struct codegen *gen) -{ - const int exec_reg = get_exec_mask_reg(gen); - const int cond_reg = gen->cond_mask_reg; - const int loop_reg = gen->loop_mask_reg; - - spe_comment(gen->f, 0, "Update master execution mask"); - - if (gen->if_nesting > 0 && gen->loop_nesting > 0) { - /* exec_mask = cond_mask & loop_mask */ - assert(cond_reg > 0); - assert(loop_reg > 0); - spe_and(gen->f, exec_reg, cond_reg, loop_reg); - } - else if (gen->if_nesting > 0) { - assert(cond_reg > 0); - spe_move(gen->f, exec_reg, cond_reg); - } - else if (gen->loop_nesting > 0) { - assert(loop_reg > 0); - spe_move(gen->f, exec_reg, loop_reg); - } - else { - spe_load_int(gen->f, exec_reg, ~0x0); - } -} - - -static boolean -emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const int channel = 0; - int cond_reg; - - cond_reg = get_cond_mask_reg(gen); - - /* XXX push cond exec mask */ - - spe_comment(gen->f, 0, "init conditional exec mask = ~0:"); - spe_load_int(gen->f, cond_reg, ~0); - - /* update conditional execution mask with the predicate register */ - int tmp_reg = get_itemp(gen); - int s1_reg = get_src_reg(gen, channel, &inst->Src[0]); - - /* tmp = (s1_reg == 0) */ - spe_ceqi(gen->f, tmp_reg, s1_reg, 0); - /* tmp = !tmp */ - spe_complement(gen->f, tmp_reg, tmp_reg); - /* cond_mask = cond_mask & tmp */ - 
spe_and(gen->f, cond_reg, cond_reg, tmp_reg); - - gen->if_nesting++; - - /* update the master execution mask */ - emit_update_exec_mask(gen); - - free_itemps(gen); - - return TRUE; -} - - -static boolean -emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const int cond_reg = get_cond_mask_reg(gen); - - spe_comment(gen->f, 0, "cond exec mask = !cond exec mask"); - spe_complement(gen->f, cond_reg, cond_reg); - emit_update_exec_mask(gen); - - return TRUE; -} - - -static boolean -emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - /* XXX todo: pop cond exec mask */ - - gen->if_nesting--; - - emit_update_exec_mask(gen); - - return TRUE; -} - - -static boolean -emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - int exec_reg, loop_reg; - - exec_reg = get_exec_mask_reg(gen); - loop_reg = get_loop_mask_reg(gen); - - /* XXX push loop_exec mask */ - - spe_comment(gen->f, 0*-4, "initialize loop exec mask = ~0"); - spe_load_int(gen->f, loop_reg, ~0x0); - - gen->loop_nesting++; - gen->loop_start = spe_code_size(gen->f); /* in bytes */ - - return TRUE; -} - - -static boolean -emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const int loop_reg = get_loop_mask_reg(gen); - const int tmp_reg = get_itemp(gen); - int offset; - - /* tmp_reg = exec[0] | exec[1] | exec[2] | exec[3] */ - spe_orx(gen->f, tmp_reg, loop_reg); - - offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */ - - /* branch back to top of loop if tmp_reg != 0 */ - spe_brnz(gen->f, tmp_reg, offset / 4); - - /* XXX pop loop_exec mask */ - - gen->loop_nesting--; - - emit_update_exec_mask(gen); - - return TRUE; -} - - -static boolean -emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - const int exec_reg = get_exec_mask_reg(gen); - const int loop_reg = get_loop_mask_reg(gen); - - assert(gen->loop_nesting > 0); - - spe_comment(gen->f, 0, "loop exec mask &= ~master exec 
mask"); - spe_andc(gen->f, loop_reg, loop_reg, exec_reg); - - emit_update_exec_mask(gen); - - return TRUE; -} - - -static boolean -emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst) -{ - assert(gen->loop_nesting > 0); - - return TRUE; -} - - -static boolean -emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, - boolean ddx) -{ - int ch; - - FOR_EACH_ENABLED_CHANNEL(inst, ch) { - int s_reg = get_src_reg(gen, ch, &inst->Src[0]); - int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]); - - int t1_reg = get_itemp(gen); - int t2_reg = get_itemp(gen); - - spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */ - if (ddx) { - spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */ - } - else { - spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */ - } - spe_fs(gen->f, d_reg, t2_reg, t1_reg); - - free_itemps(gen); - } - - return TRUE; -} - - - - -/** - * Emit END instruction. - * We just return from the shader function at this point. - * - * Note that there may be more code after this that would be - * called by TGSI_OPCODE_CALL. - */ -static boolean -emit_END(struct codegen *gen) -{ - emit_epilogue(gen); - return TRUE; -} - - -/** - * Emit code for the given instruction. Just a big switch stmt. 
- */ -static boolean -emit_instruction(struct codegen *gen, - const struct tgsi_full_instruction *inst) -{ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: - return emit_ARL(gen, inst); - case TGSI_OPCODE_MOV: - return emit_MOV(gen, inst); - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_SUB: - case TGSI_OPCODE_MUL: - return emit_binop(gen, inst); - case TGSI_OPCODE_MAD: - return emit_MAD(gen, inst); - case TGSI_OPCODE_LRP: - return emit_LRP(gen, inst); - case TGSI_OPCODE_DP3: - return emit_DP3(gen, inst); - case TGSI_OPCODE_DP4: - return emit_DP4(gen, inst); - case TGSI_OPCODE_DPH: - return emit_DPH(gen, inst); - case TGSI_OPCODE_NRM: - return emit_NRM3(gen, inst); - case TGSI_OPCODE_XPD: - return emit_XPD(gen, inst); - case TGSI_OPCODE_RCP: - case TGSI_OPCODE_RSQ: - return emit_RCP_RSQ(gen, inst); - case TGSI_OPCODE_ABS: - return emit_ABS(gen, inst); - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_SGE: - case TGSI_OPCODE_SLE: - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SNE: - return emit_inequality(gen, inst); - case TGSI_OPCODE_CMP: - return emit_CMP(gen, inst); - case TGSI_OPCODE_MIN: - case TGSI_OPCODE_MAX: - return emit_MIN_MAX(gen, inst); - case TGSI_OPCODE_TRUNC: - return emit_TRUNC(gen, inst); - case TGSI_OPCODE_FLR: - return emit_FLR(gen, inst); - case TGSI_OPCODE_FRC: - return emit_FRC(gen, inst); - case TGSI_OPCODE_END: - return emit_END(gen); - - case TGSI_OPCODE_COS: - return emit_function_call(gen, inst, "spu_cos", 1, TRUE); - case TGSI_OPCODE_SIN: - return emit_function_call(gen, inst, "spu_sin", 1, TRUE); - case TGSI_OPCODE_POW: - return emit_function_call(gen, inst, "spu_pow", 2, TRUE); - case TGSI_OPCODE_EX2: - return emit_function_call(gen, inst, "spu_exp2", 1, TRUE); - case TGSI_OPCODE_LG2: - return emit_function_call(gen, inst, "spu_log2", 1, TRUE); - case TGSI_OPCODE_TEX: - /* fall-through for now */ - case TGSI_OPCODE_TXD: - /* fall-through for now */ - case TGSI_OPCODE_TXB: - /* fall-through for now */ - case 
TGSI_OPCODE_TXL: - /* fall-through for now */ - case TGSI_OPCODE_TXP: - return emit_TEX(gen, inst); - case TGSI_OPCODE_KIL: - return emit_KIL(gen, inst); - - case TGSI_OPCODE_IF: - return emit_IF(gen, inst); - case TGSI_OPCODE_ELSE: - return emit_ELSE(gen, inst); - case TGSI_OPCODE_ENDIF: - return emit_ENDIF(gen, inst); - - case TGSI_OPCODE_BGNLOOP: - return emit_BGNLOOP(gen, inst); - case TGSI_OPCODE_ENDLOOP: - return emit_ENDLOOP(gen, inst); - case TGSI_OPCODE_BRK: - return emit_BRK(gen, inst); - case TGSI_OPCODE_CONT: - return emit_CONT(gen, inst); - - case TGSI_OPCODE_DDX: - return emit_DDX_DDY(gen, inst, TRUE); - case TGSI_OPCODE_DDY: - return emit_DDX_DDY(gen, inst, FALSE); - - /* XXX lots more cases to do... */ - - default: - fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n", - inst->Instruction.Opcode); - return FALSE; - } - - return TRUE; -} - - - -/** - * Emit code for a TGSI immediate value (vector of four floats). - * This involves register allocation and initialization. - * XXX the initialization should be done by a "prepare" stage, not - * per quad execution! - */ -static boolean -emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) -{ - int ch; - - assert(gen->num_imm < MAX_TEMPS); - - for (ch = 0; ch < 4; ch++) { - float val = immed->u[ch].Float; - - if (ch > 0 && val == immed->u[ch - 1].Float) { - /* re-use previous register */ - gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1]; - } - else { - char str[100]; - int reg = spe_allocate_available_register(gen->f); - - if (reg < 0) - return FALSE; - - sprintf(str, "init $%d = %f", reg, val); - spe_comment(gen->f, 0, str); - - /* update immediate map */ - gen->imm_regs[gen->num_imm][ch] = reg; - - /* emit initializer instruction */ - spe_load_float(gen->f, reg, val); - } - } - - gen->num_imm++; - - return TRUE; -} - - - -/** - * Emit "code" for a TGSI declaration. - * We only care about TGSI TEMPORARY register declarations at this time. 
- * For each TGSI TEMPORARY we allocate four SPE registers. - */ -static boolean -emit_declaration(struct cell_context *cell, - struct codegen *gen, const struct tgsi_full_declaration *decl) -{ - int i, ch; - - switch (decl->Declaration.File) { - case TGSI_FILE_TEMPORARY: - for (i = decl->Range.First; - i <= decl->Range.Last; - i++) { - assert(i < MAX_TEMPS); - for (ch = 0; ch < 4; ch++) { - gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f); - if (gen->temp_regs[i][ch] < 0) - return FALSE; /* out of regs */ - } - - /* XXX if we run out of SPE registers, we need to spill - * to SPU memory. someday... - */ - - { - char buf[100]; - sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i, - gen->temp_regs[i][0], gen->temp_regs[i][1], - gen->temp_regs[i][2], gen->temp_regs[i][3]); - spe_comment(gen->f, 0, buf); - } - } - break; - default: - ; /* ignore */ - } - - return TRUE; -} - - - -/** - * Translate TGSI shader code to SPE instructions. This is done when - * the state tracker gives us a new shader (via pipe->create_fs_state()). - * - * \param cell the rendering context (in) - * \param tokens the TGSI shader (in) - * \param f the generated function (out) - */ -boolean -cell_gen_fragment_program(struct cell_context *cell, - const struct tgsi_token *tokens, - struct spe_function *f) -{ - struct tgsi_parse_context parse; - struct codegen gen; - uint ic = 0; - - memset(&gen, 0, sizeof(gen)); - gen.cell = cell; - gen.f = f; - - /* For SPE function calls: reg $3 = first param, $4 = second param, etc. 
*/ - gen.inputs_reg = 3; /* pointer to inputs array */ - gen.outputs_reg = 4; /* pointer to outputs array */ - gen.constants_reg = 5; /* pointer to constants array */ - - spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); - spe_allocate_register(f, gen.inputs_reg); - spe_allocate_register(f, gen.outputs_reg); - spe_allocate_register(f, gen.constants_reg); - - if (cell->debug_flags & CELL_DEBUG_ASM) { - spe_print_code(f, TRUE); - spe_indent(f, 2*8); - printf("Begin %s\n", __FUNCTION__); - tgsi_dump(tokens, 0); - } - - tgsi_parse_init(&parse, tokens); - - emit_prologue(&gen); - - while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_IMMEDIATE: - if (f->print) { - _debug_printf(" # "); - tgsi_dump_immediate(&parse.FullToken.FullImmediate); - } - if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) - gen.error = TRUE; - break; - - case TGSI_TOKEN_TYPE_DECLARATION: - if (f->print) { - _debug_printf(" # "); - tgsi_dump_declaration(&parse.FullToken.FullDeclaration); - } - if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration)) - gen.error = TRUE; - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (f->print) { - _debug_printf(" # "); - ic++; - tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); - } - if (!emit_instruction(&gen, &parse.FullToken.FullInstruction)) - gen.error = TRUE; - break; - - default: - assert(0); - } - } - - if (gen.error) { - /* terminate the SPE code */ - return emit_END(&gen); - } - - if (cell->debug_flags & CELL_DEBUG_ASM) { - printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); - printf("End %s\n", __FUNCTION__); - } - - tgsi_parse_free( &parse ); - - return !gen.error; -} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.h b/src/gallium/drivers/cell/ppu/cell_gen_fp.h deleted file mode 100644 index 99faea70462..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.h +++ /dev/null @@ 
-1,42 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - - -#ifndef CELL_GEN_FP_H -#define CELL_GEN_FP_H - - - -extern boolean -cell_gen_fragment_program(struct cell_context *cell, - const struct tgsi_token *tokens, - struct spe_function *f); - - -#endif /* CELL_GEN_FP_H */ - diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c deleted file mode 100644 index 76a85178007..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ /dev/null @@ -1,2189 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2009 VMware, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/** - * Generate SPU per-fragment code (actually per-quad code). - * \author Brian Paul - * \author Bob Ellison - */ - - -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "rtasm/rtasm_ppc_spe.h" -#include "cell_context.h" -#include "cell_gen_fragment.h" - - - -/** Do extra optimizations? */ -#define OPTIMIZATIONS 1 - - -/** - * Generate SPE code to perform Z/depth testing. - * - * \param dsa Gallium depth/stencil/alpha state to gen code for - * \param f SPE function to append instruction onto. - * \param mask_reg register containing quad/pixel "alive" mask (in/out) - * \param ifragZ_reg register containing integer fragment Z values (in) - * \param ifbZ_reg register containing integer frame buffer Z values (in/out) - * \param zmask_reg register containing result of Z test/comparison (out) - * - * Returns TRUE if the Z-buffer needs to be updated. - */ -static boolean -gen_depth_test(struct spe_function *f, - const struct pipe_depth_stencil_alpha_state *dsa, - int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) -{ - /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_ - * quantities. This only makes a difference for 32-bit Z values though. 
- */ - ASSERT(dsa->depth.enabled); - - switch (dsa->depth.func) { - case PIPE_FUNC_EQUAL: - /* zmask = (ifragZ == ref) */ - spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); - /* mask = (mask & zmask) */ - spe_and(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_NOTEQUAL: - /* zmask = (ifragZ == ref) */ - spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); - /* mask = (mask & ~zmask) */ - spe_andc(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_GREATER: - /* zmask = (ifragZ > ref) */ - spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); - /* mask = (mask & zmask) */ - spe_and(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_LESS: - /* zmask = (ref > ifragZ) */ - spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); - /* mask = (mask & zmask) */ - spe_and(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_LEQUAL: - /* zmask = (ifragZ > ref) */ - spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); - /* mask = (mask & ~zmask) */ - spe_andc(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_GEQUAL: - /* zmask = (ref > ifragZ) */ - spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); - /* mask = (mask & ~zmask) */ - spe_andc(f, mask_reg, mask_reg, zmask_reg); - break; - - case PIPE_FUNC_NEVER: - spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */ - spe_move(f, zmask_reg, mask_reg); /* zmask = mask */ - break; - - case PIPE_FUNC_ALWAYS: - /* mask unchanged */ - spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */ - break; - - default: - ASSERT(0); - break; - } - - if (dsa->depth.writemask) { - /* - * If (ztest passed) { - * framebufferZ = fragmentZ; - * } - * OR, - * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ; - */ - spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); - return TRUE; - } - - return FALSE; -} - - -/** - * Generate SPE code to perform alpha testing. - * - * \param dsa Gallium depth/stencil/alpha state to gen code for - * \param f SPE function to append instruction onto. 
- * \param mask_reg register containing quad/pixel "alive" mask (in/out) - * \param fragA_reg register containing four fragment alpha values (in) - */ -static void -gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, int mask_reg, int fragA_reg) -{ - int ref_reg = spe_allocate_available_register(f); - int amask_reg = spe_allocate_available_register(f); - - ASSERT(dsa->alpha.enabled); - - if ((dsa->alpha.func != PIPE_FUNC_NEVER) && - (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { - /* load/splat the alpha reference float value */ - spe_load_float(f, ref_reg, dsa->alpha.ref_value); - } - - /* emit code to do the alpha comparison, updating 'mask' */ - switch (dsa->alpha.func) { - case PIPE_FUNC_EQUAL: - /* amask = (fragA == ref) */ - spe_fceq(f, amask_reg, fragA_reg, ref_reg); - /* mask = (mask & amask) */ - spe_and(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_NOTEQUAL: - /* amask = (fragA == ref) */ - spe_fceq(f, amask_reg, fragA_reg, ref_reg); - /* mask = (mask & ~amask) */ - spe_andc(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_GREATER: - /* amask = (fragA > ref) */ - spe_fcgt(f, amask_reg, fragA_reg, ref_reg); - /* mask = (mask & amask) */ - spe_and(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_LESS: - /* amask = (ref > fragA) */ - spe_fcgt(f, amask_reg, ref_reg, fragA_reg); - /* mask = (mask & amask) */ - spe_and(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_LEQUAL: - /* amask = (fragA > ref) */ - spe_fcgt(f, amask_reg, fragA_reg, ref_reg); - /* mask = (mask & ~amask) */ - spe_andc(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_GEQUAL: - /* amask = (ref > fragA) */ - spe_fcgt(f, amask_reg, ref_reg, fragA_reg); - /* mask = (mask & ~amask) */ - spe_andc(f, mask_reg, mask_reg, amask_reg); - break; - - case PIPE_FUNC_NEVER: - spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */ - break; - - case PIPE_FUNC_ALWAYS: - /* no-op, mask unchanged */ - break; - - 
default: - ASSERT(0); - break; - } - -#if OPTIMIZATIONS - /* if mask == {0,0,0,0} we're all done, return */ - { - /* re-use amask reg here */ - int tmp_reg = amask_reg; - /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */ - spe_orx(f, tmp_reg, mask_reg); - /* if tmp[0] == 0 then return from function call */ - spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0); - } -#endif - - spe_release_register(f, ref_reg); - spe_release_register(f, amask_reg); -} - - -/** - * This pair of functions is used inline to allocate and deallocate - * optional constant registers. Once a constant is discovered to be - * needed, we will likely need it again, so we don't want to deallocate - * it and have to allocate and load it again unnecessarily. - */ -static INLINE void -setup_optional_register(struct spe_function *f, - int *r) -{ - if (*r < 0) - *r = spe_allocate_available_register(f); -} - -static INLINE void -release_optional_register(struct spe_function *f, - int r) -{ - if (r >= 0) - spe_release_register(f, r); -} - -static INLINE void -setup_const_register(struct spe_function *f, - int *r, - float value) -{ - if (*r >= 0) - return; - setup_optional_register(f, r); - spe_load_float(f, *r, value); -} - -static INLINE void -release_const_register(struct spe_function *f, - int r) -{ - release_optional_register(f, r); -} - - - -/** - * Unpack/convert framebuffer colors from four 32-bit packed colors - * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA). - * Each 8-bit color component is expanded into a float in [0.0, 1.0]. 
- */ -static void -unpack_colors(struct spe_function *f, - enum pipe_format color_format, - int fbRGBA_reg, - int fbR_reg, int fbG_reg, int fbB_reg, int fbA_reg) -{ - int mask0_reg = spe_allocate_available_register(f); - int mask1_reg = spe_allocate_available_register(f); - int mask2_reg = spe_allocate_available_register(f); - int mask3_reg = spe_allocate_available_register(f); - - spe_load_int(f, mask0_reg, 0xff); - spe_load_int(f, mask1_reg, 0xff00); - spe_load_int(f, mask2_reg, 0xff0000); - spe_load_int(f, mask3_reg, 0xff000000); - - spe_comment(f, 0, "Unpack framebuffer colors, convert to floats"); - - switch (color_format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - /* fbB = fbRGBA & mask */ - spe_and(f, fbB_reg, fbRGBA_reg, mask0_reg); - - /* fbG = fbRGBA & mask */ - spe_and(f, fbG_reg, fbRGBA_reg, mask1_reg); - - /* fbR = fbRGBA & mask */ - spe_and(f, fbR_reg, fbRGBA_reg, mask2_reg); - - /* fbA = fbRGBA & mask */ - spe_and(f, fbA_reg, fbRGBA_reg, mask3_reg); - - /* fbG = fbG >> 8 */ - spe_roti(f, fbG_reg, fbG_reg, -8); - - /* fbR = fbR >> 16 */ - spe_roti(f, fbR_reg, fbR_reg, -16); - - /* fbA = fbA >> 24 */ - spe_roti(f, fbA_reg, fbA_reg, -24); - break; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - /* fbA = fbRGBA & mask */ - spe_and(f, fbA_reg, fbRGBA_reg, mask0_reg); - - /* fbR = fbRGBA & mask */ - spe_and(f, fbR_reg, fbRGBA_reg, mask1_reg); - - /* fbG = fbRGBA & mask */ - spe_and(f, fbG_reg, fbRGBA_reg, mask2_reg); - - /* fbB = fbRGBA & mask */ - spe_and(f, fbB_reg, fbRGBA_reg, mask3_reg); - - /* fbR = fbR >> 8 */ - spe_roti(f, fbR_reg, fbR_reg, -8); - - /* fbG = fbG >> 16 */ - spe_roti(f, fbG_reg, fbG_reg, -16); - - /* fbB = fbB >> 24 */ - spe_roti(f, fbB_reg, fbB_reg, -24); - break; - - default: - ASSERT(0); - } - - /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */ - spe_cuflt(f, fbR_reg, fbR_reg, 8); - spe_cuflt(f, fbG_reg, fbG_reg, 8); - spe_cuflt(f, fbB_reg, fbB_reg, 8); - spe_cuflt(f, fbA_reg, fbA_reg, 8); - - spe_release_register(f, mask0_reg); - 
spe_release_register(f, mask1_reg); - spe_release_register(f, mask2_reg); - spe_release_register(f, mask3_reg); -} - - -/** - * Generate SPE code to implement the given blend mode for a quad of pixels. - * \param f SPE function to append instruction onto. - * \param fragR_reg register with fragment red values (float) (in/out) - * \param fragG_reg register with fragment green values (float) (in/out) - * \param fragB_reg register with fragment blue values (float) (in/out) - * \param fragA_reg register with fragment alpha values (float) (in/out) - * \param fbRGBA_reg register with packed framebuffer colors (integer) (in) - */ -static void -gen_blend(const struct pipe_blend_state *blend, - const struct pipe_blend_color *blend_color, - struct spe_function *f, - enum pipe_format color_format, - int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, - int fbRGBA_reg) -{ - int term1R_reg = spe_allocate_available_register(f); - int term1G_reg = spe_allocate_available_register(f); - int term1B_reg = spe_allocate_available_register(f); - int term1A_reg = spe_allocate_available_register(f); - - int term2R_reg = spe_allocate_available_register(f); - int term2G_reg = spe_allocate_available_register(f); - int term2B_reg = spe_allocate_available_register(f); - int term2A_reg = spe_allocate_available_register(f); - - int fbR_reg = spe_allocate_available_register(f); - int fbG_reg = spe_allocate_available_register(f); - int fbB_reg = spe_allocate_available_register(f); - int fbA_reg = spe_allocate_available_register(f); - - int tmp_reg = spe_allocate_available_register(f); - - /* Optional constant registers we might or might not end up using; - * if we do use them, make sure we only allocate them once by - * keeping a flag on each one. 
- */ - int one_reg = -1; - int constR_reg = -1, constG_reg = -1, constB_reg = -1, constA_reg = -1; - - ASSERT(blend->rt[0].blend_enable); - - /* packed RGBA -> float colors */ - unpack_colors(f, color_format, fbRGBA_reg, - fbR_reg, fbG_reg, fbB_reg, fbA_reg); - - /* - * Compute Src RGB terms. We're actually looking for the value - * of (the appropriate RGB factors) * (the incoming source RGB color), - * because in some cases (like PIPE_BLENDFACTOR_ONE and - * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math. - */ - switch (blend->rt[0].rgb_src_factor) { - case PIPE_BLENDFACTOR_ONE: - /* factors = (1,1,1), so term = (R,G,B) */ - spe_move(f, term1R_reg, fragR_reg); - spe_move(f, term1G_reg, fragG_reg); - spe_move(f, term1B_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_ZERO: - /* factors = (0,0,0), so term = (0,0,0) */ - spe_load_float(f, term1R_reg, 0.0f); - spe_load_float(f, term1G_reg, 0.0f); - spe_load_float(f, term1B_reg, 0.0f); - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* factors = (R,G,B), so term = (R*R, G*G, B*B) */ - spe_fm(f, term1R_reg, fragR_reg, fragR_reg); - spe_fm(f, term1G_reg, fragG_reg, fragG_reg); - spe_fm(f, term1B_reg, fragB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - /* factors = (A,A,A), so term = (R*A, G*A, B*A) */ - spe_fm(f, term1R_reg, fragR_reg, fragA_reg); - spe_fm(f, term1G_reg, fragG_reg, fragA_reg); - spe_fm(f, term1B_reg, fragB_reg, fragA_reg); - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) - * or in other words term = (R-R*R, G-G*G, B-B*B) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */ - spe_fm(f, term1R_reg, fragR_reg, fbR_reg); - spe_fm(f, 
term1G_reg, fragG_reg, fbG_reg); - spe_fm(f, term1B_reg, fragB_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) - * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) - * or term = (R-R*A,G-G*A,B-B*A) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */ - spe_fm(f, term1R_reg, fragR_reg, fbA_reg); - spe_fm(f, term1G_reg, fragG_reg, fbA_reg); - spe_fm(f, term1B_reg, fragB_reg, fbA_reg); - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) - * or term = (R-R*Afb,G-G*Afb,b-B*Afb) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */ - spe_fm(f, term1R_reg, fragR_reg, constR_reg); - spe_fm(f, term1G_reg, fragG_reg, constG_reg); - spe_fm(f, term1B_reg, fragB_reg, constB_reg); - break; - case 
PIPE_BLENDFACTOR_CONST_ALPHA: - /* we'll need the optional constant alpha register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */ - spe_fm(f, term1R_reg, fragR_reg, constA_reg); - spe_fm(f, term1G_reg, fragG_reg, constA_reg); - spe_fm(f, term1B_reg, fragB_reg, constA_reg); - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) - * or term = (R-R*Rc, G-G*Gc, B-B*Bc) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) - * or term = (R-R*Ac,G-G*Ac,B-B*Ac) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg); - spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg); - spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - /* We'll need the optional {1,1,1,1} register */ - setup_const_register(f, &one_reg, 1.0f); - /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so - * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb)) - * We could expand the term (as a*min(b,c) == min(a*b,a*c) - * as long as a is positive), but then we'd have to do three - * 
spe_float_min() functions instead of one, so this is simpler. - */ - /* tmp = 1 - Afb */ - spe_fs(f, tmp_reg, one_reg, fbA_reg); - /* tmp = min(A,tmp) */ - spe_float_min(f, tmp_reg, fragA_reg, tmp_reg); - /* term = R*tmp */ - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); - break; - - /* These are special D3D cases involving a second color output - * from the fragment shader. I'm not sure we can support them - * yet... XXX - */ - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - - default: - ASSERT(0); - } - - /* - * Compute Src Alpha term. Like the above, we're looking for - * the full term A*factor, not just the factor itself, because - * in many cases we can avoid doing unnecessary multiplies. - */ - switch (blend->rt[0].alpha_src_factor) { - case PIPE_BLENDFACTOR_ZERO: - /* factor = 0, so term = 0 */ - spe_load_float(f, term1A_reg, 0.0f); - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */ - case PIPE_BLENDFACTOR_ONE: - /* factor = 1, so term = A */ - spe_move(f, term1A_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_SRC_COLOR: - /* factor = A, so term = A*A */ - spe_fm(f, term1A_reg, fragA_reg, fragA_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - spe_fm(f, term1A_reg, fragA_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* factor = 1-A, so term = A*(1-A) = A-A*A */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_DST_COLOR: - /* factor = Afb, so term = A*Afb */ - spe_fm(f, term1A_reg, fragA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* factor = 
1-Afb, so term = A*(1-Afb) = A - A*Afb */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_CONST_COLOR: - /* We need the optional constA_reg register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = Ac, so term = A*Ac */ - spe_fm(f, term1A_reg, fragA_reg, constA_reg); - break; - - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* We need the optional constA_reg register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg); - break; - - /* These are special D3D cases involving a second color output - * from the fragment shader. I'm not sure we can support them - * yet... XXX - */ - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - default: - ASSERT(0); - } - - /* - * Compute Dest RGB term. Like the above, we're looking for - * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because - * in many cases we can avoid doing unnecessary multiplies. 
- */ - switch (blend->rt[0].rgb_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */ - spe_move(f, term2R_reg, fbR_reg); - spe_move(f, term2G_reg, fbG_reg); - spe_move(f, term2B_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_ZERO: - /* factor s= (0,0,0), so term = (0,0,0) */ - spe_load_float(f, term2R_reg, 0.0f); - spe_load_float(f, term2G_reg, 0.0f); - spe_load_float(f, term2B_reg, 0.0f); - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */ - spe_fm(f, term2R_reg, fbR_reg, fragR_reg); - spe_fm(f, term2G_reg, fbG_reg, fragG_reg); - spe_fm(f, term2B_reg, fbB_reg, fragB_reg); - break; - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B)) - * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg); - spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg); - spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */ - spe_fm(f, term2R_reg, fbR_reg, fragA_reg); - spe_fm(f, term2G_reg, fbG_reg, fragA_reg); - spe_fm(f, term2B_reg, fbB_reg, fragA_reg); - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */ - spe_fm(f, term2R_reg, fbR_reg, fbR_reg); - spe_fm(f, term2G_reg, fbG_reg, fbG_reg); - spe_fm(f, term2B_reg, fbB_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = 
(Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb)) - * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: - /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */ - spe_fm(f, term2R_reg, fbR_reg, fbA_reg); - spe_fm(f, term2G_reg, fbG_reg, fbA_reg); - spe_fm(f, term2B_reg, fbB_reg, fbA_reg); - break; - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb)) - * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */ - spe_fm(f, term2R_reg, fbR_reg, constR_reg); - spe_fm(f, term2G_reg, fbG_reg, constG_reg); - spe_fm(f, term2B_reg, fbB_reg, constB_reg); - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - /* we'll need the optional constant alpha register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */ - spe_fm(f, term2R_reg, fbR_reg, constA_reg); - spe_fm(f, term2G_reg, fbG_reg, constA_reg); - spe_fm(f, term2B_reg, fbB_reg, constA_reg); - break; - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, 
&constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc)) - * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - /* We need the optional constant color registers */ - setup_const_register(f, &constR_reg, blend_color->color[0]); - setup_const_register(f, &constG_reg, blend_color->color[1]); - setup_const_register(f, &constB_reg, blend_color->color[2]); - /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac)) - * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac) - * fnms(a,b,c,d) computes a = d - b*c - */ - spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg); - spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg); - spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */ - ASSERT(0); - break; - - /* These are special D3D cases involving a second color output - * from the fragment shader. I'm not sure we can support them - * yet... XXX - */ - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - - default: - ASSERT(0); - } - - /* - * Compute Dest Alpha term. Like the above, we're looking for - * the full term Afb*factor, not just the factor itself, because - * in many cases we can avoid doing unnecessary multiplies. 
- */ - switch (blend->rt[0].alpha_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - /* factor = 1, so term = Afb */ - spe_move(f, term2A_reg, fbA_reg); - break; - case PIPE_BLENDFACTOR_ZERO: - /* factor = 0, so term = 0 */ - spe_load_float(f, term2A_reg, 0.0f); - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_SRC_COLOR: - /* factor = A, so term = Afb*A */ - spe_fm(f, term2A_reg, fbA_reg, fragA_reg); - break; - - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_DST_COLOR: - /* factor = Afb, so term = Afb*Afb */ - spe_fm(f, term2A_reg, fbA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_CONST_COLOR: - /* We need the optional constA_reg register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = Ac, so term = Afb*Ac */ - spe_fm(f, term2A_reg, fbA_reg, constA_reg); - break; - - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* We need the optional constA_reg register */ - setup_const_register(f, &constA_reg, blend_color->color[3]); - /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */ - /* fnms(a,b,c,d) computes a = d - b*c */ - spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg); - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */ - ASSERT(0); - break; - - /* These are special D3D cases involving a second 
color output - * from the fragment shader. I'm not sure we can support them - * yet... XXX - */ - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - default: - ASSERT(0); - } - - /* - * Combine Src/Dest RGB terms as per the blend equation. - */ - switch (blend->rt[0].rgb_func) { - case PIPE_BLEND_ADD: - spe_fa(f, fragR_reg, term1R_reg, term2R_reg); - spe_fa(f, fragG_reg, term1G_reg, term2G_reg); - spe_fa(f, fragB_reg, term1B_reg, term2B_reg); - break; - case PIPE_BLEND_SUBTRACT: - spe_fs(f, fragR_reg, term1R_reg, term2R_reg); - spe_fs(f, fragG_reg, term1G_reg, term2G_reg); - spe_fs(f, fragB_reg, term1B_reg, term2B_reg); - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - spe_fs(f, fragR_reg, term2R_reg, term1R_reg); - spe_fs(f, fragG_reg, term2G_reg, term1G_reg); - spe_fs(f, fragB_reg, term2B_reg, term1B_reg); - break; - case PIPE_BLEND_MIN: - spe_float_min(f, fragR_reg, term1R_reg, term2R_reg); - spe_float_min(f, fragG_reg, term1G_reg, term2G_reg); - spe_float_min(f, fragB_reg, term1B_reg, term2B_reg); - break; - case PIPE_BLEND_MAX: - spe_float_max(f, fragR_reg, term1R_reg, term2R_reg); - spe_float_max(f, fragG_reg, term1G_reg, term2G_reg); - spe_float_max(f, fragB_reg, term1B_reg, term2B_reg); - break; - default: - ASSERT(0); - } - - /* - * Combine Src/Dest A term - */ - switch (blend->rt[0].alpha_func) { - case PIPE_BLEND_ADD: - spe_fa(f, fragA_reg, term1A_reg, term2A_reg); - break; - case PIPE_BLEND_SUBTRACT: - spe_fs(f, fragA_reg, term1A_reg, term2A_reg); - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - spe_fs(f, fragA_reg, term2A_reg, term1A_reg); - break; - case PIPE_BLEND_MIN: - spe_float_min(f, fragA_reg, term1A_reg, term2A_reg); - break; - case PIPE_BLEND_MAX: - spe_float_max(f, fragA_reg, term1A_reg, term2A_reg); - break; - default: - ASSERT(0); - } - - spe_release_register(f, term1R_reg); - spe_release_register(f, term1G_reg); - spe_release_register(f, 
term1B_reg); - spe_release_register(f, term1A_reg); - - spe_release_register(f, term2R_reg); - spe_release_register(f, term2G_reg); - spe_release_register(f, term2B_reg); - spe_release_register(f, term2A_reg); - - spe_release_register(f, fbR_reg); - spe_release_register(f, fbG_reg); - spe_release_register(f, fbB_reg); - spe_release_register(f, fbA_reg); - - spe_release_register(f, tmp_reg); - - /* Free any optional registers that actually got used */ - release_const_register(f, one_reg); - release_const_register(f, constR_reg); - release_const_register(f, constG_reg); - release_const_register(f, constB_reg); - release_const_register(f, constA_reg); -} - - -static void -gen_logicop(const struct pipe_blend_state *blend, - struct spe_function *f, - int fragRGBA_reg, int fbRGBA_reg) -{ - /* We've got four 32-bit RGBA packed pixels in each of - * fragRGBA_reg and fbRGBA_reg, not sets of floating-point - * reds, greens, blues, and alphas. - * */ - ASSERT(blend->logicop_enable); - - switch(blend->logicop_func) { - case PIPE_LOGICOP_CLEAR: /* 0 */ - spe_zero(f, fragRGBA_reg); - break; - case PIPE_LOGICOP_NOR: /* ~(s | d) */ - spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */ - /* andc R, A, B computes R = A & ~B */ - spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); - break; - case PIPE_LOGICOP_COPY_INVERTED: /* ~s */ - spe_complement(f, fragRGBA_reg, fragRGBA_reg); - break; - case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */ - /* andc R, A, B computes R = A & ~B */ - spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_INVERT: /* ~d */ - /* Note that (A nor A) == ~(A|A) == ~A */ - spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_XOR: /* s ^ d */ - spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_NAND: /* ~(s & d) */ - spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_AND: /* s & d */ - spe_and(f, 
fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */ - spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - spe_complement(f, fragRGBA_reg, fragRGBA_reg); - break; - case PIPE_LOGICOP_NOOP: /* d */ - spe_move(f, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */ - /* orc R, A, B computes R = A | ~B */ - spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); - break; - case PIPE_LOGICOP_COPY: /* s */ - break; - case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */ - /* orc R, A, B computes R = A | ~B */ - spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_OR: /* s | d */ - spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - break; - case PIPE_LOGICOP_SET: /* 1 */ - spe_load_int(f, fragRGBA_reg, 0xffffffff); - break; - default: - ASSERT(0); - } -} - - -/** - * Generate code to pack a quad of float colors into four 32-bit integers. - * - * \param f SPE function to append instruction onto. - * \param color_format the dest color packing format - * \param r_reg register containing four red values (in/clobbered) - * \param g_reg register containing four green values (in/clobbered) - * \param b_reg register containing four blue values (in/clobbered) - * \param a_reg register containing four alpha values (in/clobbered) - * \param rgba_reg register to store the packed RGBA colors (out) - */ -static void -gen_pack_colors(struct spe_function *f, - enum pipe_format color_format, - int r_reg, int g_reg, int b_reg, int a_reg, - int rgba_reg) -{ - int rg_reg = spe_allocate_available_register(f); - int ba_reg = spe_allocate_available_register(f); - - /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ - spe_cfltu(f, r_reg, r_reg, 32); - spe_cfltu(f, g_reg, g_reg, 32); - spe_cfltu(f, b_reg, b_reg, 32); - spe_cfltu(f, a_reg, a_reg, 32); - - /* Shift the most significant bytes to the least significant positions. 
- * I.e.: reg = reg >> 24 - */ - spe_rotmi(f, r_reg, r_reg, -24); - spe_rotmi(f, g_reg, g_reg, -24); - spe_rotmi(f, b_reg, b_reg, -24); - spe_rotmi(f, a_reg, a_reg, -24); - - /* Shift the color bytes according to the surface format */ - if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { - spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */ - spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */ - spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */ - } - else if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { - spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */ - spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */ - spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */ - } - else { - ASSERT(0); - } - - /* Merge red, green, blue, alpha registers to make packed RGBA colors. - * Eg: after shifting according to color_format we might have: - * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} - * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} - * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} - * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} - * OR-ing all those together gives us four packed colors: - * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} - */ - spe_or(f, rg_reg, r_reg, g_reg); - spe_or(f, ba_reg, a_reg, b_reg); - spe_or(f, rgba_reg, rg_reg, ba_reg); - - spe_release_register(f, rg_reg); - spe_release_register(f, ba_reg); -} - - -static void -gen_colormask(struct spe_function *f, - uint colormask, - enum pipe_format color_format, - int fragRGBA_reg, int fbRGBA_reg) -{ - /* We've got four 32-bit RGBA packed pixels in each of - * fragRGBA_reg and fbRGBA_reg, not sets of floating-point - * reds, greens, blues, and alphas. Further, the pixels - * are packed according to the given color format, not - * necessarily RGBA... - */ - uint r_mask; - uint g_mask; - uint b_mask; - uint a_mask; - - /* Calculate exactly where the bits for any particular color - * end up, so we can mask them correctly. 
- */ - switch(color_format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - /* ARGB */ - a_mask = 0xff000000; - r_mask = 0x00ff0000; - g_mask = 0x0000ff00; - b_mask = 0x000000ff; - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - /* BGRA */ - b_mask = 0xff000000; - g_mask = 0x00ff0000; - r_mask = 0x0000ff00; - a_mask = 0x000000ff; - break; - default: - ASSERT(0); - } - - /* For each R, G, B, and A component we're supposed to mask out, - * clear its bits. Then our mask operation later will work - * as expected. - */ - if (!(colormask & PIPE_MASK_R)) { - r_mask = 0; - } - if (!(colormask & PIPE_MASK_G)) { - g_mask = 0; - } - if (!(colormask & PIPE_MASK_B)) { - b_mask = 0; - } - if (!(colormask & PIPE_MASK_A)) { - a_mask = 0; - } - - /* Get a temporary register to hold the mask that will be applied - * to the fragment - */ - int colormask_reg = spe_allocate_available_register(f); - - /* The actual mask we're going to use is an OR of the remaining R, G, B, - * and A masks. Load the result value into our temporary register. - */ - spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask); - - /* Use the mask register to select between the fragment color - * values and the frame buffer color values. Wherever the - * mask has a 0 bit, the current frame buffer color should override - * the fragment color. Wherever the mask has a 1 bit, the - * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM) - * instruction will select bits from its first operand rA wherever the - * the mask bits rM are 0, and from its second operand rB wherever the - * mask bits rM are 1. That means that the frame buffer color is the - * first operand, and the fragment color the second. 
- */ - spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg); - - /* Release the temporary register and we're done */ - spe_release_register(f, colormask_reg); -} - - -/** - * This function is annoyingly similar to gen_depth_test(), above, except - * that instead of comparing two varying values (i.e. fragment and buffer), - * we're comparing a varying value with a static value. As such, we have - * access to the Compare Immediate instructions where we don't in - * gen_depth_test(), which is what makes us very different. - * - * There's some added complexity if there's a non-trivial state->mask - * value; then stencil and reference both must be masked - * - * The return value in the stencil_pass_reg is a bitmask of valid - * fragments that also passed the stencil test. The bitmask of valid - * fragments that failed would be found in - * (fragment_mask_reg & ~stencil_pass_reg). - */ -static void -gen_stencil_test(struct spe_function *f, - const struct pipe_stencil_state *state, - const unsigned ref_value, - uint stencil_max_value, - int fragment_mask_reg, - int fbS_reg, - int stencil_pass_reg) -{ - /* Generate code that puts the set of passing fragments into the - * stencil_pass_reg register, taking into account whether each fragment - * was active to begin with. 
- */ - switch (state->func) { - case PIPE_FUNC_EQUAL: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (s == reference) */ - spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, ref_value); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - } - else { - /* stencil_pass = fragment_mask & ((s&mask) == (reference&mask)) */ - uint tmp_masked_stencil = spe_allocate_available_register(f); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, - state->valuemask & ref_value); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_NOTEQUAL: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & ~(s == reference) */ - spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, ref_value); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - } - else { - /* stencil_pass = fragment_mask & ~((s&mask) == (reference&mask)) */ - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, - state->valuemask & ref_value); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_LESS: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (reference < s) */ - spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, ref_value); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - } - else { - /* stencil_pass = fragment_mask & ((reference&mask) < (s & mask)) */ - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, - 
state->valuemask & ref_value); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_GREATER: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (reference > s) */ - /* There's no convenient Compare Less Than Immediate instruction, so - * we'll have to do this one the harder way, by loading a register and - * comparing directly. Compare Logical Greater Than Word (clgt) - * treats its operands as unsigned - no sign extension. - */ - int tmp_reg = spe_allocate_available_register(f); - spe_load_uint(f, tmp_reg, ref_value); - spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_reg); - } - else { - /* stencil_pass = fragment_mask & ((reference&mask) > (s&mask)) */ - int tmp_reg = spe_allocate_available_register(f); - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_load_uint(f, tmp_reg, state->valuemask & ref_value); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil); - spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_GEQUAL: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (reference >= s) - * = fragment_mask & ~(s > reference) */ - spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, - ref_value); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - } - else { - /* stencil_pass = fragment_mask & ~((s&mask) > (reference&mask)) */ - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil, - state->valuemask & ref_value); - spe_andc(f, 
stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_LEQUAL: - if (state->valuemask == stencil_max_value) { - /* stencil_pass = fragment_mask & (reference <= s) ] - * = fragment_mask & ~(reference > s) */ - /* As above, we have to do this by loading a register */ - int tmp_reg = spe_allocate_available_register(f); - spe_load_uint(f, tmp_reg, ref_value); - spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_reg); - } - else { - /* stencil_pass = fragment_mask & ~((reference&mask) > (s&mask)) */ - int tmp_reg = spe_allocate_available_register(f); - int tmp_masked_stencil = spe_allocate_available_register(f); - spe_load_uint(f, tmp_reg, ref_value & state->valuemask); - spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); - spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil); - spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); - spe_release_register(f, tmp_reg); - spe_release_register(f, tmp_masked_stencil); - } - break; - - case PIPE_FUNC_NEVER: - /* stencil_pass = fragment_mask & 0 = 0 */ - spe_load_uint(f, stencil_pass_reg, 0); - break; - - case PIPE_FUNC_ALWAYS: - /* stencil_pass = fragment_mask & 1 = fragment_mask */ - spe_move(f, stencil_pass_reg, fragment_mask_reg); - break; - } - - /* The fragments that passed the stencil test are now in stencil_pass_reg. - * The fragments that failed would be (fragment_mask_reg & ~stencil_pass_reg). - */ -} - - -/** - * This function generates code that calculates a set of new stencil values - * given the earlier values and the operation to apply. It does not - * apply any tests. 
It is intended to be called up to 3 times - * (for the stencil fail operation, for the stencil pass-z fail operation, - * and for the stencil pass-z pass operation) to collect up to three - * possible sets of values, and for the caller to combine them based - * on the result of the tests. - * - * stencil_max_value should be (2^n - 1) where n is the number of bits - * in the stencil buffer - in other words, it should be usable as a mask. - */ -static void -gen_stencil_values(struct spe_function *f, - uint stencil_op, - uint stencil_ref_value, - uint stencil_max_value, - int fbS_reg, - int newS_reg) -{ - /* The code below assumes that newS_reg and fbS_reg are not the same - * register; if they can be, the calculations below will have to use - * an additional temporary register. For now, mark the assumption - * with an assertion that will fail if they are the same. - */ - ASSERT(fbS_reg != newS_reg); - - /* The code also assumes that the stencil_max_value is of the form - * 2^n-1 and can therefore be used as a mask for the valid bits in - * addition to a maximum. Make sure this is the case as well. - * The clever math below exploits the fact that incrementing a - * binary number serves to flip all the bits of a number starting at - * the LSB and continuing to (and including) the first zero bit - * found. That means that a number and its increment will always - * have at least one bit in common (the high order bit, if nothing - * else) *unless* the number is zero, *or* the number is of a form - * consisting of some number of 1s in the low-order bits followed - * by nothing but 0s in the high-order bits. The latter case - * implies it's of the form 2^n-1. 
- */ - ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0); - - switch(stencil_op) { - case PIPE_STENCIL_OP_KEEP: - /* newS = S */ - spe_move(f, newS_reg, fbS_reg); - break; - - case PIPE_STENCIL_OP_ZERO: - /* newS = 0 */ - spe_zero(f, newS_reg); - break; - - case PIPE_STENCIL_OP_REPLACE: - /* newS = stencil reference value */ - spe_load_uint(f, newS_reg, stencil_ref_value); - break; - - case PIPE_STENCIL_OP_INCR: { - /* newS = (s == max ? max : s + 1) */ - int equals_reg = spe_allocate_available_register(f); - - spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value); - /* Add Word Immediate computes rT = rA + 10-bit signed immediate */ - spe_ai(f, newS_reg, fbS_reg, 1); - /* Select from the current value or the new value based on the equality test */ - spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); - - spe_release_register(f, equals_reg); - break; - } - case PIPE_STENCIL_OP_DECR: { - /* newS = (s == 0 ? 0 : s - 1) */ - int equals_reg = spe_allocate_available_register(f); - - spe_compare_equal_uint(f, equals_reg, fbS_reg, 0); - /* Add Word Immediate with a (-1) value works */ - spe_ai(f, newS_reg, fbS_reg, -1); - /* Select from the current value or the new value based on the equality test */ - spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); - - spe_release_register(f, equals_reg); - break; - } - case PIPE_STENCIL_OP_INCR_WRAP: - /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can - * do a normal add and mask off the correct bits - */ - spe_ai(f, newS_reg, fbS_reg, 1); - spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); - break; - - case PIPE_STENCIL_OP_DECR_WRAP: - /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */ - spe_ai(f, newS_reg, fbS_reg, -1); - spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); - break; - - case PIPE_STENCIL_OP_INVERT: - /* newS = ~s. 
We take advantage of the mask/max value to invert only - * the valid bits for the field so we don't have to do an extra "and". - */ - spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value); - break; - - default: - ASSERT(0); - } -} - - -/** - * This function generates code to get all the necessary possible - * stencil values. For each of the output registers (fail_reg, - * zfail_reg, and zpass_reg), it either allocates a new register - * and calculates a new set of values based on the stencil operation, - * or it reuses a register allocation and calculation done for an - * earlier (matching) operation, or it reuses the fbS_reg register - * (if the stencil operation is KEEP, which doesn't change the - * stencil buffer). - * - * Since this function allocates a variable number of registers, - * to avoid incurring complex logic to free them, they should - * be allocated after a spe_allocate_register_set() call - * and released by the corresponding spe_release_register_set() call. - */ -static void -gen_get_stencil_values(struct spe_function *f, - const struct pipe_stencil_state *stencil, - const unsigned ref_value, - const uint depth_enabled, - int fbS_reg, - int *fail_reg, - int *zfail_reg, - int *zpass_reg) -{ - uint zfail_op; - - /* Stenciling had better be enabled here */ - ASSERT(stencil->enabled); - - /* If the depth test is not enabled, it is treated as though it always - * passes, which means that the zfail_op is not considered - a - * failing stencil test triggers the fail_op, and a passing one - * triggers the zpass_op - * - * As an optimization, override calculation of the zfail_op values - * if they aren't going to be used. By setting the value of - * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed - * to match the incoming stencil values, and no calculation will - * be done. 
- */ - if (depth_enabled) { - zfail_op = stencil->zfail_op; - } - else { - zfail_op = PIPE_STENCIL_OP_KEEP; - } - - /* One-sided or front-facing stencil */ - if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) { - *fail_reg = fbS_reg; - } - else { - *fail_reg = spe_allocate_available_register(f); - gen_stencil_values(f, stencil->fail_op, ref_value, - 0xff, fbS_reg, *fail_reg); - } - - /* Check the possibly overridden value, not the structure value */ - if (zfail_op == PIPE_STENCIL_OP_KEEP) { - *zfail_reg = fbS_reg; - } - else if (zfail_op == stencil->fail_op) { - *zfail_reg = *fail_reg; - } - else { - *zfail_reg = spe_allocate_available_register(f); - gen_stencil_values(f, stencil->zfail_op, ref_value, - 0xff, fbS_reg, *zfail_reg); - } - - if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { - *zpass_reg = fbS_reg; - } - else if (stencil->zpass_op == stencil->fail_op) { - *zpass_reg = *fail_reg; - } - else if (stencil->zpass_op == zfail_op) { - *zpass_reg = *zfail_reg; - } - else { - *zpass_reg = spe_allocate_available_register(f); - gen_stencil_values(f, stencil->zpass_op, ref_value, - 0xff, fbS_reg, *zpass_reg); - } -} - -/** - * Note that fbZ_reg may *not* be set on entry, if in fact - * the depth test is not enabled. This function must not use - * the register if depth is not enabled. - */ -static boolean -gen_stencil_depth_test(struct spe_function *f, - const struct pipe_depth_stencil_alpha_state *dsa, - const struct pipe_stencil_ref *stencil_ref, - const uint facing, - const int mask_reg, const int fragZ_reg, - const int fbZ_reg, const int fbS_reg) -{ - /* True if we've generated code that could require writeback to the - * depth and/or stencil buffers - */ - boolean modified_buffers = FALSE; - - boolean need_to_calculate_stencil_values; - boolean need_to_writemask_stencil_values; - - struct pipe_stencil_state *stencil; - - /* Registers. We may or may not actually allocate these, depending - * on whether the state values indicate that we need them. 
- */ - int stencil_pass_reg, stencil_fail_reg; - int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values; - int stencil_writemask_reg; - int zmask_reg; - int newS_reg; - unsigned ref_value; - - /* Stenciling is quite complex: up to six different configurable stencil - * operations/calculations can be required (three each for front-facing - * and back-facing fragments). Many of those operations will likely - * be identical, so there's good reason to try to avoid calculating - * the same values more than once (which unfortunately makes the code less - * straightforward). - * - * To make register management easier, we start a new - * register set; we can release all the registers in the set at - * once, and avoid having to keep track of exactly which registers - * we allocate. We can still allocate and free registers as - * desired (if we know we no longer need a register), but we don't - * have to spend the complexity to track the more difficult variant - * register usage scenarios. - */ - spe_comment(f, 0, "Allocating stencil register set"); - spe_allocate_register_set(f); - - /* The facing we're given is the fragment facing; it doesn't - * exactly match the stencil facing. If stencil is enabled, - * but two-sided stencil is *not* enabled, we use the same - * stencil settings for both front- and back-facing fragments. - * We only use the "back-facing" stencil for backfacing fragments - * if two-sided stenciling is enabled. - */ - if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) { - stencil = &dsa->stencil[1]; - ref_value = stencil_ref->ref_value[1]; - } - else { - stencil = &dsa->stencil[0]; - ref_value = stencil_ref->ref_value[0]; - } - - /* Calculate the writemask. 
If the writemask is trivial (either - * all 0s, meaning that we don't need to calculate any stencil values - * because they're not going to change the stencil anyway, or all 1s, - * meaning that we have to calculate the stencil values but do not - * need to mask them), we can avoid generating code. Don't forget - * that we need to consider backfacing stencil, if enabled. - * - * Note that if the backface stencil is *not* enabled, the backface - * stencil will have the same values as the frontface stencil. - */ - if (stencil->fail_op == PIPE_STENCIL_OP_KEEP && - stencil->zfail_op == PIPE_STENCIL_OP_KEEP && - stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { - need_to_calculate_stencil_values = FALSE; - need_to_writemask_stencil_values = FALSE; - } - else if (stencil->writemask == 0x0) { - /* All changes are writemasked out, so no need to calculate - * what those changes might be, and no need to write anything back. - */ - need_to_calculate_stencil_values = FALSE; - need_to_writemask_stencil_values = FALSE; - } - else if (stencil->writemask == 0xff) { - /* Still trivial, but a little less so. We need to write the stencil - * values, but we don't need to mask them. - */ - need_to_calculate_stencil_values = TRUE; - need_to_writemask_stencil_values = FALSE; - } - else { - /* The general case: calculate, mask, and write */ - need_to_calculate_stencil_values = TRUE; - need_to_writemask_stencil_values = TRUE; - - /* While we're here, generate code that calculates what the - * writemask should be. If backface stenciling is enabled, - * and the backface writemask is not the same as the frontface - * writemask, we'll have to generate code that merges the - * two masks into a single effective mask based on fragment facing. - */ - spe_comment(f, 0, "Computing stencil writemask"); - stencil_writemask_reg = spe_allocate_available_register(f); - spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].writemask); - } - - /* At least one-sided stenciling must be on. 
Generate code that - * runs the stencil test on the basic/front-facing stencil, leaving - * the mask of passing stencil bits in stencil_pass_reg. This mask will - * be used both to mask the set of active pixels, and also to - * determine how the stencil buffer changes. - * - * This test will *not* change the value in mask_reg (because we don't - * yet know whether to apply the two-sided stencil or one-sided stencil). - */ - spe_comment(f, 0, "Running basic stencil test"); - stencil_pass_reg = spe_allocate_available_register(f); - gen_stencil_test(f, stencil, ref_value, 0xff, mask_reg, fbS_reg, stencil_pass_reg); - - /* Generate code that, given the mask of valid fragments and the - * mask of valid fragments that passed the stencil test, computes - * the mask of valid fragments that failed the stencil test. We - * have to do this before we run a depth test (because the - * depth test should not be performed on fragments that failed the - * stencil test, and because the depth test will update the - * mask of valid fragments based on the results of the depth test). - */ - spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask"); - stencil_fail_reg = spe_allocate_available_register(f); - spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg); - /* Now remove the stenciled-out pixels from the valid fragment mask, - * so we can later use the valid fragment mask in the depth test. - */ - spe_and(f, mask_reg, mask_reg, stencil_pass_reg); - - /* We may not need to calculate stencil values, if the writemask is off */ - if (need_to_calculate_stencil_values) { - /* Generate code that calculates exactly which stencil values we need, - * without calculating the same value twice (say, if two different - * stencil ops have the same value). 
This code will work for one-sided - * and two-sided stenciling (so that we take into account that operations - * may match between front and back stencils), and will also take into - * account whether the depth test is enabled (if the depth test is off, - * we don't need any of the zfail results, because the depth test always - * is considered to pass if it is disabled). Any register value that - * does not need to be calculated will come back with the same value - * that's in fbS_reg. - * - * This function will allocate a variant number of registers that - * will be released as part of the register set. - */ - spe_comment(f, 0, facing == CELL_FACING_FRONT - ? "Computing front-facing stencil values" - : "Computing back-facing stencil values"); - gen_get_stencil_values(f, stencil, ref_value, dsa->depth.enabled, fbS_reg, - &stencil_fail_values, &stencil_pass_depth_fail_values, - &stencil_pass_depth_pass_values); - } - - /* We now have all the stencil values we need. We also need - * the results of the depth test to figure out which - * stencil values will become the new stencil values. (Even if - * we aren't actually calculating stencil values, we need to apply - * the depth test if it's enabled.) - * - * The code generated by gen_depth_test() returns the results of the - * test in the given register, but also alters the mask_reg based - * on the results of the test. - */ - if (dsa->depth.enabled) { - spe_comment(f, 0, "Running stencil depth test"); - zmask_reg = spe_allocate_available_register(f); - modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg, - fbZ_reg, zmask_reg); - } - - if (need_to_calculate_stencil_values) { - - /* If we need to writemask the stencil values before going into - * the stencil buffer, we'll have to use a new register to - * hold the new values. If not, we can just keep using the - * current register. 
- */ - if (need_to_writemask_stencil_values) { - newS_reg = spe_allocate_available_register(f); - spe_comment(f, 0, "Saving current stencil values for writemasking"); - spe_move(f, newS_reg, fbS_reg); - } - else { - newS_reg = fbS_reg; - } - - /* Merge in the selected stencil fail values */ - if (stencil_fail_values != fbS_reg) { - spe_comment(f, 0, "Loading stencil fail values"); - spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg); - modified_buffers = TRUE; - } - - /* Same for the stencil pass/depth fail values. If this calculation - * is not needed (say, if depth test is off), then the - * stencil_pass_depth_fail_values register will be equal to fbS_reg - * and we'll skip the calculation. - */ - if (stencil_pass_depth_fail_values != fbS_reg) { - /* We don't actually have a stencil pass/depth fail mask yet. - * Calculate it here from the stencil passing mask and the - * depth passing mask. Note that zmask_reg *must* have been - * set above if we're here. - */ - uint stencil_pass_depth_fail_mask = - spe_allocate_available_register(f); - - spe_comment(f, 0, "Loading stencil pass/depth fail values"); - spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg); - - spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values, - stencil_pass_depth_fail_mask); - - spe_release_register(f, stencil_pass_depth_fail_mask); - modified_buffers = TRUE; - } - - /* Same for the stencil pass/depth pass mask. Note that we - * *can* get here with zmask_reg being unset (if the depth - * test is off but the stencil test is on). In this case, - * we assume the depth test passes, and don't need to mask - * the stencil pass mask with the Z mask. 
- */ - if (stencil_pass_depth_pass_values != fbS_reg) { - if (dsa->depth.enabled) { - uint stencil_pass_depth_pass_mask = spe_allocate_available_register(f); - /* We'll need a separate register */ - spe_comment(f, 0, "Loading stencil pass/depth pass values"); - spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); - spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); - spe_release_register(f, stencil_pass_depth_pass_mask); - } - else { - /* We can use the same stencil-pass register */ - spe_comment(f, 0, "Loading stencil pass values"); - spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg); - } - modified_buffers = TRUE; - } - - /* Almost done. If we need to writemask, do it now, leaving the - * results in the fbS_reg register passed in. If we don't need - * to writemask, then the results are *already* in the fbS_reg, - * so there's nothing more to do. - */ - - if (need_to_writemask_stencil_values && modified_buffers) { - /* The Select Bytes command makes a fine writemask. Where - * the mask is 0, the first (original) values are retained, - * effectively masking out changes. Where the mask is 1, the - * second (new) values are retained, incorporating changes. - */ - spe_comment(f, 0, "Writemasking new stencil values"); - spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg); - } - - } /* done calculating stencil values */ - - /* The stencil and/or depth values have been applied, and the - * mask_reg, fbS_reg, and fbZ_reg values have been updated. - * We're all done, except that we've allocated a fair number - * of registers that we didn't bother tracking. Release all - * those registers as part of the register set, and go home. - */ - spe_comment(f, 0, "Releasing stencil register set"); - spe_release_register_set(f); - - /* Return TRUE if we could have modified the stencil and/or - * depth buffers. 
- */ - return modified_buffers; -} - - -/** - * Generate depth and/or stencil test code. - * \param cell context - * \param dsa depth/stencil/alpha state - * \param f spe function to emit - * \param facing either CELL_FACING_FRONT or CELL_FACING_BACK - * \param mask_reg register containing the pixel alive/dead mask - * \param depth_tile_reg register containing address of z/stencil tile - * \param quad_offset_reg offset to quad from start of tile - * \param fragZ_reg register containg fragment Z values - */ -static void -gen_depth_stencil(struct cell_context *cell, - const struct pipe_depth_stencil_alpha_state *dsa, - const struct pipe_stencil_ref *stencil_ref, - struct spe_function *f, - uint facing, - int mask_reg, - int depth_tile_reg, - int quad_offset_reg, - int fragZ_reg) - -{ - const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; - boolean write_depth_stencil; - - /* framebuffer's combined z/stencil values register */ - int fbZS_reg = spe_allocate_available_register(f); - - /* Framebufer Z values register */ - int fbZ_reg = spe_allocate_available_register(f); - - /* Framebuffer stencil values register (may not be used) */ - int fbS_reg = spe_allocate_available_register(f); - - /* 24-bit mask register (may not be used) */ - int zmask_reg = spe_allocate_available_register(f); - - /** - * The following code: - * 1. fetch quad of packed Z/S values from the framebuffer tile. - * 2. extract the separate the Z and S values from packed values - * 3. convert fragment Z values from float in [0,1] to 32/24/16-bit ints - * - * The instructions for doing this are interleaved for better performance. 
- */ - spe_comment(f, 0, "Fetch Z/stencil quad from tile"); - - switch(zs_format) { - case PIPE_FORMAT_Z24_UNORM_S8_UINT: /* fall through */ - case PIPE_FORMAT_Z24X8_UNORM: - /* prepare mask to extract Z vals from ZS vals */ - spe_load_uint(f, zmask_reg, 0x00ffffff); - - /* convert fragment Z from [0,1] to 32-bit ints */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - - /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ - spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - - /* right shift 32-bit fragment Z to 24 bits */ - spe_rotmi(f, fragZ_reg, fragZ_reg, -8); - - /* extract 24-bit Z values from ZS values by masking */ - spe_and(f, fbZ_reg, fbZS_reg, zmask_reg); - - /* extract 8-bit stencil values by shifting */ - spe_rotmi(f, fbS_reg, fbZS_reg, -24); - break; - - case PIPE_FORMAT_S8_UINT_Z24_UNORM: /* fall through */ - case PIPE_FORMAT_X8Z24_UNORM: - /* convert fragment Z from [0,1] to 32-bit ints */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - - /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ - spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - - /* right shift 32-bit fragment Z to 24 bits */ - spe_rotmi(f, fragZ_reg, fragZ_reg, -8); - - /* extract 24-bit Z values from ZS values by shifting */ - spe_rotmi(f, fbZ_reg, fbZS_reg, -8); - - /* extract 8-bit stencil values by masking */ - spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff); - break; - - case PIPE_FORMAT_Z32_UNORM: - /* Load: fbZ_reg = memory[depth_tile_reg + offset_reg] */ - spe_lqx(f, fbZ_reg, depth_tile_reg, quad_offset_reg); - - /* convert fragment Z from [0,1] to 32-bit ints */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - - /* No stencil, so can't do anything there */ - break; - - case PIPE_FORMAT_Z16_UNORM: - /* XXX This code for 16bpp Z is broken! 
*/ - - /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ - spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - - /* Copy over 4 32-bit values */ - spe_move(f, fbZ_reg, fbZS_reg); - - /* convert Z from [0,1] to 16-bit ints */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - spe_rotmi(f, fragZ_reg, fragZ_reg, -16); - /* No stencil */ - break; - - default: - ASSERT(0); /* invalid format */ - } - - /* If stencil is enabled, use the stencil-specific code - * generator to generate both the stencil and depth (if needed) - * tests. Otherwise, if only depth is enabled, generate - * a quick depth test. The test generators themselves will - * report back whether the depth/stencil buffer has to be - * written back. - */ - if (dsa->stencil[0].enabled) { - /* This will perform the stencil and depth tests, and update - * the mask_reg, fbZ_reg, and fbS_reg as required by the - * tests. - */ - ASSERT(fbS_reg >= 0); - spe_comment(f, 0, "Perform stencil test"); - - /* Note that fbZ_reg may not be set on entry, if stenciling - * is enabled but there's no Z-buffer. The - * gen_stencil_depth_test() function must ignore the - * fbZ_reg register if depth is not enabled. - */ - write_depth_stencil = gen_stencil_depth_test(f, dsa, stencil_ref, facing, - mask_reg, fragZ_reg, - fbZ_reg, fbS_reg); - } - else if (dsa->depth.enabled) { - int zmask_reg = spe_allocate_available_register(f); - ASSERT(fbZ_reg >= 0); - spe_comment(f, 0, "Perform depth test"); - write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg, - fbZ_reg, zmask_reg); - spe_release_register(f, zmask_reg); - } - else { - write_depth_stencil = FALSE; - } - - if (write_depth_stencil) { - /* Merge latest Z and Stencil values into fbZS_reg. - * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. - * fbS_reg has four 8-bit Z values in bits [7..0]. 
- */ - spe_comment(f, 0, "Store quad's depth/stencil values in tile"); - if (zs_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { - spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ - spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ - } - else if (zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { - spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ - spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ - } - else if (zs_format == PIPE_FORMAT_Z32_UNORM) { - spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ - } - else if (zs_format == PIPE_FORMAT_Z16_UNORM) { - spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ - } - else if (zs_format == PIPE_FORMAT_S8_UINT) { - ASSERT(0); /* XXX to do */ - } - else { - ASSERT(0); /* bad zs_format */ - } - - /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ - spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - } - - /* Don't need these any more */ - spe_release_register(f, fbZS_reg); - spe_release_register(f, fbZ_reg); - spe_release_register(f, fbS_reg); - spe_release_register(f, zmask_reg); -} - - - -/** - * Generate SPE code to implement the fragment operations (alpha test, - * depth test, stencil test, blending, colormask, and final - * framebuffer write) as specified by the current context state. - * - * Logically, this code will be called after running the fragment - * shader. But under some circumstances we could run some of this - * code before the fragment shader to cull fragments/quads that are - * totally occluded/discarded. - * - * XXX we only support PIPE_FORMAT_S8_UINT_Z24_UNORM z/stencil buffer right now. - * - * See the spu_default_fragment_ops() function to see how the per-fragment - * operations would be done with ordinary C code. - * The code we generate here though has no branches, is SIMD, etc and - * should be much faster. 
- * - * \param cell the rendering context (in) - * \param facing whether the generated code is for front-facing or - * back-facing fragments - * \param f the generated function (in/out); on input, the function - * must already have been initialized. On exit, whatever - * instructions within the generated function have had - * the fragment ops appended. - */ -void -cell_gen_fragment_function(struct cell_context *cell, - const uint facing, - struct spe_function *f) -{ - const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil; - const struct pipe_stencil_ref *stencil_ref = &cell->stencil_ref; - const struct pipe_blend_state *blend = cell->blend; - const struct pipe_blend_color *blend_color = &cell->blend_color; - const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; - - /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ - const int x_reg = 3; /* uint */ - const int y_reg = 4; /* uint */ - const int color_tile_reg = 5; /* tile_t * */ - const int depth_tile_reg = 6; /* tile_t * */ - const int fragZ_reg = 7; /* vector float */ - const int fragR_reg = 8; /* vector float */ - const int fragG_reg = 9; /* vector float */ - const int fragB_reg = 10; /* vector float */ - const int fragA_reg = 11; /* vector float */ - const int mask_reg = 12; /* vector uint */ - - ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK); - - /* offset of quad from start of tile - * XXX assuming 4-byte pixels for color AND Z/stencil!!!! - */ - int quad_offset_reg; - - int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ - - if (cell->debug_flags & CELL_DEBUG_ASM) { - spe_print_code(f, TRUE); - spe_indent(f, 8); - spe_comment(f, -4, facing == CELL_FACING_FRONT - ? 
"Begin front-facing per-fragment ops" - : "Begin back-facing per-fragment ops"); - } - - spe_allocate_register(f, x_reg); - spe_allocate_register(f, y_reg); - spe_allocate_register(f, color_tile_reg); - spe_allocate_register(f, depth_tile_reg); - spe_allocate_register(f, fragZ_reg); - spe_allocate_register(f, fragR_reg); - spe_allocate_register(f, fragG_reg); - spe_allocate_register(f, fragB_reg); - spe_allocate_register(f, fragA_reg); - spe_allocate_register(f, mask_reg); - - quad_offset_reg = spe_allocate_available_register(f); - fbRGBA_reg = spe_allocate_available_register(f); - - /* compute offset of quad from start of tile, in bytes */ - { - int x2_reg = spe_allocate_available_register(f); - int y2_reg = spe_allocate_available_register(f); - - ASSERT(TILE_SIZE == 32); - - spe_comment(f, 0, "Compute quad offset within tile"); - spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ - spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ - spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ - spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */ - spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */ - - spe_release_register(f, x2_reg); - spe_release_register(f, y2_reg); - } - - /* Generate the alpha test, if needed. */ - if (dsa->alpha.enabled) { - gen_alpha_test(dsa, f, mask_reg, fragA_reg); - } - - /* generate depth and/or stencil test code */ - if (dsa->depth.enabled || dsa->stencil[0].enabled) { - gen_depth_stencil(cell, dsa, stencil_ref, f, - facing, - mask_reg, - depth_tile_reg, - quad_offset_reg, - fragZ_reg); - } - - /* Get framebuffer quad/colors. We'll need these for blending, - * color masking, and to obey the quad/pixel mask. - * Load: fbRGBA_reg = memory[color_tile + quad_offset] - * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking - * we could skip this load. 
- */ - spe_comment(f, 0, "Fetch quad colors from tile"); - spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); - - if (blend->rt[0].blend_enable) { - spe_comment(f, 0, "Perform blending"); - gen_blend(blend, blend_color, f, color_format, - fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); - } - - /* - * Write fragment colors to framebuffer/tile. - * This involves converting the fragment colors from float[4] to the - * tile's specific format and obeying the quad/pixel mask. - */ - { - int rgba_reg = spe_allocate_available_register(f); - - /* Pack four float colors as four 32-bit int colors */ - spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors"); - gen_pack_colors(f, color_format, - fragR_reg, fragG_reg, fragB_reg, fragA_reg, - rgba_reg); - - if (blend->logicop_enable) { - spe_comment(f, 0, "Compute logic op"); - gen_logicop(blend, f, rgba_reg, fbRGBA_reg); - } - - if (blend->rt[0].colormask != PIPE_MASK_RGBA) { - spe_comment(f, 0, "Compute color mask"); - gen_colormask(f, blend->rt[0].colormask, color_format, rgba_reg, fbRGBA_reg); - } - - /* Mix fragment colors with framebuffer colors using the quad/pixel mask: - * if (mask[i]) - * rgba[i] = rgba[i]; - * else - * rgba[i] = framebuffer[i]; - */ - spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg); - - /* Store updated quad in tile: - * memory[color_tile + quad_offset] = rgba_reg; - */ - spe_comment(f, 0, "Store quad colors into color tile"); - spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); - - spe_release_register(f, rgba_reg); - } - - //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); - - spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ - - spe_release_register(f, fbRGBA_reg); - spe_release_register(f, quad_offset_reg); - - if (cell->debug_flags & CELL_DEBUG_ASM) { - char buffer[1024]; - sprintf(buffer, "End %s-facing per-fragment ops: %d instructions", - facing == CELL_FACING_FRONT ? 
"front" : "back", f->num_inst); - spe_comment(f, -4, buffer); - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h deleted file mode 100644 index 21b35d1fafe..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_GEN_FRAGMENT_H -#define CELL_GEN_FRAGMENT_H - - -extern void -cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f); - - -#endif /* CELL_GEN_FRAGMENT_H */ - diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c deleted file mode 100644 index 223adda48f0..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ /dev/null @@ -1,473 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/* Authors: - * Keith Whitwell - * Brian Paul - */ - -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "draw/draw_context.h" -#include "cell_context.h" -#include "cell_flush.h" -#include "cell_pipe_state.h" -#include "cell_state.h" -#include "cell_texture.h" - - - -static void * -cell_create_blend_state(struct pipe_context *pipe, - const struct pipe_blend_state *blend) -{ - return mem_dup(blend, sizeof(*blend)); -} - - -static void -cell_bind_blend_state(struct pipe_context *pipe, void *blend) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->blend = (struct pipe_blend_state *) blend; - cell->dirty |= CELL_NEW_BLEND; -} - - -static void -cell_delete_blend_state(struct pipe_context *pipe, void *blend) -{ - FREE(blend); -} - - -static void -cell_set_blend_color(struct pipe_context *pipe, - const struct pipe_blend_color *blend_color) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->blend_color = *blend_color; - - cell->dirty |= CELL_NEW_BLEND; -} - - - - -static void * -cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *dsa) -{ - return mem_dup(dsa, sizeof(*dsa)); -} - - -static void -cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, - void *dsa) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa; - cell->dirty |= CELL_NEW_DEPTH_STENCIL; -} - - -static void -cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa) -{ - FREE(dsa); -} - - -static void -cell_set_stencil_ref(struct pipe_context *pipe, - const struct pipe_stencil_ref *stencil_ref) -{ - struct cell_context *cell = cell_context(pipe); - - draw_flush(cell->draw); - - cell->stencil_ref = *stencil_ref; - - cell->dirty |= 
CELL_NEW_DEPTH_STENCIL; -} - - -static void -cell_set_clip_state(struct pipe_context *pipe, - const struct pipe_clip_state *clip) -{ - struct cell_context *cell = cell_context(pipe); - - /* pass the clip state to the draw module */ - draw_set_clip_state(cell->draw, clip); -} - - -static void -cell_set_sample_mask(struct pipe_context *pipe, - unsigned sample_mask) -{ -} - - -/* Called when driver state tracker notices changes to the viewport - * matrix: - */ -static void -cell_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) -{ - struct cell_context *cell = cell_context(pipe); - - cell->viewport = *viewport; /* struct copy */ - cell->dirty |= CELL_NEW_VIEWPORT; - - /* pass the viewport info to the draw module */ - draw_set_viewport_state(cell->draw, viewport); - - /* Using tnl/ and vf/ modules is temporary while getting started. - * Full pipe will have vertex shader, vertex fetch of its own. - */ -} - - -static void -cell_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) -{ - struct cell_context *cell = cell_context(pipe); - - memcpy( &cell->scissor, scissor, sizeof(*scissor) ); - cell->dirty |= CELL_NEW_SCISSOR; -} - - -static void -cell_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) -{ - struct cell_context *cell = cell_context(pipe); - - memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) ); - cell->dirty |= CELL_NEW_STIPPLE; -} - - - -static void * -cell_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *rasterizer) -{ - return mem_dup(rasterizer, sizeof(*rasterizer)); -} - - -static void -cell_bind_rasterizer_state(struct pipe_context *pipe, void *rast) -{ - struct pipe_rasterizer_state *rasterizer = - (struct pipe_rasterizer_state *) rast; - struct cell_context *cell = cell_context(pipe); - - /* pass-through to draw module */ - draw_set_rasterizer_state(cell->draw, rasterizer, rast); - - 
cell->rasterizer = rasterizer; - - cell->dirty |= CELL_NEW_RASTERIZER; -} - - -static void -cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer) -{ - FREE(rasterizer); -} - - - -static void * -cell_create_sampler_state(struct pipe_context *pipe, - const struct pipe_sampler_state *sampler) -{ - return mem_dup(sampler, sizeof(*sampler)); -} - - -static void -cell_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **samplers) -{ - struct cell_context *cell = cell_context(pipe); - uint i, changed = 0x0; - - assert(num <= CELL_MAX_SAMPLERS); - - draw_flush(cell->draw); - - for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL; - if (cell->sampler[i] != new_samp) { - cell->sampler[i] = new_samp; - changed |= (1 << i); - } - } - - if (changed) { - cell->dirty |= CELL_NEW_SAMPLER; - cell->dirty_samplers |= changed; - } -} - - -static void -cell_delete_sampler_state(struct pipe_context *pipe, - void *sampler) -{ - FREE( sampler ); -} - - - -static void -cell_set_fragment_sampler_views(struct pipe_context *pipe, - unsigned num, - struct pipe_sampler_view **views) -{ - struct cell_context *cell = cell_context(pipe); - uint i, changed = 0x0; - - assert(num <= CELL_MAX_SAMPLERS); - - for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - struct pipe_sampler_view *new_view = i < num ? views[i] : NULL; - struct pipe_sampler_view *old_view = cell->fragment_sampler_views[i]; - - if (old_view != new_view) { - struct pipe_resource *new_tex = new_view ? 
new_view->texture : NULL; - - pipe_sampler_view_reference(&cell->fragment_sampler_views[i], - new_view); - pipe_resource_reference((struct pipe_resource **) &cell->texture[i], - (struct pipe_resource *) new_tex); - - changed |= (1 << i); - } - } - - cell->num_textures = num; - - if (changed) { - cell->dirty |= CELL_NEW_TEXTURE; - cell->dirty_textures |= changed; - } -} - - -static struct pipe_sampler_view * -cell_create_sampler_view(struct pipe_context *pipe, - struct pipe_resource *texture, - const struct pipe_sampler_view *templ) -{ - struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view); - - if (view) { - *view = *templ; - view->reference.count = 1; - view->texture = NULL; - pipe_resource_reference(&view->texture, texture); - view->context = pipe; - } - - return view; -} - - -static void -cell_sampler_view_destroy(struct pipe_context *pipe, - struct pipe_sampler_view *view) -{ - pipe_resource_reference(&view->texture, NULL); - FREE(view); -} - - -/** - * Map color and z/stencil framebuffer surfaces. - */ -static void -cell_map_surfaces(struct cell_context *cell) -{ -#if 0 - struct pipe_screen *screen = cell->pipe.screen; -#endif - uint i; - - for (i = 0; i < 1; i++) { - struct pipe_surface *ps = cell->framebuffer.cbufs[i]; - if (ps) { - struct cell_resource *ct = cell_resource(ps->texture); -#if 0 - cell->cbuf_map[i] = screen->buffer_map(screen, - ct->buffer, - (PIPE_BUFFER_USAGE_GPU_READ | - PIPE_BUFFER_USAGE_GPU_WRITE)); -#else - cell->cbuf_map[i] = ct->data; -#endif - } - } - - { - struct pipe_surface *ps = cell->framebuffer.zsbuf; - if (ps) { - struct cell_resource *ct = cell_resource(ps->texture); -#if 0 - cell->zsbuf_map = screen->buffer_map(screen, - ct->buffer, - (PIPE_BUFFER_USAGE_GPU_READ | - PIPE_BUFFER_USAGE_GPU_WRITE)); -#else - cell->zsbuf_map = ct->data; -#endif - } - } -} - - -/** - * Unmap color and z/stencil framebuffer surfaces. 
- */ -static void -cell_unmap_surfaces(struct cell_context *cell) -{ - /*struct pipe_screen *screen = cell->pipe.screen;*/ - uint i; - - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - struct pipe_surface *ps = cell->framebuffer.cbufs[i]; - if (ps && cell->cbuf_map[i]) { - /*struct cell_resource *ct = cell_resource(ps->texture);*/ - assert(ps->texture); - /*assert(ct->buffer);*/ - - /*screen->buffer_unmap(screen, ct->buffer);*/ - cell->cbuf_map[i] = NULL; - } - } - - { - struct pipe_surface *ps = cell->framebuffer.zsbuf; - if (ps && cell->zsbuf_map) { - /*struct cell_resource *ct = cell_resource(ps->texture);*/ - /*screen->buffer_unmap(screen, ct->buffer);*/ - cell->zsbuf_map = NULL; - } - } -} - - -static void -cell_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct cell_context *cell = cell_context(pipe); - - if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) { - uint i; - - /* unmap old surfaces */ - cell_unmap_surfaces(cell); - - /* Finish any pending rendering to the current surface before - * installing a new surface! 
- */ - cell_flush_int(cell, CELL_FLUSH_WAIT); - - /* update my state - * (this is also where old surfaces will finally get freed) - */ - cell->framebuffer.width = fb->width; - cell->framebuffer.height = fb->height; - cell->framebuffer.nr_cbufs = fb->nr_cbufs; - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]); - } - pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf); - - /* map new surfaces */ - cell_map_surfaces(cell); - - cell->dirty |= CELL_NEW_FRAMEBUFFER; - } -} - - -void -cell_init_state_functions(struct cell_context *cell) -{ - cell->pipe.create_blend_state = cell_create_blend_state; - cell->pipe.bind_blend_state = cell_bind_blend_state; - cell->pipe.delete_blend_state = cell_delete_blend_state; - - cell->pipe.create_sampler_state = cell_create_sampler_state; - cell->pipe.bind_fragment_sampler_states = cell_bind_sampler_states; - cell->pipe.delete_sampler_state = cell_delete_sampler_state; - - cell->pipe.set_fragment_sampler_views = cell_set_fragment_sampler_views; - cell->pipe.create_sampler_view = cell_create_sampler_view; - cell->pipe.sampler_view_destroy = cell_sampler_view_destroy; - - cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state; - cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state; - cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state; - - cell->pipe.create_rasterizer_state = cell_create_rasterizer_state; - cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state; - cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state; - - cell->pipe.set_blend_color = cell_set_blend_color; - cell->pipe.set_stencil_ref = cell_set_stencil_ref; - cell->pipe.set_clip_state = cell_set_clip_state; - cell->pipe.set_sample_mask = cell_set_sample_mask; - - cell->pipe.set_framebuffer_state = cell_set_framebuffer_state; - - cell->pipe.set_polygon_stipple = cell_set_polygon_stipple; - 
cell->pipe.set_scissor_state = cell_set_scissor_state; - cell->pipe.set_viewport_state = cell_set_viewport_state; -} diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.h b/src/gallium/drivers/cell/ppu/cell_pipe_state.h deleted file mode 100644 index 1889bd52ff5..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.h +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef CELL_PIPE_STATE_H -#define CELL_PIPE_STATE_H - - -struct cell_context; - -extern void -cell_init_state_functions(struct cell_context *cell); - - -#endif /* CELL_PIPE_STATE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_public.h b/src/gallium/drivers/cell/ppu/cell_public.h deleted file mode 100644 index 7e2e093565d..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_public.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef CELL_PUBLIC_H -#define CELL_PUBLIC_H - -struct pipe_screen; -struct sw_winsys; - -struct pipe_screen * -cell_create_screen(struct sw_winsys *winsys); - -#endif diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c deleted file mode 100644 index f648482c551..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ /dev/null @@ -1,211 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief Last stage of 'draw' pipeline: send tris to SPUs. - * \author Brian Paul - */ - -#include "cell_context.h" -#include "cell_render.h" -#include "cell_spu.h" -#include "util/u_memory.h" -#include "draw/draw_private.h" - - -struct render_stage { - struct draw_stage stage; /**< This must be first (base class) */ - - struct cell_context *cell; -}; - - -static INLINE struct render_stage * -render_stage(struct draw_stage *stage) -{ - return (struct render_stage *) stage; -} - - -static void render_begin( struct draw_stage *stage ) -{ -#if 0 - struct render_stage *render = render_stage(stage); - struct cell_context *sp = render->cell; - const struct pipe_shader_state *fs = &render->cell->fs->shader; - render->quad.nr_attrs = render->cell->nr_frag_attrs; - - render->firstFpInput = fs->input_semantic_name[0]; - - sp->quad.first->begin(sp->quad.first); -#endif -} - - -static void render_end( struct draw_stage *stage ) -{ -} - - -static void reset_stipple_counter( struct draw_stage *stage ) -{ - struct render_stage *render = render_stage(stage); - /*render->cell->line_stipple_counter = 0;*/ -} - - -static void -render_point(struct draw_stage *stage, struct prim_header *prim) -{ -} - - -static void -render_line(struct draw_stage *stage, struct prim_header *prim) -{ -} - - -/** Write a vertex into the prim buffer */ -static void -save_vertex(struct cell_prim_buffer *buf, uint pos, - const struct vertex_header *vert) -{ - uint attr, j; - - for (attr = 0; attr < 2; attr++) { - for (j = 0; j < 4; j++) { - buf->vertex[pos][attr][j] = vert->data[attr][j]; - } - } - - /* update bounding box */ - if 
(vert->data[0][0] < buf->xmin) - buf->xmin = vert->data[0][0]; - if (vert->data[0][0] > buf->xmax) - buf->xmax = vert->data[0][0]; - if (vert->data[0][1] < buf->ymin) - buf->ymin = vert->data[0][1]; - if (vert->data[0][1] > buf->ymax) - buf->ymax = vert->data[0][1]; -} - - -static void -render_tri(struct draw_stage *stage, struct prim_header *prim) -{ - struct render_stage *rs = render_stage(stage); - struct cell_context *cell = rs->cell; - struct cell_prim_buffer *buf = &cell->prim_buffer; - uint i; - - if (buf->num_verts + 3 > CELL_MAX_VERTS) { - cell_flush_prim_buffer(cell); - } - - i = buf->num_verts; - assert(i+2 <= CELL_MAX_VERTS); - save_vertex(buf, i+0, prim->v[0]); - save_vertex(buf, i+1, prim->v[1]); - save_vertex(buf, i+2, prim->v[2]); - buf->num_verts += 3; -} - - -/** - * Send the a RENDER command to all SPUs to have them render the prims - * in the current prim_buffer. - */ -void -cell_flush_prim_buffer(struct cell_context *cell) -{ - uint i; - - if (cell->prim_buffer.num_verts == 0) - return; - - for (i = 0; i < cell->num_spus; i++) { - struct cell_command_render *render = &cell_global.command[i].render; - render->prim_type = PIPE_PRIM_TRIANGLES; - render->num_verts = cell->prim_buffer.num_verts; - render->front_ccw = cell->rasterizer->front_ccw; - render->vertex_size = cell->vertex_info->size * 4; - render->xmin = cell->prim_buffer.xmin; - render->ymin = cell->prim_buffer.ymin; - render->xmax = cell->prim_buffer.xmax; - render->ymax = cell->prim_buffer.ymax; - render->vertex_data = &cell->prim_buffer.vertex; - ASSERT_ALIGN16(render->vertex_data); - send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER); - } - - cell->prim_buffer.num_verts = 0; - - cell->prim_buffer.xmin = 1e100; - cell->prim_buffer.ymin = 1e100; - cell->prim_buffer.xmax = -1e100; - cell->prim_buffer.ymax = -1e100; - - /* XXX temporary, need to double-buffer the prim buffer until we get - * a real command buffer/list system. 
- */ - cell_flush(&cell->pipe, 0x0); -} - - - -static void render_destroy( struct draw_stage *stage ) -{ - FREE( stage ); -} - - -/** - * Create a new draw/render stage. This will be plugged into the - * draw module as the last pipeline stage. - */ -struct draw_stage *cell_draw_render_stage( struct cell_context *cell ) -{ - struct render_stage *render = CALLOC_STRUCT(render_stage); - - render->cell = cell; - render->stage.draw = cell->draw; - render->stage.begin = render_begin; - render->stage.point = render_point; - render->stage.line = render_line; - render->stage.tri = render_tri; - render->stage.end = render_end; - render->stage.reset_stipple_counter = reset_stipple_counter; - render->stage.destroy = render_destroy; - - /* - render->quad.coef = render->coef; - render->quad.posCoef = &render->posCoef; - */ - - return &render->stage; -} diff --git a/src/gallium/drivers/cell/ppu/cell_render.h b/src/gallium/drivers/cell/ppu/cell_render.h deleted file mode 100644 index 826dcbafeba..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_render.h +++ /dev/null @@ -1,39 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef CELL_RENDER_H -#define CELL_RENDER_H - -struct cell_context; -struct draw_stage; - -extern void -cell_flush_prim_buffer(struct cell_context *cell); - -extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell ); - -#endif /* CELL_RENDER_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c deleted file mode 100644 index 7ffdcc51bbd..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ /dev/null @@ -1,221 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "util/u_memory.h" -#include "util/u_simple_screen.h" -#include "pipe/p_defines.h" -#include "pipe/p_screen.h" - -#include "cell/common.h" -#include "cell_context.h" -#include "cell_screen.h" -#include "cell_texture.h" -#include "cell_public.h" - -#include "state_tracker/sw_winsys.h" - - -static const char * -cell_get_vendor(struct pipe_screen *screen) -{ - return "VMware, Inc."; -} - - -static const char * -cell_get_name(struct pipe_screen *screen) -{ - return "Cell"; -} - - -static int -cell_get_param(struct pipe_screen *screen, enum pipe_cap param) -{ - switch (param) { - case PIPE_CAP_MAX_COMBINED_SAMPLERS: - return CELL_MAX_SAMPLERS; - case PIPE_CAP_NPOT_TEXTURES: - return 1; - case PIPE_CAP_TWO_SIDED_STENCIL: - return 1; - case PIPE_CAP_ANISOTROPIC_FILTER: - return 0; - case PIPE_CAP_POINT_SPRITE: - return 1; - case PIPE_CAP_MAX_RENDER_TARGETS: - return 1; - case PIPE_CAP_OCCLUSION_QUERY: - return 1; - case PIPE_CAP_TIMER_QUERY: - return 0; - case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 10; - case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return CELL_MAX_TEXTURE_LEVELS; - case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - return 8; /* max 128x128x128 */ - case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return CELL_MAX_TEXTURE_LEVELS; - case PIPE_CAP_TEXTURE_MIRROR_CLAMP: - return 0; /* XXX to do */ - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - 
return 1; - case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT: - case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER: - return 0; - case PIPE_CAP_BLEND_EQUATION_SEPARATE: - return 1; - default: - return 0; - } -} - -static int -cell_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param) -{ - switch(shader) - { - case PIPE_SHADER_FRAGMENT: - switch (param) { - case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: - return CELL_MAX_SAMPLERS; - default: - return tgsi_exec_get_shader_param(param); - } - case PIPE_SHADER_VERTEX: - case PIPE_SHADER_GEOMETRY: - return draw_get_shader_param(shader, param); - default: - return 0; - } -} - -static float -cell_get_paramf(struct pipe_screen *screen, enum pipe_capf param) -{ - switch (param) { - case PIPE_CAPF_MAX_LINE_WIDTH: - /* fall-through */ - case PIPE_CAPF_MAX_LINE_WIDTH_AA: - return 255.0; /* arbitrary */ - - case PIPE_CAPF_MAX_POINT_WIDTH: - /* fall-through */ - case PIPE_CAPF_MAX_POINT_WIDTH_AA: - return 255.0; /* arbitrary */ - - case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: - return 0.0; - - case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: - return 16.0; /* arbitrary */ - - default: - return 0; - } -} - - -static boolean -cell_is_format_supported( struct pipe_screen *screen, - enum pipe_format format, - enum pipe_texture_target target, - unsigned sample_count, - unsigned tex_usage) -{ - struct sw_winsys *winsys = cell_screen(screen)->winsys; - - if (sample_count > 1) - return FALSE; - - if (tex_usage & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - if (!winsys->is_displaytarget_format_supported(winsys, tex_usage, format)) - return FALSE; - } - - /* only a few formats are known to work at this time */ - switch (format) { - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_I8_UNORM: - return TRUE; - default: - return FALSE; - } -} - - -static void -cell_destroy_screen( struct pipe_screen *screen ) -{ - struct cell_screen 
*sp_screen = cell_screen(screen); - struct sw_winsys *winsys = sp_screen->winsys; - - if(winsys->destroy) - winsys->destroy(winsys); - - FREE(screen); -} - - - -/** - * Create a new pipe_screen object - * Note: we're not presently subclassing pipe_screen (no cell_screen) but - * that would be the place to put SPU thread/context info... - */ -struct pipe_screen * -cell_create_screen(struct sw_winsys *winsys) -{ - struct cell_screen *screen = CALLOC_STRUCT(cell_screen); - - if (!screen) - return NULL; - - screen->winsys = winsys; - screen->base.winsys = NULL; - - screen->base.destroy = cell_destroy_screen; - - screen->base.get_name = cell_get_name; - screen->base.get_vendor = cell_get_vendor; - screen->base.get_param = cell_get_param; - screen->base.get_shader_param = cell_get_shader_param; - screen->base.get_paramf = cell_get_paramf; - screen->base.is_format_supported = cell_is_format_supported; - screen->base.context_create = cell_create_context; - - cell_init_screen_texture_funcs(&screen->base); - - return &screen->base; -} diff --git a/src/gallium/drivers/cell/ppu/cell_screen.h b/src/gallium/drivers/cell/ppu/cell_screen.h deleted file mode 100644 index baff9d3b7d4..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_screen.h +++ /dev/null @@ -1,55 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef CELL_SCREEN_H -#define CELL_SCREEN_H - - -#include "pipe/p_screen.h" - -struct sw_winsys; - -struct cell_screen { - struct pipe_screen base; - - struct sw_winsys *winsys; - - /* Increments whenever textures are modified. Contexts can track - * this. 
- */ - unsigned timestamp; -}; - -static INLINE struct cell_screen * -cell_screen( struct pipe_screen *pipe ) -{ - return (struct cell_screen *)pipe; -} - - -#endif /* CELL_SCREEN_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c deleted file mode 100644 index 39284f3a5d1..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ /dev/null @@ -1,219 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * Utility/wrappers for communicating with the SPUs. 
- */ - - -#include - -#include "cell_spu.h" -#include "pipe/p_format.h" -#include "pipe/p_state.h" -#include "util/u_memory.h" -#include "cell/common.h" - - -/* -helpful headers: -/opt/ibm/cell-sdk/prototype/src/include/ppu/cbe_mfc.h -*/ - - -/** - * Cell/SPU info that's not per-context. - */ -struct cell_global_info cell_global; - - -/** - * Scan /proc/cpuinfo to determine the timebase for the system. - * This is used by the SPUs to convert 'decrementer' ticks to seconds. - * There may be a better way to get this value... - */ -static unsigned -get_timebase(void) -{ - FILE *f = fopen("/proc/cpuinfo", "r"); - unsigned timebase; - - assert(f); - while (!feof(f)) { - char line[80]; - fgets(line, sizeof(line), f); - if (strncmp(line, "timebase", 8) == 0) { - char *colon = strchr(line, ':'); - if (colon) { - timebase = atoi(colon + 2); - break; - } - } - } - fclose(f); - - return timebase; -} - - -/** - * Write a 1-word message to the given SPE mailbox. - */ -void -send_mbox_message(spe_context_ptr_t ctx, unsigned int msg) -{ - spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING); -} - - -/** - * Wait for a 1-word message to arrive in given mailbox. - */ -uint -wait_mbox_message(spe_context_ptr_t ctx) -{ - do { - unsigned data; - int count = spe_out_mbox_read(ctx, &data, 1); - - if (count == 1) { - return data; - } - - if (count < 0) { - /* error */ ; - } - } while (1); -} - - -/** - * Called by pthread_create() to spawn an SPU thread. - */ -static void * -cell_thread_function(void *arg) -{ - struct cell_init_info *init = (struct cell_init_info *) arg; - unsigned entry = SPE_DEFAULT_ENTRY; - - ASSERT_ALIGN16(init); - - if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0, - init, NULL, NULL) < 0) { - fprintf(stderr, "spe_context_run() failed\n"); - exit(1); - } - - pthread_exit(NULL); -} - - -/** - * Create the SPU threads. This is done once during driver initialization. - * This involves setting the "init" message which is sent to each SPU. 
- * The init message specifies an SPU id, total number of SPUs, location - * and number of batch buffers, etc. - */ -void -cell_start_spus(struct cell_context *cell) -{ - static boolean one_time_init = FALSE; - uint i, j; - uint timebase = get_timebase(); - - if (one_time_init) { - fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " - "on Cell.\n"); - abort(); - } - - one_time_init = TRUE; - - assert(cell->num_spus <= CELL_MAX_SPUS); - - ASSERT_ALIGN16(&cell_global.inits[0]); - ASSERT_ALIGN16(&cell_global.inits[1]); - - /* - * Initialize the global 'inits' structure for each SPU. - * A pointer to the init struct will be passed to each SPU. - * The SPUs will then each grab their init info with mfc_get(). - */ - for (i = 0; i < cell->num_spus; i++) { - cell_global.inits[i].id = i; - cell_global.inits[i].num_spus = cell->num_spus; - cell_global.inits[i].debug_flags = cell->debug_flags; - cell_global.inits[i].inv_timebase = 1000.0f / timebase; - - for (j = 0; j < CELL_NUM_BUFFERS; j++) { - cell_global.inits[i].buffers[j] = cell->buffer[j]; - } - cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0]; - - cell_global.inits[i].spu_functions = &cell->spu_functions; - - cell_global.spe_contexts[i] = spe_context_create(0, NULL); - if (!cell_global.spe_contexts[i]) { - fprintf(stderr, "spe_context_create() failed\n"); - exit(1); - } - - if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) { - fprintf(stderr, "spe_program_load() failed\n"); - exit(1); - } - - pthread_create(&cell_global.spe_threads[i], /* returned thread handle */ - NULL, /* pthread attribs */ - &cell_thread_function, /* start routine */ - &cell_global.inits[i]); /* thread argument */ - } -} - - -/** - * Tell all the SPUs to stop/exit. - * This is done when the driver's exiting / cleaning up. 
- */ -void -cell_spu_exit(struct cell_context *cell) -{ - uint i; - - for (i = 0; i < cell->num_spus; i++) { - send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT); - } - - /* wait for threads to exit */ - for (i = 0; i < cell->num_spus; i++) { - void *value; - pthread_join(cell_global.spe_threads[i], &value); - cell_global.spe_threads[i] = 0; - cell_global.spe_contexts[i] = 0; - } -} diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h deleted file mode 100644 index c93958a9ed5..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_spu.h +++ /dev/null @@ -1,79 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef CELL_SPU -#define CELL_SPU - - -#include -#include -#include "cell/common.h" - -#include "cell_context.h" - - -/** - * Global vars, for now anyway. - */ -struct cell_global_info -{ - /** - * SPU/SPE handles, etc - */ - spe_context_ptr_t spe_contexts[CELL_MAX_SPUS]; - pthread_t spe_threads[CELL_MAX_SPUS]; - - /** - * Data sent to SPUs at start-up - */ - struct cell_init_info inits[CELL_MAX_SPUS]; -}; - - -extern struct cell_global_info cell_global; - - -/** This is the handle for the actual SPE code */ -extern spe_program_handle_t g3d_spu; - - -extern void -send_mbox_message(spe_context_ptr_t ctx, unsigned int msg); - -extern uint -wait_mbox_message(spe_context_ptr_t ctx); - - -extern void -cell_start_spus(struct cell_context *cell); - - -extern void -cell_spu_exit(struct cell_context *cell); - - -#endif /* CELL_SPU */ diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h deleted file mode 100644 index 7adedcde57c..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ /dev/null @@ -1,65 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef CELL_STATE_H -#define CELL_STATE_H - - -#define CELL_NEW_VIEWPORT 0x1 -#define CELL_NEW_RASTERIZER 0x2 -#define CELL_NEW_FS 0x4 -#define CELL_NEW_BLEND 0x8 -#define CELL_NEW_CLIP 0x10 -#define CELL_NEW_SCISSOR 0x20 -#define CELL_NEW_STIPPLE 0x40 -#define CELL_NEW_FRAMEBUFFER 0x80 -#define CELL_NEW_ALPHA_TEST 0x100 -#define CELL_NEW_DEPTH_STENCIL 0x200 -#define CELL_NEW_SAMPLER 0x400 -#define CELL_NEW_TEXTURE 0x800 -#define CELL_NEW_VERTEX 0x1000 -#define CELL_NEW_VS 0x2000 -#define CELL_NEW_VS_CONSTANTS 0x4000 -#define CELL_NEW_FS_CONSTANTS 0x8000 -#define CELL_NEW_VERTEX_INFO 0x10000 - - -extern void -cell_update_derived( struct cell_context *cell ); - - -extern void -cell_init_shader_functions(struct cell_context *cell); - - -extern void -cell_init_vertex_functions(struct cell_context *cell); - - -#endif /* CELL_STATE_H */ - diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c deleted file mode 100644 index b723e794e71..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ /dev/null @@ -1,170 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_memory.h" -#include "pipe/p_shader_tokens.h" -#include "draw/draw_context.h" -#include "draw/draw_vertex.h" -#include "cell_context.h" -#include "cell_batch.h" -#include "cell_state.h" -#include "cell_state_emit.h" - - -/** - * Determine how to map vertex program outputs to fragment program inputs. - * Basically, this will be used when computing the triangle interpolation - * coefficients from the post-transform vertex attributes. - */ -static void -calculate_vertex_layout( struct cell_context *cell ) -{ - const struct cell_fragment_shader_state *fs = cell->fs; - const enum interp_mode colorInterp - = cell->rasterizer->flatshade ? 
INTERP_CONSTANT : INTERP_LINEAR; - struct vertex_info *vinfo = &cell->vertex_info; - uint i; - int src; - -#if 0 - if (cell->vbuf) { - /* if using the post-transform vertex buffer, tell draw_vbuf to - * simply emit the whole post-xform vertex as-is: - */ - struct vertex_info *vinfo_vbuf = &cell->vertex_info_vbuf; - vinfo_vbuf->num_attribs = 0; - draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); - vinfo_vbuf->size = 4 * vs->num_outputs + sizeof(struct vertex_header)/4; - } -#endif - - /* reset vinfo */ - vinfo->num_attribs = 0; - - /* we always want to emit vertex pos */ - src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); - assert(src >= 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); - - - /* - * Loop over fragment shader inputs, searching for the matching output - * from the vertex shader. - */ - for (i = 0; i < fs->info.num_inputs; i++) { - switch (fs->info.input_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - /* already done above */ - break; - - case TGSI_SEMANTIC_COLOR: - src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR, - fs->info.input_semantic_index[i]); - assert(src >= 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); - break; - - case TGSI_SEMANTIC_FOG: - src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0); -#if 1 - if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ - src = 0; -#endif - assert(src >= 0); - draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); - break; - - case TGSI_SEMANTIC_GENERIC: - /* this includes texcoords and varying vars */ - src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC, - fs->info.input_semantic_index[i]); - assert(src >= 0); - draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); - break; - - default: - assert(0); - } - } - - draw_compute_vertex_size(vinfo); - - /* XXX only signal this if format really changes */ - cell->dirty |= CELL_NEW_VERTEX_INFO; -} - - -#if 0 -/** - * Recompute 
cliprect from scissor bounds, scissor enable and surface size. - */ -static void -compute_cliprect(struct cell_context *sp) -{ - uint surfWidth = sp->framebuffer.width; - uint surfHeight = sp->framebuffer.height; - - if (sp->rasterizer->scissor) { - /* clip to scissor rect */ - sp->cliprect.minx = MAX2(sp->scissor.minx, 0); - sp->cliprect.miny = MAX2(sp->scissor.miny, 0); - sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); - sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight); - } - else { - /* clip to surface bounds */ - sp->cliprect.minx = 0; - sp->cliprect.miny = 0; - sp->cliprect.maxx = surfWidth; - sp->cliprect.maxy = surfHeight; - } -} -#endif - - - -/** - * Update derived state, send current state to SPUs prior to rendering. - */ -void cell_update_derived( struct cell_context *cell ) -{ - if (cell->dirty & (CELL_NEW_RASTERIZER | - CELL_NEW_FS | - CELL_NEW_VS)) - calculate_vertex_layout( cell ); - -#if 0 - if (cell->dirty & (CELL_NEW_SCISSOR | - CELL_NEW_DEPTH_STENCIL_ALPHA | - CELL_NEW_FRAMEBUFFER)) - compute_cliprect(cell); -#endif - - cell_emit_state(cell); - - cell->dirty = 0; -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c deleted file mode 100644 index bb11c68fa24..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ /dev/null @@ -1,343 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_inlines.h" -#include "util/u_memory.h" -#include "util/u_math.h" -#include "util/u_format.h" -#include "cell_context.h" -#include "cell_gen_fragment.h" -#include "cell_state.h" -#include "cell_state_emit.h" -#include "cell_batch.h" -#include "cell_texture.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" - - -/** - * Find/create a cell_command_fragment_ops object corresponding to the - * current blend/stencil/z/colormask/etc. state. 
- */ -static struct cell_command_fragment_ops * -lookup_fragment_ops(struct cell_context *cell) -{ - struct cell_fragment_ops_key key; - struct cell_command_fragment_ops *ops; - - /* - * Build key - */ - memset(&key, 0, sizeof(key)); - key.blend = *cell->blend; - key.blend_color = cell->blend_color; - key.dsa = *cell->depth_stencil; - - if (cell->framebuffer.cbufs[0]) - key.color_format = cell->framebuffer.cbufs[0]->format; - else - key.color_format = PIPE_FORMAT_NONE; - - if (cell->framebuffer.zsbuf) - key.zs_format = cell->framebuffer.zsbuf->format; - else - key.zs_format = PIPE_FORMAT_NONE; - - /* - * Look up key in cache. - */ - ops = (struct cell_command_fragment_ops *) - util_keymap_lookup(cell->fragment_ops_cache, &key); - - /* - * If not found, create/save new fragment ops command. - */ - if (!ops) { - struct spe_function spe_code_front, spe_code_back; - unsigned int facing_dependent, total_code_size; - - if (0) - debug_printf("**** Create New Fragment Ops\n"); - - /* Prepare the buffer that will hold the generated code. The - * "0" passed in for the size means that the SPE code will - * use a default size. - */ - spe_init_func(&spe_code_front, 0); - spe_init_func(&spe_code_back, 0); - - /* Generate new code. Always generate new code for both front-facing - * and back-facing fragments, even if it's the same code in both - * cases. - */ - cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front); - cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back); - - /* Make sure the code is a multiple of 8 bytes long; this is - * required to ensure that the dual pipe instruction alignment - * is correct. It's also important for the SPU unpacking, - * which assumes 8-byte boundaries. 
- */ - unsigned int front_code_size = spe_code_size(&spe_code_front); - while (front_code_size % 8 != 0) { - spe_lnop(&spe_code_front); - front_code_size = spe_code_size(&spe_code_front); - } - unsigned int back_code_size = spe_code_size(&spe_code_back); - while (back_code_size % 8 != 0) { - spe_lnop(&spe_code_back); - back_code_size = spe_code_size(&spe_code_back); - } - - /* Determine whether the code we generated is facing-dependent, by - * determining whether the generated code is different for the front- - * and back-facing fragments. - */ - if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) { - /* Code is identical; only need one copy. */ - facing_dependent = 0; - total_code_size = front_code_size; - } - else { - /* Code is different for front-facing and back-facing fragments. - * Need to send both copies. - */ - facing_dependent = 1; - total_code_size = front_code_size + back_code_size; - } - - /* alloc new fragment ops command. Note that this structure - * has variant length based on the total code size required. - */ - ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size); - /* populate the new cell_command_fragment_ops object */ - ops->opcode[0] = CELL_CMD_STATE_FRAGMENT_OPS; - ops->total_code_size = total_code_size; - ops->front_code_index = 0; - memcpy(ops->code, spe_code_front.store, front_code_size); - if (facing_dependent) { - /* We have separate front- and back-facing code. Append the - * back-facing code to the buffer. Be careful because the code - * size is in bytes, but the buffer is of unsigned elements. - */ - ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]); - memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size); - } - else { - /* Use the same code for front- and back-facing fragments */ - ops->back_code_index = ops->front_code_index; - } - - /* Set the fields for the fallback case. 
Note that these fields - * (and the whole fallback case) will eventually go away. - */ - ops->dsa = *cell->depth_stencil; - ops->blend = *cell->blend; - ops->blend_color = cell->blend_color; - - /* insert cell_command_fragment_ops object into keymap/cache */ - util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL); - - /* release rtasm buffer */ - spe_release_func(&spe_code_front); - spe_release_func(&spe_code_back); - } - else { - if (0) - debug_printf("**** Re-use Fragment Ops\n"); - } - - return ops; -} - - - -static void -emit_state_cmd(struct cell_context *cell, uint cmd, - const void *state, uint state_size) -{ - uint32_t *dst = (uint32_t *) - cell_batch_alloc16(cell, ROUNDUP16(sizeof(opcode_t) + state_size)); - *dst = cmd; - memcpy(dst + 4, state, state_size); -} - - -/** - * For state marked as 'dirty', construct a state-update command block - * and insert it into the current batch buffer. - */ -void -cell_emit_state(struct cell_context *cell) -{ - if (cell->dirty & CELL_NEW_FRAMEBUFFER) { - struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; - struct pipe_surface *zbuf = cell->framebuffer.zsbuf; - STATIC_ASSERT(sizeof(struct cell_command_framebuffer) % 16 == 0); - struct cell_command_framebuffer *fb - = cell_batch_alloc16(cell, sizeof(*fb)); - fb->opcode[0] = CELL_CMD_STATE_FRAMEBUFFER; - fb->color_start = cell->cbuf_map[0]; - fb->color_format = cbuf->format; - fb->depth_start = cell->zsbuf_map; - fb->depth_format = zbuf ? 
zbuf->format : PIPE_FORMAT_NONE; - fb->width = cell->framebuffer.width; - fb->height = cell->framebuffer.height; -#if 0 - printf("EMIT color format %s\n", util_format_name(fb->color_format)); - printf("EMIT depth format %s\n", util_format_name(fb->depth_format)); -#endif - } - - if (cell->dirty & (CELL_NEW_RASTERIZER)) { - STATIC_ASSERT(sizeof(struct cell_command_rasterizer) % 16 == 0); - struct cell_command_rasterizer *rast = - cell_batch_alloc16(cell, sizeof(*rast)); - rast->opcode[0] = CELL_CMD_STATE_RASTERIZER; - rast->rasterizer = *cell->rasterizer; - } - - if (cell->dirty & (CELL_NEW_FS)) { - /* Send new fragment program to SPUs */ - STATIC_ASSERT(sizeof(struct cell_command_fragment_program) % 16 == 0); - struct cell_command_fragment_program *fp - = cell_batch_alloc16(cell, sizeof(*fp)); - fp->opcode[0] = CELL_CMD_STATE_FRAGMENT_PROGRAM; - fp->num_inst = cell->fs->code.num_inst; - memcpy(&fp->code, cell->fs->code.store, - SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); - if (0) { - int i; - printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n"); - for (i = 0; i < fp->num_inst; i++) { - printf(" %3d: 0x%08x\n", i, fp->code[i]); - } - } - } - - if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) { - const uint shader = PIPE_SHADER_FRAGMENT; - const uint num_const = cell->constants[shader]->width0 / sizeof(float); - uint i, j; - float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float))); - uint32_t *ibuf = (uint32_t *) buf; - const float *constants = cell->mapped_constants[shader]; - ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS; - ibuf[4] = num_const; - j = 8; - for (i = 0; i < num_const; i++) { - buf[j++] = constants[i]; - } - } - - if (cell->dirty & (CELL_NEW_FRAMEBUFFER | - CELL_NEW_DEPTH_STENCIL | - CELL_NEW_BLEND)) { - struct cell_command_fragment_ops *fops, *fops_cmd; - /* Note that cell_command_fragment_ops is a variant-sized record */ - fops = lookup_fragment_ops(cell); - fops_cmd = cell_batch_alloc16(cell, ROUNDUP16(sizeof(*fops_cmd) + 
fops->total_code_size)); - memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size); - } - - if (cell->dirty & CELL_NEW_SAMPLER) { - uint i; - for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - if (cell->dirty_samplers & (1 << i)) { - if (cell->sampler[i]) { - STATIC_ASSERT(sizeof(struct cell_command_sampler) % 16 == 0); - struct cell_command_sampler *sampler - = cell_batch_alloc16(cell, sizeof(*sampler)); - sampler->opcode[0] = CELL_CMD_STATE_SAMPLER; - sampler->unit = i; - sampler->state = *cell->sampler[i]; - } - } - } - cell->dirty_samplers = 0x0; - } - - if (cell->dirty & CELL_NEW_TEXTURE) { - uint i; - for (i = 0;i < CELL_MAX_SAMPLERS; i++) { - if (cell->dirty_textures & (1 << i)) { - STATIC_ASSERT(sizeof(struct cell_command_texture) % 16 == 0); - struct cell_command_texture *texture = - (struct cell_command_texture *) - cell_batch_alloc16(cell, sizeof(*texture)); - - texture->opcode[0] = CELL_CMD_STATE_TEXTURE; - texture->unit = i; - if (cell->texture[i]) { - struct cell_resource *ct = cell->texture[i]; - uint level; - for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { - texture->start[level] = (ct->mapped + - ct->level_offset[level]); - texture->width[level] = u_minify(ct->base.width0, level); - texture->height[level] = u_minify(ct->base.height0, level); - texture->depth[level] = u_minify(ct->base.depth0, level); - } - texture->target = ct->base.target; - } - else { - uint level; - for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { - texture->start[level] = NULL; - texture->width[level] = 0; - texture->height[level] = 0; - texture->depth[level] = 0; - } - texture->target = 0; - } - } - } - cell->dirty_textures = 0x0; - } - - if (cell->dirty & CELL_NEW_VERTEX_INFO) { - emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, - &cell->vertex_info, sizeof(struct vertex_info)); - } - -#if 0 - if (cell->dirty & CELL_NEW_VS) { - const struct draw_context *const draw = cell->draw; - struct cell_shader_info info; - - info.num_outputs = 
draw_num_shader_outputs(draw); - info.declarations = (uintptr_t) draw->vs.machine.Declarations; - info.num_declarations = draw->vs.machine.NumDeclarations; - info.instructions = (uintptr_t) draw->vs.machine.Instructions; - info.num_instructions = draw->vs.machine.NumInstructions; - info.immediates = (uintptr_t) draw->vs.machine.Imms; - info.num_immediates = draw->vs.machine.ImmLimit / 4; - - emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info)); - } -#endif -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.h b/src/gallium/drivers/cell/ppu/cell_state_emit.h deleted file mode 100644 index 59f8affe8d3..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.h +++ /dev/null @@ -1,36 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef CELL_STATE_EMIT_H -#define CELL_STATE_EMIT_H - - -extern void -cell_emit_state(struct cell_context *cell); - - -#endif /* CELL_STATE_EMIT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c deleted file mode 100644 index dc33e7ccc2c..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ /dev/null @@ -1,1432 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file - * Generate code to perform all per-fragment operations. - * - * Code generated by these functions perform both alpha, depth, and stencil - * testing as well as alpha blending. - * - * \note - * Occlusion query is not supported, but this is the right place to add that - * support. - * - * \author Ian Romanick - */ - -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "cell_context.h" - -#include "rtasm/rtasm_ppc_spe.h" - - -/** - * Generate code to perform alpha testing. - * - * The code generated by this function uses the register specificed by - * \c mask as both an input and an output. - * - * \param dsa Current alpha-test state - * \param f Function to which code should be appended - * \param mask Index of register containing active fragment mask - * \param alphas Index of register containing per-fragment alpha values - * - * \note Emits a maximum of 6 instructions. - */ -static void -emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, int mask, int alphas) -{ - /* If the alpha function is either NEVER or ALWAYS, there is no need to - * load the reference value into a register. ALWAYS is a fairly common - * case, and this optimization saves 2 instructions. 
- */ - if (dsa->alpha.enabled - && (dsa->alpha.func != PIPE_FUNC_NEVER) - && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { - int ref = spe_allocate_available_register(f); - int tmp_a = spe_allocate_available_register(f); - int tmp_b = spe_allocate_available_register(f); - union { - float f; - unsigned u; - } ref_val; - boolean complement = FALSE; - - ref_val.f = dsa->alpha.ref; - - spe_il(f, ref, ref_val.u & 0x0000ffff); - spe_ilh(f, ref, ref_val.u >> 16); - - switch (dsa->alpha.func) { - case PIPE_FUNC_NOTEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - - case PIPE_FUNC_EQUAL: - spe_fceq(f, tmp_a, ref, alphas); - break; - - case PIPE_FUNC_LEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - - case PIPE_FUNC_GREATER: - spe_fcgt(f, tmp_a, ref, alphas); - break; - - case PIPE_FUNC_LESS: - complement = TRUE; - /* FALLTHROUGH */ - - case PIPE_FUNC_GEQUAL: - spe_fcgt(f, tmp_a, ref, alphas); - spe_fceq(f, tmp_b, ref, alphas); - spe_or(f, tmp_a, tmp_b, tmp_a); - break; - - case PIPE_FUNC_ALWAYS: - case PIPE_FUNC_NEVER: - default: - assert(0); - break; - } - - if (complement) { - spe_andc(f, mask, mask, tmp_a); - } else { - spe_and(f, mask, mask, tmp_a); - } - - spe_release_register(f, ref); - spe_release_register(f, tmp_a); - spe_release_register(f, tmp_b); - } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) { - spe_il(f, mask, 0); - } -} - - -/** - * Generate code to perform Z testing. Four Z values are tested at once. - * \param dsa Current depth-test state - * \param f Function to which code should be appended - * \param mask Index of register to contain depth-pass mask - * \param stored Index of register containing values from depth buffer - * \param calculated Index of register containing per-fragment depth values - * - * \return - * If the calculated depth comparison mask is the actual mask, \c FALSE is - * returned. If the calculated depth comparison mask is the compliment of - * the actual mask, \c TRUE is returned. 
- * - * \note Emits a maximum of 3 instructions. - */ -static boolean -emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, int mask, int stored, int calculated) -{ - unsigned func = (dsa->depth.enabled) - ? dsa->depth.func : PIPE_FUNC_ALWAYS; - int tmp = spe_allocate_available_register(f); - boolean compliment = FALSE; - - switch (func) { - case PIPE_FUNC_NEVER: - spe_il(f, mask, 0); - break; - - case PIPE_FUNC_NOTEQUAL: - compliment = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_EQUAL: - spe_ceq(f, mask, calculated, stored); - break; - - case PIPE_FUNC_LEQUAL: - compliment = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GREATER: - spe_clgt(f, mask, calculated, stored); - break; - - case PIPE_FUNC_LESS: - compliment = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GEQUAL: - spe_clgt(f, mask, calculated, stored); - spe_ceq(f, tmp, calculated, stored); - spe_or(f, mask, mask, tmp); - break; - - case PIPE_FUNC_ALWAYS: - spe_il(f, mask, ~0); - break; - - default: - assert(0); - break; - } - - spe_release_register(f, tmp); - return compliment; -} - - -/** - * Generate code to apply the stencil operation (after testing). - * \note Emits a maximum of 5 instructions. - * - * \warning - * Since \c out and \c in might be the same register, this routine cannot - * generate code that uses \c out as a temporary. 
- */ -static void -emit_stencil_op(struct spe_function *f, - int out, int in, int mask, unsigned op, unsigned ref) -{ - const int clamp = spe_allocate_available_register(f); - const int clamp_mask = spe_allocate_available_register(f); - const int result = spe_allocate_available_register(f); - - switch(op) { - case PIPE_STENCIL_OP_KEEP: - assert(0); - case PIPE_STENCIL_OP_ZERO: - spe_il(f, result, 0); - break; - case PIPE_STENCIL_OP_REPLACE: - spe_il(f, result, ref); - break; - case PIPE_STENCIL_OP_INCR: - /* clamp = [0xff, 0xff, 0xff, 0xff] */ - spe_il(f, clamp, 0x0ff); - /* result[i] = in[i] + 1 */ - spe_ai(f, result, in, 1); - /* clamp_mask[i] = (result[i] > 0xff) */ - spe_clgti(f, clamp_mask, result, 0x0ff); - /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */ - spe_selb(f, result, result, clamp, clamp_mask); - break; - case PIPE_STENCIL_OP_DECR: - spe_il(f, clamp, 0); - spe_ai(f, result, in, -1); - - /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned - * arithmetic. - */ - spe_clgti(f, clamp_mask, result, 0x0ff); - spe_selb(f, result, result, clamp, clamp_mask); - break; - case PIPE_STENCIL_OP_INCR_WRAP: - spe_ai(f, result, in, 1); - break; - case PIPE_STENCIL_OP_DECR_WRAP: - spe_ai(f, result, in, -1); - break; - case PIPE_STENCIL_OP_INVERT: - spe_nor(f, result, in, in); - break; - default: - assert(0); - } - - spe_selb(f, out, in, result, mask); - - spe_release_register(f, result); - spe_release_register(f, clamp_mask); - spe_release_register(f, clamp); -} - - -/** - * Generate code to do stencil test. Four pixels are tested at once. - * \param dsa Depth / stencil test state - * \param face 0 for front face, 1 for back face - * \param f Function to append instructions to - * \param mask Register containing mask of fragments passing the - * alpha test - * \param depth_mask Register containing mask of fragments passing the - * depth test - * \param depth_compliment Is \c depth_mask the compliment of the actual mask? 
- * \param stencil Register containing values from stencil buffer - * \param depth_pass Register to store mask of fragments passing stencil test - * and depth test - * - * \note - * Emits a maximum of 10 + (3 * 5) = 25 instructions. - */ -static int -emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, - struct pipe_stencil_ref *sr, - unsigned face, - struct spe_function *f, - int mask, - int depth_mask, - boolean depth_complement, - int stencil, - int depth_pass) -{ - int stencil_fail = spe_allocate_available_register(f); - int depth_fail = spe_allocate_available_register(f); - int stencil_mask = spe_allocate_available_register(f); - int stencil_pass = spe_allocate_available_register(f); - int face_stencil = spe_allocate_available_register(f); - int stencil_src = stencil; - const unsigned ref = (sr->ref_value[face] - & dsa->stencil[face].valuemask); - boolean complement = FALSE; - int stored; - int tmp = spe_allocate_available_register(f); - - - if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) - && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS) - && (dsa->stencil[face].valuemask != 0x0ff)) { - stored = spe_allocate_available_register(f); - spe_andi(f, stored, stencil, dsa->stencil[face].valuemask); - } else { - stored = stencil; - } - - - switch (dsa->stencil[face].func) { - case PIPE_FUNC_NEVER: - spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */ - break; - - case PIPE_FUNC_NOTEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_EQUAL: - /* stencil_mask[i] = (stored[i] == ref) */ - spe_ceqi(f, stencil_mask, stored, ref); - break; - - case PIPE_FUNC_LEQUAL: - complement = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GREATER: - complement = TRUE; - /* stencil_mask[i] = (stored[i] > ref) */ - spe_clgti(f, stencil_mask, stored, ref); - break; - - case PIPE_FUNC_LESS: - complement = TRUE; - /* FALLTHROUGH */ - case PIPE_FUNC_GEQUAL: - /* stencil_mask[i] = (stored[i] > ref) */ - spe_clgti(f, stencil_mask, stored, ref); - /* tmp[i] = 
(stored[i] == ref) */ - spe_ceqi(f, tmp, stored, ref); - /* stencil_mask[i] = stencil_mask[i] | tmp[i] */ - spe_or(f, stencil_mask, stencil_mask, tmp); - break; - - case PIPE_FUNC_ALWAYS: - /* See comment below. */ - break; - - default: - assert(0); - break; - } - - if (stored != stencil) { - spe_release_register(f, stored); - } - spe_release_register(f, tmp); - - - /* ALWAYS is a very common stencil-test, so some effort is applied to - * optimize that case. The stencil-pass mask is the same as the input - * fragment mask. This makes the stencil-test (above) a no-op, and the - * input fragment mask can be "renamed" the stencil-pass mask. - */ - if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) { - spe_release_register(f, stencil_pass); - stencil_pass = mask; - } else { - if (complement) { - spe_andc(f, stencil_pass, mask, stencil_mask); - } else { - spe_and(f, stencil_pass, mask, stencil_mask); - } - } - - if (depth_complement) { - spe_andc(f, depth_pass, stencil_pass, depth_mask); - } else { - spe_and(f, depth_pass, stencil_pass, depth_mask); - } - - - /* Conditionally emit code to update the stencil value under various - * condititons. Note that there is no need to generate code under the - * following circumstances: - * - * - Stencil write mask is zero. 
- * - For stencil-fail if the stencil test is ALWAYS - * - For depth-fail if the stencil test is NEVER - * - For depth-pass if the stencil test is NEVER - * - Any of the 3 conditions if the operation is KEEP - */ - if (dsa->stencil[face].writemask != 0) { - if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS) - && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) { - if (complement) { - spe_and(f, stencil_fail, mask, stencil_mask); - } else { - spe_andc(f, stencil_fail, mask, stencil_mask); - } - - emit_stencil_op(f, face_stencil, stencil_src, stencil_fail, - dsa->stencil[face].fail_op, - sr->ref_value[face]); - - stencil_src = face_stencil; - } - - if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) - && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) { - if (depth_complement) { - spe_and(f, depth_fail, stencil_pass, depth_mask); - } else { - spe_andc(f, depth_fail, stencil_pass, depth_mask); - } - - emit_stencil_op(f, face_stencil, stencil_src, depth_fail, - dsa->stencil[face].zfail_op, - sr->ref_value[face]); - stencil_src = face_stencil; - } - - if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) - && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) { - emit_stencil_op(f, face_stencil, stencil_src, depth_pass, - dsa->stencil[face].zpass_op, - sr->ref_value[face]); - stencil_src = face_stencil; - } - } - - spe_release_register(f, stencil_fail); - spe_release_register(f, depth_fail); - spe_release_register(f, stencil_mask); - if (stencil_pass != mask) { - spe_release_register(f, stencil_pass); - } - - /* If all of the stencil operations were KEEP or the stencil write mask was - * zero, "stencil_src" will still be set to "stencil". In this case - * release the "face_stencil" register. Otherwise apply the stencil write - * mask to select bits from the calculated stencil value and the previous - * stencil value. 
- */ - if (stencil_src == stencil) { - spe_release_register(f, face_stencil); - } else if (dsa->stencil[face].writemask != 0x0ff) { - int tmp = spe_allocate_available_register(f); - - spe_il(f, tmp, dsa->stencil[face].writemask); - spe_selb(f, stencil_src, stencil, stencil_src, tmp); - - spe_release_register(f, tmp); - } - - return stencil_src; -} - - -void -cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa, - struct pipe_stencil_ref *sr) -{ - struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base; - struct spe_function *const f = &cdsa->code; - - /* This code generates a maximum of 6 (alpha test) + 3 (depth test) - * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round - * up to 64 to make it a happy power-of-two. - */ - spe_init_func(f, SPE_INST_SIZE * 64); - - - /* Allocate registers for the function's input parameters. Cleverly (and - * clever code is usually dangerous, but I couldn't resist) the generated - * function returns a structure. Returned structures start with register - * 3, and the structure fields are ordered to match up exactly with the - * input parameters. 
- */ - int mask = spe_allocate_register(f, 3); - int depth = spe_allocate_register(f, 4); - int stencil = spe_allocate_register(f, 5); - int zvals = spe_allocate_register(f, 6); - int frag_a = spe_allocate_register(f, 7); - int facing = spe_allocate_register(f, 8); - - int depth_mask = spe_allocate_available_register(f); - - boolean depth_complement; - - - emit_alpha_test(dsa, f, mask, frag_a); - - depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals); - - if (dsa->stencil[0].enabled) { - const int front_depth_pass = spe_allocate_available_register(f); - int front_stencil = emit_stencil_test(dsa, sr, 0, f, mask, - depth_mask, depth_complement, - stencil, front_depth_pass); - - if (dsa->stencil[1].enabled) { - const int back_depth_pass = spe_allocate_available_register(f); - int back_stencil = emit_stencil_test(dsa, sr, 1, f, mask, - depth_mask, depth_complement, - stencil, back_depth_pass); - - /* If the front facing stencil value and the back facing stencil - * value are stored in the same register, there is no need to select - * a value based on the facing. This can happen if the stencil value - * was not modified due to the write masks being zero, the stencil - * operations being KEEP, etc. 
- */ - if (front_stencil != back_stencil) { - spe_selb(f, stencil, back_stencil, front_stencil, facing); - } - - if (back_stencil != stencil) { - spe_release_register(f, back_stencil); - } - - if (front_stencil != stencil) { - spe_release_register(f, front_stencil); - } - - spe_selb(f, mask, back_depth_pass, front_depth_pass, facing); - - spe_release_register(f, back_depth_pass); - } else { - if (front_stencil != stencil) { - spe_or(f, stencil, front_stencil, front_stencil); - spe_release_register(f, front_stencil); - } - spe_or(f, mask, front_depth_pass, front_depth_pass); - } - - spe_release_register(f, front_depth_pass); - } else if (dsa->depth.enabled) { - if (depth_complement) { - spe_andc(f, mask, mask, depth_mask); - } else { - spe_and(f, mask, mask, depth_mask); - } - } - - if (dsa->depth.writemask) { - spe_selb(f, depth, depth, zvals, mask); - } - - spe_bi(f, 0, 0, 0); /* return from function call */ - - -#if 0 - { - const uint32_t *p = f->store; - unsigned i; - - printf("# alpha (%sabled)\n", - (dsa->alpha.enabled) ? "en" : "dis"); - printf("# func: %u\n", dsa->alpha.func); - printf("# ref: %.2f\n", dsa->alpha.ref); - - printf("# depth (%sabled)\n", - (dsa->depth.enabled) ? "en" : "dis"); - printf("# func: %u\n", dsa->depth.func); - - for (i = 0; i < 2; i++) { - printf("# %s stencil (%sabled)\n", - (i == 0) ? "front" : "back", - (dsa->stencil[i].enabled) ? 
"en" : "dis"); - - printf("# func: %u\n", dsa->stencil[i].func); - printf("# op (sf, zf, zp): %u %u %u\n", - dsa->stencil[i].fail_op, - dsa->stencil[i].zfail_op, - dsa->stencil[i].zpass_op); - printf("# ref value / value mask / write mask: %02x %02x %02x\n", - sr->ref_value[i], - dsa->stencil[i].valuemask, - dsa->stencil[i].writemask); - } - - printf("\t.text\n"); - for (/* empty */; p < f->csr; p++) { - printf("\t.long\t0x%04x\n", *p); - } - fflush(stdout); - } -#endif -} - - -/** - * \note Emits a maximum of 3 instructions - */ -static int -emit_alpha_factor_calculation(struct spe_function *f, - unsigned factor, - int src_alpha, int dst_alpha, int const_alpha) -{ - int factor_reg; - int tmp; - - - switch (factor) { - case PIPE_BLENDFACTOR_ONE: - factor_reg = -1; - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA: - factor_reg = spe_allocate_available_register(f); - - spe_or(f, factor_reg, src_alpha, src_alpha); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: - factor_reg = dst_alpha; - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - factor_reg = -1; - break; - - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - factor_reg = spe_allocate_available_register(f); - - tmp = spe_allocate_available_register(f); - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor_reg, tmp, const_alpha); - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_CONST_ALPHA: - factor_reg = const_alpha; - break; - - case PIPE_BLENDFACTOR_ZERO: - factor_reg = -1; - break; - - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - tmp = spe_allocate_available_register(f); - factor_reg = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor_reg, tmp, src_alpha); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - tmp = spe_allocate_available_register(f); - factor_reg = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor_reg, tmp, dst_alpha); - - 
spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - default: - assert(0); - factor_reg = -1; - break; - } - - return factor_reg; -} - - -/** - * \note Emits a maximum of 6 instructions - */ -static void -emit_color_factor_calculation(struct spe_function *f, - unsigned sF, unsigned mask, - const int *src, - const int *dst, - const int *const_color, - int *factor) -{ - int tmp; - unsigned i; - - - factor[0] = -1; - factor[1] = -1; - factor[2] = -1; - factor[3] = -1; - - switch (sF) { - case PIPE_BLENDFACTOR_ONE: - break; - - case PIPE_BLENDFACTOR_SRC_COLOR: - for (i = 0; i < 3; ++i) { - if ((mask & (1U << i)) != 0) { - factor[i] = spe_allocate_available_register(f); - spe_or(f, factor[i], src[i], src[i]); - } - } - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA: - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - spe_or(f, factor[0], src[3], src[3]); - break; - - case PIPE_BLENDFACTOR_DST_ALPHA: - factor[0] = dst[3]; - factor[1] = dst[3]; - factor[2] = dst[3]; - break; - - case PIPE_BLENDFACTOR_DST_COLOR: - factor[0] = dst[0]; - factor[1] = dst[1]; - factor[2] = dst[2]; - break; - - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - tmp = spe_allocate_available_register(f); - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - /* Alpha saturate means min(As, 1-Ad). 
- */ - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, tmp, tmp, dst[3]); - spe_fcgt(f, factor[0], tmp, src[3]); - spe_selb(f, factor[0], src[3], tmp, factor[0]); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_CONST_COLOR: - tmp = spe_allocate_available_register(f); - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - - for (i = 0; i < 3; i++) { - factor[i] = spe_allocate_available_register(f); - - spe_fs(f, factor[i], tmp, const_color[i]); - } - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_CONST_COLOR: - for (i = 0; i < 3; i++) { - factor[i] = const_color[i]; - } - break; - - case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - tmp = spe_allocate_available_register(f); - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor[0], tmp, const_color[3]); - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_CONST_ALPHA: - factor[0] = const_color[3]; - factor[1] = factor[0]; - factor[2] = factor[0]; - break; - - case PIPE_BLENDFACTOR_ZERO: - break; - - case PIPE_BLENDFACTOR_INV_SRC_COLOR: - tmp = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - - for (i = 0; i < 3; ++i) { - if ((mask & (1U << i)) != 0) { - factor[i] = spe_allocate_available_register(f); - spe_fs(f, factor[i], tmp, src[i]); - } - } - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - tmp = spe_allocate_available_register(f); - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor[0], tmp, src[3]); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_DST_ALPHA: - tmp = spe_allocate_available_register(f); - factor[0] = spe_allocate_available_register(f); - factor[1] = factor[0]; - factor[2] = factor[0]; - - spe_il(f, tmp, 1); - 
spe_cuflt(f, tmp, tmp, 0); - spe_fs(f, factor[0], tmp, dst[3]); - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_INV_DST_COLOR: - tmp = spe_allocate_available_register(f); - - spe_il(f, tmp, 1); - spe_cuflt(f, tmp, tmp, 0); - - for (i = 0; i < 3; ++i) { - if ((mask & (1U << i)) != 0) { - factor[i] = spe_allocate_available_register(f); - spe_fs(f, factor[i], tmp, dst[i]); - } - } - - spe_release_register(f, tmp); - break; - - case PIPE_BLENDFACTOR_SRC1_COLOR: - case PIPE_BLENDFACTOR_SRC1_ALPHA: - case PIPE_BLENDFACTOR_INV_SRC1_COLOR: - case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: - default: - assert(0); - } -} - - -static void -emit_blend_calculation(struct spe_function *f, - unsigned func, unsigned sF, unsigned dF, - int src, int src_factor, int dst, int dst_factor) -{ - int tmp = spe_allocate_available_register(f); - - switch (func) { - case PIPE_BLEND_ADD: - if (sF == PIPE_BLENDFACTOR_ONE) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - /* Do nothing. */ - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_fa(f, src, src, dst); - } - } else if (sF == PIPE_BLENDFACTOR_ZERO) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, src, 0); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_or(f, src, dst, dst); - } else { - spe_fm(f, src, dst, dst_factor); - } - } else if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_fm(f, src, src, src_factor); - } else { - spe_fm(f, tmp, dst, dst_factor); - spe_fma(f, src, src, src_factor, tmp); - } - break; - - case PIPE_BLEND_SUBTRACT: - if (sF == PIPE_BLENDFACTOR_ONE) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - /* Do nothing. 
*/ - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_fs(f, src, src, dst); - } - } else if (sF == PIPE_BLENDFACTOR_ZERO) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, src, 0); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_il(f, tmp, 0); - spe_fs(f, src, tmp, dst); - } else { - spe_fm(f, src, dst, dst_factor); - } - } else if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_fm(f, src, src, src_factor); - } else { - spe_fm(f, tmp, dst, dst_factor); - spe_fms(f, src, src, src_factor, tmp); - } - break; - - case PIPE_BLEND_REVERSE_SUBTRACT: - if (sF == PIPE_BLENDFACTOR_ONE) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, tmp, 0); - spe_fs(f, src, tmp, src); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_fs(f, src, dst, src); - } - } else if (sF == PIPE_BLENDFACTOR_ZERO) { - if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_il(f, src, 0); - } else if (dF == PIPE_BLENDFACTOR_ONE) { - spe_or(f, src, dst, dst); - } else { - spe_fm(f, src, dst, dst_factor); - } - } else if (dF == PIPE_BLENDFACTOR_ZERO) { - spe_fm(f, src, src, src_factor); - } else { - spe_fm(f, tmp, src, src_factor); - spe_fms(f, src, src, dst_factor, tmp); - } - break; - - case PIPE_BLEND_MIN: - spe_cgt(f, tmp, src, dst); - spe_selb(f, src, src, dst, tmp); - break; - - case PIPE_BLEND_MAX: - spe_cgt(f, tmp, src, dst); - spe_selb(f, src, dst, src, tmp); - break; - - default: - assert(0); - } - - spe_release_register(f, tmp); -} - - -/** - * Generate code to perform alpha blending on the SPE - */ -void -cell_generate_alpha_blend(struct cell_blend_state *cb) -{ - struct pipe_blend_state *const b = &cb->base; - struct spe_function *const f = &cb->code; - - /* This code generates a maximum of 3 (source alpha factor) - * + 3 (destination alpha factor) + (3 * 6) (source color factor) - * + (3 * 6) (destination color factor) + (4 * 2) (blend equation) - * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to - * make it a happy power-of-two. 
- */ - spe_init_func(f, SPE_INST_SIZE * 64); - - - const int frag[4] = { - spe_allocate_register(f, 3), - spe_allocate_register(f, 4), - spe_allocate_register(f, 5), - spe_allocate_register(f, 6), - }; - const int pixel[4] = { - spe_allocate_register(f, 7), - spe_allocate_register(f, 8), - spe_allocate_register(f, 9), - spe_allocate_register(f, 10), - }; - const int const_color[4] = { - spe_allocate_register(f, 11), - spe_allocate_register(f, 12), - spe_allocate_register(f, 13), - spe_allocate_register(f, 14), - }; - unsigned func[4]; - unsigned sF[4]; - unsigned dF[4]; - unsigned i; - int src_factor[4]; - int dst_factor[4]; - - - /* Does the selected blend mode make use of the source / destination - * color (RGB) blend factors? - */ - boolean need_color_factor = b->rt[0].blend_enable - && (b->rt[0].rgb_func != PIPE_BLEND_MIN) - && (b->rt[0].rgb_func != PIPE_BLEND_MAX); - - /* Does the selected blend mode make use of the source / destination - * alpha blend factors? - */ - boolean need_alpha_factor = b->rt[0].blend_enable - && (b->rt[0].alpha_func != PIPE_BLEND_MIN) - && (b->rt[0].alpha_func != PIPE_BLEND_MAX); - - - if (b->rt[0].blend_enable) { - sF[0] = b->rt[0].rgb_src_factor; - sF[1] = sF[0]; - sF[2] = sF[0]; - switch (b->rt[0].alpha_src_factor & 0x0f) { - case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: - sF[3] = PIPE_BLENDFACTOR_ONE; - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_SRC1_COLOR: - sF[3] = b->rt[0].alpha_src_factor + 1; - break; - default: - sF[3] = b->rt[0].alpha_src_factor; - } - - dF[0] = b->rt[0].rgb_dst_factor; - dF[1] = dF[0]; - dF[2] = dF[0]; - switch (b->rt[0].alpha_dst_factor & 0x0f) { - case PIPE_BLENDFACTOR_SRC_COLOR: - case PIPE_BLENDFACTOR_DST_COLOR: - case PIPE_BLENDFACTOR_CONST_COLOR: - case PIPE_BLENDFACTOR_SRC1_COLOR: - dF[3] = b->rt[0].alpha_dst_factor + 1; - break; - default: - dF[3] = b->rt[0].alpha_dst_factor; - } - - func[0] = 
b->rt[0].rgb_func; - func[1] = func[0]; - func[2] = func[0]; - func[3] = b->rt[0].alpha_func; - } else { - sF[0] = PIPE_BLENDFACTOR_ONE; - sF[1] = PIPE_BLENDFACTOR_ONE; - sF[2] = PIPE_BLENDFACTOR_ONE; - sF[3] = PIPE_BLENDFACTOR_ONE; - dF[0] = PIPE_BLENDFACTOR_ZERO; - dF[1] = PIPE_BLENDFACTOR_ZERO; - dF[2] = PIPE_BLENDFACTOR_ZERO; - dF[3] = PIPE_BLENDFACTOR_ZERO; - - func[0] = PIPE_BLEND_ADD; - func[1] = PIPE_BLEND_ADD; - func[2] = PIPE_BLEND_ADD; - func[3] = PIPE_BLEND_ADD; - } - - - /* If alpha writing is enabled and the alpha blend mode requires use of - * the alpha factor, calculate the alpha factor. - */ - if (((b->rt[0].colormask & 8) != 0) && need_alpha_factor) { - src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3], - frag[3], pixel[3]); - - /* If the alpha destination blend factor is the same as the alpha source - * blend factor, re-use the previously calculated value. - */ - dst_factor[3] = (dF[3] == sF[3]) - ? src_factor[3] - : emit_alpha_factor_calculation(f, dF[3], const_color[3], - frag[3], pixel[3]); - } - - - if (sF[0] == sF[3]) { - src_factor[0] = src_factor[3]; - src_factor[1] = src_factor[3]; - src_factor[2] = src_factor[3]; - } else if (sF[0] == dF[3]) { - src_factor[0] = dst_factor[3]; - src_factor[1] = dst_factor[3]; - src_factor[2] = dst_factor[3]; - } else if (need_color_factor) { - emit_color_factor_calculation(f, - b->rt[0].rgb_src_factor, - b->rt[0].colormask, - frag, pixel, const_color, src_factor); - } - - - if (dF[0] == sF[3]) { - dst_factor[0] = src_factor[3]; - dst_factor[1] = src_factor[3]; - dst_factor[2] = src_factor[3]; - } else if (dF[0] == dF[3]) { - dst_factor[0] = dst_factor[3]; - dst_factor[1] = dst_factor[3]; - dst_factor[2] = dst_factor[3]; - } else if (dF[0] == sF[0]) { - dst_factor[0] = src_factor[0]; - dst_factor[1] = src_factor[1]; - dst_factor[2] = src_factor[2]; - } else if (need_color_factor) { - emit_color_factor_calculation(f, - b->rt[0].rgb_dst_factor, - b->rt[0].colormask, - frag, pixel, 
const_color, dst_factor); - } - - - - for (i = 0; i < 4; ++i) { - if ((b->rt[0].colormask & (1U << i)) != 0) { - emit_blend_calculation(f, - func[i], sF[i], dF[i], - frag[i], src_factor[i], - pixel[i], dst_factor[i]); - } - } - - spe_bi(f, 0, 0, 0); - -#if 0 - { - const uint32_t *p = f->store; - - printf("# %u instructions\n", f->csr - f->store); - printf("# blend (%sabled)\n", - (cb->base.blend_enable) ? "en" : "dis"); - printf("# RGB func / sf / df: %u %u %u\n", - cb->base.rgb_func, - cb->base.rgb_src_factor, - cb->base.rgb_dst_factor); - printf("# ALP func / sf / df: %u %u %u\n", - cb->base.alpha_func, - cb->base.alpha_src_factor, - cb->base.alpha_dst_factor); - - printf("\t.text\n"); - for (/* empty */; p < f->csr; p++) { - printf("\t.long\t0x%04x\n", *p); - } - fflush(stdout); - } -#endif -} - - -static int -PC_OFFSET(const struct spe_function *f, const void *d) -{ - const intptr_t pc = (intptr_t) &f->store[f->num_inst]; - const intptr_t ea = ~0x0f & (intptr_t) d; - - return (ea - pc) >> 2; -} - - -/** - * Generate code to perform color conversion and logic op - * - * \bug - * The code generated by this function should also perform dithering. - * - * \bug - * The code generated by this function should also perform color-write - * masking. - * - * \bug - * Only two framebuffer formats are supported at this time. - */ -void -cell_generate_logic_op(struct spe_function *f, - const struct pipe_blend_state *blend, - struct pipe_surface *surf) -{ - const unsigned logic_op = (blend->logicop_enable) - ? blend->logicop_func : PIPE_LOGICOP_COPY; - - /* This code generates a maximum of 37 instructions. An additional 32 - * bytes (equiv. to 8 instructions) are needed for data storage. Round up - * to 64 to make it a happy power-of-two. - */ - spe_init_func(f, SPE_INST_SIZE * 64); - - - /* Pixel colors in framebuffer format in AoS layout. 
- */ - const int pixel[4] = { - spe_allocate_register(f, 3), - spe_allocate_register(f, 4), - spe_allocate_register(f, 5), - spe_allocate_register(f, 6), - }; - - /* Fragment colors stored as floats in SoA layout. - */ - const int frag[4] = { - spe_allocate_register(f, 7), - spe_allocate_register(f, 8), - spe_allocate_register(f, 9), - spe_allocate_register(f, 10), - }; - - const int mask = spe_allocate_register(f, 11); - - - /* Short-circuit the noop and invert cases. - */ - if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->rt[0].colormask == 0)) { - spe_bi(f, 0, 0, 0); - return; - } else if (logic_op == PIPE_LOGICOP_INVERT) { - spe_nor(f, pixel[0], pixel[0], pixel[0]); - spe_nor(f, pixel[1], pixel[1], pixel[1]); - spe_nor(f, pixel[2], pixel[2], pixel[2]); - spe_nor(f, pixel[3], pixel[3], pixel[3]); - spe_bi(f, 0, 0, 0); - return; - } - - - const int tmp[4] = { - spe_allocate_available_register(f), - spe_allocate_available_register(f), - spe_allocate_available_register(f), - spe_allocate_available_register(f), - }; - - const int shuf_xpose_hi = spe_allocate_available_register(f); - const int shuf_xpose_lo = spe_allocate_available_register(f); - const int shuf_color = spe_allocate_available_register(f); - - - /* Pointer to the begining of the function's private data area. - */ - uint32_t *const data = ((uint32_t *) f->store) + (64 - 8); - - - /* Convert fragment colors to framebuffer format in AoS layout. 
- */ - switch (surf->format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - data[0] = 0x00010203; - data[1] = 0x10111213; - data[2] = 0x04050607; - data[3] = 0x14151617; - data[4] = 0x0c000408; - data[5] = 0x80808080; - data[6] = 0x80808080; - data[7] = 0x80808080; - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - data[0] = 0x03020100; - data[1] = 0x13121110; - data[2] = 0x07060504; - data[3] = 0x17161514; - data[4] = 0x0804000c; - data[5] = 0x80808080; - data[6] = 0x80808080; - data[7] = 0x80808080; - break; - default: - fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()"); - ASSERT(0); - } - - spe_ilh(f, tmp[0], 0x0808); - spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0)); - spe_lqr(f, shuf_color, PC_OFFSET(f, data+4)); - spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]); - - spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi); - spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo); - spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi); - spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo); - - spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi); - spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo); - spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi); - spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo); - - spe_cfltu(f, frag[0], frag[0], 32); - spe_cfltu(f, frag[1], frag[1], 32); - spe_cfltu(f, frag[2], frag[2], 32); - spe_cfltu(f, frag[3], frag[3], 32); - - spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color); - spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color); - spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color); - spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color); - - - /* If logic op is enabled, perform the requested logical operation on the - * converted fragment colors and the pixel colors. 
- */ - switch (logic_op) { - case PIPE_LOGICOP_CLEAR: - spe_il(f, frag[0], 0); - spe_il(f, frag[1], 0); - spe_il(f, frag[2], 0); - spe_il(f, frag[3], 0); - break; - case PIPE_LOGICOP_NOR: - spe_nor(f, frag[0], frag[0], pixel[0]); - spe_nor(f, frag[1], frag[1], pixel[1]); - spe_nor(f, frag[2], frag[2], pixel[2]); - spe_nor(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_AND_INVERTED: - spe_andc(f, frag[0], pixel[0], frag[0]); - spe_andc(f, frag[1], pixel[1], frag[1]); - spe_andc(f, frag[2], pixel[2], frag[2]); - spe_andc(f, frag[3], pixel[3], frag[3]); - break; - case PIPE_LOGICOP_COPY_INVERTED: - spe_nor(f, frag[0], frag[0], frag[0]); - spe_nor(f, frag[1], frag[1], frag[1]); - spe_nor(f, frag[2], frag[2], frag[2]); - spe_nor(f, frag[3], frag[3], frag[3]); - break; - case PIPE_LOGICOP_AND_REVERSE: - spe_andc(f, frag[0], frag[0], pixel[0]); - spe_andc(f, frag[1], frag[1], pixel[1]); - spe_andc(f, frag[2], frag[2], pixel[2]); - spe_andc(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_XOR: - spe_xor(f, frag[0], frag[0], pixel[0]); - spe_xor(f, frag[1], frag[1], pixel[1]); - spe_xor(f, frag[2], frag[2], pixel[2]); - spe_xor(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_NAND: - spe_nand(f, frag[0], frag[0], pixel[0]); - spe_nand(f, frag[1], frag[1], pixel[1]); - spe_nand(f, frag[2], frag[2], pixel[2]); - spe_nand(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_AND: - spe_and(f, frag[0], frag[0], pixel[0]); - spe_and(f, frag[1], frag[1], pixel[1]); - spe_and(f, frag[2], frag[2], pixel[2]); - spe_and(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_EQUIV: - spe_eqv(f, frag[0], frag[0], pixel[0]); - spe_eqv(f, frag[1], frag[1], pixel[1]); - spe_eqv(f, frag[2], frag[2], pixel[2]); - spe_eqv(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_OR_INVERTED: - spe_orc(f, frag[0], pixel[0], frag[0]); - spe_orc(f, frag[1], pixel[1], frag[1]); - spe_orc(f, frag[2], pixel[2], frag[2]); - spe_orc(f, frag[3], 
pixel[3], frag[3]); - break; - case PIPE_LOGICOP_COPY: - break; - case PIPE_LOGICOP_OR_REVERSE: - spe_orc(f, frag[0], frag[0], pixel[0]); - spe_orc(f, frag[1], frag[1], pixel[1]); - spe_orc(f, frag[2], frag[2], pixel[2]); - spe_orc(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_OR: - spe_or(f, frag[0], frag[0], pixel[0]); - spe_or(f, frag[1], frag[1], pixel[1]); - spe_or(f, frag[2], frag[2], pixel[2]); - spe_or(f, frag[3], frag[3], pixel[3]); - break; - case PIPE_LOGICOP_SET: - spe_il(f, frag[0], ~0); - spe_il(f, frag[1], ~0); - spe_il(f, frag[2], ~0); - spe_il(f, frag[3], ~0); - break; - - /* These two cases are short-circuited above. - */ - case PIPE_LOGICOP_INVERT: - case PIPE_LOGICOP_NOOP: - default: - assert(0); - } - - - /* Apply fragment mask. - */ - spe_ilh(f, tmp[0], 0x0000); - spe_ilh(f, tmp[1], 0x0404); - spe_ilh(f, tmp[2], 0x0808); - spe_ilh(f, tmp[3], 0x0c0c); - - spe_shufb(f, tmp[0], mask, mask, tmp[0]); - spe_shufb(f, tmp[1], mask, mask, tmp[1]); - spe_shufb(f, tmp[2], mask, mask, tmp[2]); - spe_shufb(f, tmp[3], mask, mask, tmp[3]); - - spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]); - spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]); - spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]); - spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]); - - spe_bi(f, 0, 0, 0); - -#if 0 - { - const uint32_t *p = f->store; - unsigned i; - - printf("# %u instructions\n", f->csr - f->store); - - printf("\t.text\n"); - for (i = 0; i < 64; i++) { - printf("\t.long\t0x%04x\n", p[i]); - } - fflush(stdout); - } -#endif -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h deleted file mode 100644 index a8267a51331..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef CELL_STATE_PER_FRAGMENT_H -#define CELL_STATE_PER_FRAGMENT_H - -extern void -cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa); - -extern void -cell_generate_alpha_blend(struct cell_blend_state *cb); - -extern void -cell_generate_logic_op(struct spe_function *f, - const struct pipe_blend_state *blend, - struct pipe_surface *surf); - -#endif /* CELL_STATE_PER_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c deleted file mode 100644 index ddf14772689..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ /dev/null @@ -1,229 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_defines.h" -#include "util/u_memory.h" -#include "util/u_inlines.h" -#include "draw/draw_context.h" -#include "tgsi/tgsi_parse.h" - -#include "cell_context.h" -#include "cell_state.h" -#include "cell_gen_fp.h" -#include "cell_texture.h" - - -/** cast wrapper */ -static INLINE struct cell_fragment_shader_state * -cell_fragment_shader_state(void *shader) -{ - return (struct cell_fragment_shader_state *) shader; -} - - -/** cast wrapper */ -static INLINE struct cell_vertex_shader_state * -cell_vertex_shader_state(void *shader) -{ - return (struct cell_vertex_shader_state *) shader; -} - - -/** - * Create fragment shader state. 
- * Called via pipe->create_fs_state() - */ -static void * -cell_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *templ) -{ - struct cell_context *cell = cell_context(pipe); - struct cell_fragment_shader_state *cfs; - - cfs = CALLOC_STRUCT(cell_fragment_shader_state); - if (!cfs) - return NULL; - - cfs->shader.tokens = tgsi_dup_tokens(templ->tokens); - if (!cfs->shader.tokens) { - FREE(cfs); - return NULL; - } - - tgsi_scan_shader(templ->tokens, &cfs->info); - - cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code); - - return cfs; -} - - -/** - * Called via pipe->bind_fs_state() - */ -static void -cell_bind_fs_state(struct pipe_context *pipe, void *fs) -{ - struct cell_context *cell = cell_context(pipe); - - cell->fs = cell_fragment_shader_state(fs); - - cell->dirty |= CELL_NEW_FS; -} - - -/** - * Called via pipe->delete_fs_state() - */ -static void -cell_delete_fs_state(struct pipe_context *pipe, void *fs) -{ - struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs); - - spe_release_func(&cfs->code); - - FREE((void *) cfs->shader.tokens); - FREE(cfs); -} - - -/** - * Create vertex shader state. 
- * Called via pipe->create_vs_state() - */ -static void * -cell_create_vs_state(struct pipe_context *pipe, - const struct pipe_shader_state *templ) -{ - struct cell_context *cell = cell_context(pipe); - struct cell_vertex_shader_state *cvs; - - cvs = CALLOC_STRUCT(cell_vertex_shader_state); - if (!cvs) - return NULL; - - cvs->shader.tokens = tgsi_dup_tokens(templ->tokens); - if (!cvs->shader.tokens) { - FREE(cvs); - return NULL; - } - - tgsi_scan_shader(templ->tokens, &cvs->info); - - cvs->draw_data = draw_create_vertex_shader(cell->draw, &cvs->shader); - if (cvs->draw_data == NULL) { - FREE( (void *) cvs->shader.tokens ); - FREE( cvs ); - return NULL; - } - - return cvs; -} - - -/** - * Called via pipe->bind_vs_state() - */ -static void -cell_bind_vs_state(struct pipe_context *pipe, void *vs) -{ - struct cell_context *cell = cell_context(pipe); - - cell->vs = cell_vertex_shader_state(vs); - - draw_bind_vertex_shader(cell->draw, - (cell->vs ? cell->vs->draw_data : NULL)); - - cell->dirty |= CELL_NEW_VS; -} - - -/** - * Called via pipe->delete_vs_state() - */ -static void -cell_delete_vs_state(struct pipe_context *pipe, void *vs) -{ - struct cell_context *cell = cell_context(pipe); - struct cell_vertex_shader_state *cvs = cell_vertex_shader_state(vs); - - draw_delete_vertex_shader(cell->draw, cvs->draw_data); - FREE( (void *) cvs->shader.tokens ); - FREE( cvs ); -} - - -/** - * Called via pipe->set_constant_buffer() - */ -static void -cell_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - struct pipe_resource *constants) -{ - struct cell_context *cell = cell_context(pipe); - unsigned size = constants ? constants->width0 : 0; - const void *data = constants ? 
cell_resource(constants)->data : NULL; - - assert(shader < PIPE_SHADER_TYPES); - assert(index == 0); - - if (cell->constants[shader] == constants) - return; - - draw_flush(cell->draw); - - /* note: reference counting */ - pipe_resource_reference(&cell->constants[shader], constants); - - if(shader == PIPE_SHADER_VERTEX) { - draw_set_mapped_constant_buffer(cell->draw, PIPE_SHADER_VERTEX, 0, - data, size); - } - - cell->mapped_constants[shader] = data; - - if (shader == PIPE_SHADER_VERTEX) - cell->dirty |= CELL_NEW_VS_CONSTANTS; - else if (shader == PIPE_SHADER_FRAGMENT) - cell->dirty |= CELL_NEW_FS_CONSTANTS; -} - - -void -cell_init_shader_functions(struct cell_context *cell) -{ - cell->pipe.create_fs_state = cell_create_fs_state; - cell->pipe.bind_fs_state = cell_bind_fs_state; - cell->pipe.delete_fs_state = cell_delete_fs_state; - - cell->pipe.create_vs_state = cell_create_vs_state; - cell->pipe.bind_vs_state = cell_bind_vs_state; - cell->pipe.delete_vs_state = cell_delete_vs_state; - - cell->pipe.set_constant_buffer = cell_set_constant_buffer; -} diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c deleted file mode 100644 index 7f65b82619e..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ /dev/null @@ -1,120 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - - -#include "cell_context.h" -#include "cell_state.h" - -#include "util/u_memory.h" -#include "util/u_transfer.h" -#include "draw/draw_context.h" - - -static void * -cell_create_vertex_elements_state(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_element *attribs) -{ - struct cell_velems_state *velems; - assert(count <= PIPE_MAX_ATTRIBS); - velems = (struct cell_velems_state *) MALLOC(sizeof(struct cell_velems_state)); - if (velems) { - velems->count = count; - memcpy(velems->velem, attribs, sizeof(*attribs) * count); - } - return velems; -} - -static void -cell_bind_vertex_elements_state(struct pipe_context *pipe, - void *velems) -{ - struct cell_context *cell = cell_context(pipe); - struct cell_velems_state *cell_velems = (struct cell_velems_state *) velems; - - cell->velems = cell_velems; - - cell->dirty |= CELL_NEW_VERTEX; - - if (cell_velems) - draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem); -} - -static void -cell_delete_vertex_elements_state(struct pipe_context *pipe, void *velems) -{ - FREE( velems ); -} - - -static void -cell_set_vertex_buffers(struct pipe_context *pipe, - unsigned count, - const struct pipe_vertex_buffer *buffers) -{ - struct cell_context *cell = cell_context(pipe); - - assert(count <= PIPE_MAX_ATTRIBS); - - util_copy_vertex_buffers(cell->vertex_buffer, - &cell->num_vertex_buffers, - buffers, count); - - cell->dirty |= CELL_NEW_VERTEX; - - draw_set_vertex_buffers(cell->draw, count, buffers); -} - - -static void -cell_set_index_buffer(struct pipe_context *pipe, - const struct pipe_index_buffer *ib) -{ - struct cell_context *cell = cell_context(pipe); - - if (ib) - memcpy(&cell->index_buffer, ib, sizeof(cell->index_buffer)); - else - memset(&cell->index_buffer, 0, sizeof(cell->index_buffer)); - - draw_set_index_buffer(cell->draw, ib); -} - - -void 
-cell_init_vertex_functions(struct cell_context *cell) -{ - cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; - cell->pipe.set_index_buffer = cell_set_index_buffer; - cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state; - cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state; - cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state; - cell->pipe.redefine_user_buffer = u_default_redefine_user_buffer; -} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c deleted file mode 100644 index 777454479b1..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "util/u_surface.h" -#include "cell_context.h" -#include "cell_surface.h" - - -void -cell_init_surface_functions(struct cell_context *cell) -{ - cell->pipe.resource_copy_region = util_resource_copy_region; -} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.h b/src/gallium/drivers/cell/ppu/cell_surface.h deleted file mode 100644 index 9e58f329443..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_surface.h +++ /dev/null @@ -1,42 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - -#ifndef CELL_SURFACE_H -#define CELL_SURFACE_H - - -struct cell_context; - - -extern void -cell_init_surface_functions(struct cell_context *cell); - - -#endif /* SP_SURFACE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c deleted file mode 100644 index 946a7050e5f..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ /dev/null @@ -1,644 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - /* - * Authors: - * Keith Whitwell - * Michel DÃ¤nzer - * Brian Paul - */ - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "util/u_inlines.h" -#include "util/u_transfer.h" -#include "util/u_format.h" -#include "util/u_math.h" -#include "util/u_memory.h" - -#include "cell_context.h" -#include "cell_screen.h" -#include "cell_state.h" -#include "cell_texture.h" - -#include "state_tracker/sw_winsys.h" - - - -static boolean -cell_resource_layout(struct pipe_screen *screen, - struct cell_resource *ct) -{ - struct pipe_resource *pt = &ct->base; - unsigned level; - unsigned width = pt->width0; - unsigned height = pt->height0; - unsigned depth = pt->depth0; - - ct->buffer_size = 0; - - for (level = 0; level <= pt->last_level; level++) { - unsigned size; - unsigned w_tile, h_tile; - - assert(level < CELL_MAX_TEXTURE_LEVELS); - - /* width, height, rounded up to tile size */ - w_tile = align(width, TILE_SIZE); - h_tile = align(height, TILE_SIZE); - - ct->stride[level] = util_format_get_stride(pt->format, w_tile); - - ct->level_offset[level] = ct->buffer_size; - - size = ct->stride[level] * util_format_get_nblocksy(pt->format, h_tile); - if (pt->target == PIPE_TEXTURE_CUBE) - size *= 6; - else - size *= depth; - - ct->buffer_size += size; - - width = u_minify(width, 1); - height = u_minify(height, 1); - depth = u_minify(depth, 1); - } - - ct->data = align_malloc(ct->buffer_size, 16); - - return ct->data != NULL; -} - - -/** - * Texture layout for simple color buffers. 
- */ -static boolean -cell_displaytarget_layout(struct pipe_screen *screen, - struct cell_resource * ct) -{ - struct sw_winsys *winsys = cell_screen(screen)->winsys; - - /* Round up the surface size to a multiple of the tile size? - */ - ct->dt = winsys->displaytarget_create(winsys, - ct->base.bind, - ct->base.format, - ct->base.width0, - ct->base.height0, - 16, - &ct->dt_stride ); - - return ct->dt != NULL; -} - -static struct pipe_resource * -cell_resource_create(struct pipe_screen *screen, - const struct pipe_resource *templat) -{ - struct cell_resource *ct = CALLOC_STRUCT(cell_resource); - if (!ct) - return NULL; - - ct->base = *templat; - pipe_reference_init(&ct->base.reference, 1); - ct->base.screen = screen; - - /* Create both a displaytarget (linear) and regular texture - * (twiddled). Convert twiddled->linear at flush_frontbuffer time. - */ - if (ct->base.bind & (PIPE_BIND_DISPLAY_TARGET | - PIPE_BIND_SCANOUT | - PIPE_BIND_SHARED)) { - if (!cell_displaytarget_layout(screen, ct)) - goto fail; - } - - if (!cell_resource_layout(screen, ct)) - goto fail; - - return &ct->base; - -fail: - if (ct->dt) { - struct sw_winsys *winsys = cell_screen(screen)->winsys; - winsys->displaytarget_destroy(winsys, ct->dt); - } - - FREE(ct); - - return NULL; -} - - -static void -cell_resource_destroy(struct pipe_screen *scrn, struct pipe_resource *pt) -{ - struct cell_screen *screen = cell_screen(scrn); - struct sw_winsys *winsys = screen->winsys; - struct cell_resource *ct = cell_resource(pt); - - if (ct->dt) { - /* display target */ - winsys->displaytarget_destroy(winsys, ct->dt); - } - else if (!ct->userBuffer) { - align_free(ct->data); - } - - FREE(ct); -} - - - -/** - * Convert image from linear layout to tiled layout. 4-byte pixels. 
- */ -static void -twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, - uint src_stride, const uint *src) -{ - const uint tile_size2 = tile_size * tile_size; - const uint h_t = (h + tile_size - 1) / tile_size; - const uint w_t = (w + tile_size - 1) / tile_size; - - uint it, jt; /* tile counters */ - uint i, j; /* intra-tile counters */ - - src_stride /= 4; /* convert from bytes to pixels */ - - /* loop over dest tiles */ - for (it = 0; it < h_t; it++) { - for (jt = 0; jt < w_t; jt++) { - /* start of dest tile: */ - uint *tdst = dst + (it * w_t + jt) * tile_size2; - - /* compute size of this tile (may be smaller than tile_size) */ - /* XXX note: a compiler bug was found here. That's why the code - * looks as it does. - */ - uint tile_width = w - jt * tile_size; - tile_width = MIN2(tile_width, tile_size); - uint tile_height = h - it * tile_size; - tile_height = MIN2(tile_height, tile_size); - - /* loop over texels in the tile */ - for (i = 0; i < tile_height; i++) { - for (j = 0; j < tile_width; j++) { - const uint srci = it * tile_size + i; - const uint srcj = jt * tile_size + j; - ASSERT(srci < h); - ASSERT(srcj < w); - tdst[i * tile_size + j] = src[srci * src_stride + srcj]; - } - } - } - } -} - - -/** - * For Cell. Basically, rearrange the pixels/quads from this layout: - * +--+--+--+--+ - * |p0|p1|p2|p3|.... - * +--+--+--+--+ - * - * to this layout: - * +--+--+ - * |p0|p1|.... - * +--+--+ - * |p2|p3| - * +--+--+ - */ -static void -twiddle_tile(const uint *tileIn, uint *tileOut) -{ - int y, x; - - for (y = 0; y < TILE_SIZE; y+=2) { - for (x = 0; x < TILE_SIZE; x+=2) { - int k = 4 * (y/2 * TILE_SIZE/2 + x/2); - tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k]; - tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1]; - tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2]; - tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3]; - } - } -} - - -/** - * Convert image from tiled layout to linear layout. 4-byte pixels. 
- */ -static void -untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, - uint dst_stride, const uint *src) -{ - const uint tile_size2 = tile_size * tile_size; - const uint h_t = (h + tile_size - 1) / tile_size; - const uint w_t = (w + tile_size - 1) / tile_size; - uint *tile_buf; - uint it, jt; /* tile counters */ - uint i, j; /* intra-tile counters */ - - dst_stride /= 4; /* convert from bytes to pixels */ - - tile_buf = align_malloc(tile_size * tile_size * 4, 16); - - /* loop over src tiles */ - for (it = 0; it < h_t; it++) { - for (jt = 0; jt < w_t; jt++) { - /* start of src tile: */ - const uint *tsrc = src + (it * w_t + jt) * tile_size2; - - twiddle_tile(tsrc, tile_buf); - tsrc = tile_buf; - - /* compute size of this tile (may be smaller than tile_size) */ - /* XXX note: a compiler bug was found here. That's why the code - * looks as it does. - */ - uint tile_width = w - jt * tile_size; - tile_width = MIN2(tile_width, tile_size); - uint tile_height = h - it * tile_size; - tile_height = MIN2(tile_height, tile_size); - - /* loop over texels in the tile */ - for (i = 0; i < tile_height; i++) { - for (j = 0; j < tile_width; j++) { - uint dsti = it * tile_size + i; - uint dstj = jt * tile_size + j; - ASSERT(dsti < h); - ASSERT(dstj < w); - dst[dsti * dst_stride + dstj] = tsrc[i * tile_size + j]; - } - } - } - } - - align_free(tile_buf); -} - - -static struct pipe_surface * -cell_create_surface(struct pipe_context *ctx, - struct pipe_resource *pt, - const struct pipe_surface *surf_tmpl) -{ - struct cell_resource *ct = cell_resource(pt); - struct pipe_surface *ps; - - assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer); - ps = CALLOC_STRUCT(pipe_surface); - if (ps) { - pipe_reference_init(&ps->reference, 1); - pipe_resource_reference(&ps->texture, pt); - ps->format = surf_tmpl->format; - ps->context = ctx; - ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level); - ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level); - /* XXX may 
need to override usage flags (see sp_texture.c) */ - ps->usage = surf_tmpl->usage; - ps->u.tex.level = surf_tmpl->u.tex.level; - ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer; - ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer; - } - return ps; -} - - -static void -cell_surface_destroy(struct pipe_context *ctx, struct pipe_surface *surf) -{ - pipe_resource_reference(&surf->texture, NULL); - FREE(surf); -} - - -/** - * Create new pipe_transfer object. - * This is used by the user to put tex data into a texture (and get it - * back out for glGetTexImage). - */ -static struct pipe_transfer * -cell_get_transfer(struct pipe_context *ctx, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box) -{ - struct cell_resource *ct = cell_resource(resource); - struct cell_transfer *ctrans; - enum pipe_format format = resource->format; - - assert(resource); - assert(level <= resource->last_level); - - /* make sure the requested region is in the image bounds */ - assert(box->x + box->width <= u_minify(resource->width0, level)); - assert(box->y + box->height <= u_minify(resource->height0, level)); - assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1)); - - ctrans = CALLOC_STRUCT(cell_transfer); - if (ctrans) { - struct pipe_transfer *pt = &ctrans->base; - pipe_resource_reference(&pt->resource, resource); - pt->level = level; - pt->usage = usage; - pt->box = *box; - pt->stride = ct->stride[level]; - - ctrans->offset = ct->level_offset[level]; - - if (resource->target == PIPE_TEXTURE_CUBE || resource->target == PIPE_TEXTURE_3D) { - unsigned h_tile = align(u_minify(resource->height0, level), TILE_SIZE); - ctrans->offset += box->z * util_format_get_nblocksy(format, h_tile) * pt->stride; - } - else { - assert(box->z == 0); - } - - return pt; - } - return NULL; -} - - -static void -cell_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *t) -{ - struct cell_transfer *transfer = 
cell_transfer(t); - /* Effectively do the texture_update work here - if texture images - * needed post-processing to put them into hardware layout, this is - * where it would happen. For cell, nothing to do. - */ - assert (transfer->base.resource); - pipe_resource_reference(&transfer->base.resource, NULL); - FREE(transfer); -} - - -/** - * Return pointer to texture image data in linear layout. - */ -static void * -cell_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer) -{ - struct cell_transfer *ctrans = cell_transfer(transfer); - struct pipe_resource *pt = transfer->resource; - struct cell_resource *ct = cell_resource(pt); - - assert(transfer->resource); - - if (ct->mapped == NULL) { - ct->mapped = ct->data; - } - - - /* Better test would be resource->is_linear - */ - if (transfer->resource->target != PIPE_BUFFER) { - const uint level = ctrans->base.level; - const uint texWidth = u_minify(pt->width0, level); - const uint texHeight = u_minify(pt->height0, level); - unsigned size; - - - /* - * Create a buffer of ordinary memory for the linear texture. - * This is the memory that the user will read/write. - */ - size = (util_format_get_stride(pt->format, align(texWidth, TILE_SIZE)) * - util_format_get_nblocksy(pt->format, align(texHeight, TILE_SIZE))); - - ctrans->map = align_malloc(size, 16); - if (!ctrans->map) - return NULL; /* out of memory */ - - if (transfer->usage & PIPE_TRANSFER_READ) { - /* Textures always stored twiddled, need to untwiddle the - * texture to make a linear version. 
- */ - const uint bpp = util_format_get_blocksize(ct->base.format); - if (bpp == 4) { - const uint *src = (uint *) (ct->mapped + ctrans->offset); - uint *dst = ctrans->map; - untwiddle_image_uint(texWidth, texHeight, TILE_SIZE, - dst, transfer->stride, src); - } - else { - // xxx fix - } - } - } - else { - unsigned stride = transfer->stride; - enum pipe_format format = pt->format; - unsigned blocksize = util_format_get_blocksize(format); - - ctrans->map = (ct->mapped + - ctrans->offset + - ctrans->base.box.y / util_format_get_blockheight(format) * stride + - ctrans->base.box.x / util_format_get_blockwidth(format) * blocksize); - } - - - return ctrans->map; -} - - -/** - * Called when user is done reading/writing texture data. - * If new data was written, this is where we convert the linear data - * to tiled data. - */ -static void -cell_transfer_unmap(struct pipe_context *ctx, - struct pipe_transfer *transfer) -{ - struct cell_transfer *ctrans = cell_transfer(transfer); - struct pipe_resource *pt = transfer->resource; - struct cell_resource *ct = cell_resource(pt); - const uint level = ctrans->base.level; - const uint texWidth = u_minify(pt->width0, level); - const uint texHeight = u_minify(pt->height0, level); - const uint stride = ct->stride[level]; - - if (!ct->mapped) { - assert(0); - return; - } - - if (pt->target != PIPE_BUFFER) { - if (transfer->usage & PIPE_TRANSFER_WRITE) { - /* The user wrote new texture data into the mapped buffer. - * We need to convert the new linear data into the twiddled/tiled format. 
- */ - const uint bpp = util_format_get_blocksize(ct->base.format); - if (bpp == 4) { - const uint *src = ctrans->map; - uint *dst = (uint *) (ct->mapped + ctrans->offset); - twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, stride, src); - } - else { - // xxx fix - } - } - - align_free(ctrans->map); - } - else { - /* nothing to do */ - } - - ctrans->map = NULL; -} - - - -/* This used to be overriden by the co-state tracker, but really needs - * to be active with sw_winsys. - * - * Contrasting with llvmpipe and softpipe, this is the only place - * where we use the ct->dt display target in any real sense. - * - * Basically just untwiddle our local data into the linear - * displaytarget. - */ -static void -cell_flush_frontbuffer(struct pipe_screen *_screen, - struct pipe_resource *resource, - unsigned level, unsigned layer, - void *context_private) -{ - struct cell_screen *screen = cell_screen(_screen); - struct sw_winsys *winsys = screen->winsys; - struct cell_resource *ct = cell_resource(resource); - - if (!ct->dt) - return; - - /* Need to untwiddle from our internal representation here: - */ - { - unsigned *map = winsys->displaytarget_map(winsys, ct->dt, - (PIPE_TRANSFER_READ | - PIPE_TRANSFER_WRITE)); - unsigned *src = (unsigned *)(ct->data + ct->level_offset[level]); - - untwiddle_image_uint(u_minify(resource->width0, level), - u_minify(resource->height0, level), - TILE_SIZE, - map, - ct->dt_stride, - src); - - winsys->displaytarget_unmap(winsys, ct->dt); - } - - winsys->displaytarget_display(winsys, ct->dt, context_private); -} - - - -/** - * Create buffer which wraps user-space data. 
- */ -static struct pipe_resource * -cell_user_buffer_create(struct pipe_screen *screen, - void *ptr, - unsigned bytes, - unsigned bind_flags) -{ - struct cell_resource *buffer; - - buffer = CALLOC_STRUCT(cell_resource); - if(!buffer) - return NULL; - - pipe_reference_init(&buffer->base.reference, 1); - buffer->base.screen = screen; - buffer->base.format = PIPE_FORMAT_R8_UNORM; /* ?? */ - buffer->base.bind = PIPE_BIND_TRANSFER_READ | bind_flags; - buffer->base.usage = PIPE_USAGE_IMMUTABLE; - buffer->base.flags = 0; - buffer->base.width0 = bytes; - buffer->base.height0 = 1; - buffer->base.depth0 = 1; - buffer->base.array_size = 1; - buffer->userBuffer = TRUE; - buffer->data = ptr; - - return &buffer->base; -} - - -static struct pipe_resource * -cell_resource_from_handle(struct pipe_screen *screen, - const struct pipe_resource *templat, - struct winsys_handle *handle) -{ - /* XXX todo */ - return NULL; -} - - -static boolean -cell_resource_get_handle(struct pipe_screen *scree, - struct pipe_resource *tex, - struct winsys_handle *handle) -{ - /* XXX todo */ - return FALSE; -} - - -void -cell_init_screen_texture_funcs(struct pipe_screen *screen) -{ - screen->resource_create = cell_resource_create; - screen->resource_destroy = cell_resource_destroy; - screen->resource_from_handle = cell_resource_from_handle; - screen->resource_get_handle = cell_resource_get_handle; - screen->user_buffer_create = cell_user_buffer_create; - - screen->flush_frontbuffer = cell_flush_frontbuffer; -} - -void -cell_init_texture_transfer_funcs(struct cell_context *cell) -{ - cell->pipe.get_transfer = cell_get_transfer; - cell->pipe.transfer_destroy = cell_transfer_destroy; - cell->pipe.transfer_map = cell_transfer_map; - cell->pipe.transfer_unmap = cell_transfer_unmap; - - cell->pipe.transfer_flush_region = u_default_transfer_flush_region; - cell->pipe.transfer_inline_write = u_default_transfer_inline_write; - - cell->pipe.create_surface = cell_create_surface; - cell->pipe.surface_destroy = 
cell_surface_destroy; -} diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h deleted file mode 100644 index bd8224b3b7b..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ /dev/null @@ -1,102 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef CELL_TEXTURE_H -#define CELL_TEXTURE_H - -#include "cell/common.h" - -struct cell_context; -struct pipe_resource; - - -/** - * Subclass of pipe_resource - */ -struct cell_resource -{ - struct pipe_resource base; - - unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS]; - unsigned long stride[CELL_MAX_TEXTURE_LEVELS]; - - /** - * Display target, for textures with the PIPE_BIND_DISPLAY_TARGET - * usage. - */ - struct sw_displaytarget *dt; - unsigned dt_stride; - - /** - * Malloc'ed data for regular textures, or a mapping to dt above. - */ - void *data; - boolean userBuffer; - - /* Size of the linear buffer?? - */ - unsigned long buffer_size; - - /** The buffer above, mapped. This is the memory from which the - * SPUs will fetch texels. This texture data is in the tiled layout. - */ - ubyte *mapped; -}; - - -struct cell_transfer -{ - struct pipe_transfer base; - - unsigned long offset; - void *map; -}; - - -/** cast wrapper */ -static INLINE struct cell_resource * -cell_resource(struct pipe_resource *pt) -{ - return (struct cell_resource *) pt; -} - - -/** cast wrapper */ -static INLINE struct cell_transfer * -cell_transfer(struct pipe_transfer *pt) -{ - return (struct cell_transfer *) pt; -} - - -extern void -cell_init_screen_texture_funcs(struct pipe_screen *screen); - -extern void -cell_init_texture_transfer_funcs(struct cell_context *cell); - -#endif /* CELL_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c deleted file mode 100644 index 37b71956482..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ /dev/null @@ -1,332 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Vertex buffer code. The draw module transforms vertices to window - * coords, etc. and emits the vertices into buffer supplied by this module. - * When a vertex buffer is full, or we flush, we'll send the vertex data - * to the SPUs. - * - * Authors - * Brian Paul - */ - - -#include "cell_batch.h" -#include "cell_context.h" -#include "cell_fence.h" -#include "cell_flush.h" -#include "cell_spu.h" -#include "cell_vbuf.h" -#include "draw/draw_vbuf.h" -#include "util/u_memory.h" - - -/** Allow vertex data to be inlined after RENDER command */ -#define ALLOW_INLINE_VERTS 1 - - -/** - * Subclass of vbuf_render because we need a cell_context pointer in - * a few places. 
- */ -struct cell_vbuf_render -{ - struct vbuf_render base; - struct cell_context *cell; - uint prim; /**< PIPE_PRIM_x */ - uint vertex_size; /**< in bytes */ - void *vertex_buffer; /**< just for debug, really */ - uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ - uint vertex_buffer_size; /**< size in bytes */ -}; - - -/** cast wrapper */ -static struct cell_vbuf_render * -cell_vbuf_render(struct vbuf_render *vbr) -{ - return (struct cell_vbuf_render *) vbr; -} - - - -static const struct vertex_info * -cell_vbuf_get_vertex_info(struct vbuf_render *vbr) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - return &cvbr->cell->vertex_info; -} - - -static boolean -cell_vbuf_allocate_vertices(struct vbuf_render *vbr, - ushort vertex_size, ushort nr_vertices) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - unsigned size = vertex_size * nr_vertices; - /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/ - - assert(cvbr->vertex_buf == ~0); - cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell); - cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf]; - cvbr->vertex_buffer_size = size; - cvbr->vertex_size = vertex_size; - - return cvbr->vertex_buffer != NULL; -} - - -static void -cell_vbuf_release_vertices(struct vbuf_render *vbr) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - struct cell_context *cell = cvbr->cell; - - /* - printf("%s vertex_buf = %u count = %u\n", - __FUNCTION__, cvbr->vertex_buf, vertices_used); - */ - - /* Make sure texture buffers aren't released until we're done rendering - * with them. 
- */ - cell_add_fenced_textures(cell); - - /* Tell SPUs they can release the vert buf */ - if (cvbr->vertex_buf != ~0U) { - STATIC_ASSERT(sizeof(struct cell_command_release_verts) % 16 == 0); - struct cell_command_release_verts *release - = (struct cell_command_release_verts *) - cell_batch_alloc16(cell, sizeof(struct cell_command_release_verts)); - release->opcode[0] = CELL_CMD_RELEASE_VERTS; - release->vertex_buf = cvbr->vertex_buf; - } - - cvbr->vertex_buf = ~0; - cell_flush_int(cell, 0x0); - - cvbr->vertex_buffer = NULL; -} - - -static void * -cell_vbuf_map_vertices(struct vbuf_render *vbr) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - return cvbr->vertex_buffer; -} - - -static void -cell_vbuf_unmap_vertices(struct vbuf_render *vbr, - ushort min_index, - ushort max_index ) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); - /* do nothing */ -} - - - -static boolean -cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - cvbr->prim = prim; - /*printf("cell_set_prim %u\n", prim);*/ - return TRUE; -} - - -static void -cell_vbuf_draw_elements(struct vbuf_render *vbr, - const ushort *indices, - uint nr_indices) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - struct cell_context *cell = cvbr->cell; - float xmin, ymin, xmax, ymax; - uint i; - uint nr_vertices = 0, min_index = ~0; - const void *vertices = cvbr->vertex_buffer; - const uint vertex_size = cvbr->vertex_size; - - for (i = 0; i < nr_indices; i++) { - if (indices[i] > nr_vertices) - nr_vertices = indices[i]; - if (indices[i] < min_index) - min_index = indices[i]; - } - nr_vertices++; - -#if 0 - /*if (min_index > 0)*/ - printf("%s min_index = %u\n", __FUNCTION__, min_index); -#endif - -#if 0 - printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n", - nr_indices, nr_vertices); - printf(" "); - for (i = 0; i < nr_indices; i += 
3) { - printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]); - } - printf("\n"); -#elif 0 - printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n", - nr_indices, nr_vertices, - indices[0], indices[1], indices[2]); - printf("ind space = %u, vert space = %u, space = %u\n", - nr_indices * 2, - nr_vertices * 4 * cell->vertex_info.size, - cell_batch_free_space(cell)); -#endif - - /* compute x/y bounding box */ - xmin = ymin = 1e50; - xmax = ymax = -1e50; - for (i = min_index; i < nr_vertices; i++) { - const float *v = (float *) ((ubyte *) vertices + i * vertex_size); - if (v[0] < xmin) - xmin = v[0]; - if (v[0] > xmax) - xmax = v[0]; - if (v[1] < ymin) - ymin = v[1]; - if (v[1] > ymax) - ymax = v[1]; - } -#if 0 - printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax); - fflush(stdout); -#endif - - if (cvbr->prim != PIPE_PRIM_TRIANGLES) - return; /* only render tris for now */ - - /* build/insert batch RENDER command */ - { - const uint index_bytes = ROUNDUP16(nr_indices * 2); - const uint vertex_bytes = ROUNDUP16(nr_vertices * 4 * cell->vertex_info.size); - STATIC_ASSERT(sizeof(struct cell_command_render) % 16 == 0); - const uint batch_size = sizeof(struct cell_command_render) + index_bytes; - - struct cell_command_render *render - = (struct cell_command_render *) - cell_batch_alloc16(cell, batch_size); - - render->opcode[0] = CELL_CMD_RENDER; - render->prim_type = cvbr->prim; - - render->num_indexes = nr_indices; - render->min_index = min_index; - - /* append indices after render command */ - memcpy(render + 1, indices, nr_indices * 2); - - /* if there's room, append vertices after the indices, else leave - * vertices in the original/separate buffer. 
- */ - render->vertex_size = 4 * cell->vertex_info.size; - render->num_verts = nr_vertices; - if (ALLOW_INLINE_VERTS && - min_index == 0 && - vertex_bytes + 16 <= cell_batch_free_space(cell)) { - /* vertex data inlined, after indices, at 16-byte boundary */ - void *dst = cell_batch_alloc16(cell, vertex_bytes); - memcpy(dst, vertices, vertex_bytes); - render->inline_verts = TRUE; - render->vertex_buf = ~0; - } - else { - /* vertex data in separate buffer */ - render->inline_verts = FALSE; - ASSERT(cvbr->vertex_buf >= 0); - render->vertex_buf = cvbr->vertex_buf; - } - - render->xmin = xmin; - render->ymin = ymin; - render->xmax = xmax; - render->ymax = ymax; - } - -#if 0 - /* helpful for debug */ - cell_flush_int(cell, CELL_FLUSH_WAIT); -#endif -} - - -static void -cell_vbuf_destroy(struct vbuf_render *vbr) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - cvbr->cell->vbuf_render = NULL; - FREE(cvbr); -} - - -/** - * Initialize the post-transform vertex buffer information for the given - * context. - */ -void -cell_init_vbuf(struct cell_context *cell) -{ - assert(cell->draw); - - cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render); - - /* The max number of indexes is what can fix into a batch buffer, - * minus the render and release-verts commands. 
- */ - cell->vbuf_render->base.max_indices - = (CELL_BUFFER_SIZE - - sizeof(struct cell_command_render) - - sizeof(struct cell_command_release_verts)) - / sizeof(ushort); - cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE; - - cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info; - cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices; - cell->vbuf_render->base.map_vertices = cell_vbuf_map_vertices; - cell->vbuf_render->base.unmap_vertices = cell_vbuf_unmap_vertices; - cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive; - cell->vbuf_render->base.draw_elements = cell_vbuf_draw_elements; - cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices; - cell->vbuf_render->base.destroy = cell_vbuf_destroy; - - cell->vbuf_render->cell = cell; -#if 1 - cell->vbuf_render->vertex_buf = ~0; -#endif - - cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base); -} diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.h b/src/gallium/drivers/cell/ppu/cell_vbuf.h deleted file mode 100644 index d265cbf7701..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.h +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef CELL_VBUF_H -#define CELL_VBUF_H - - -struct cell_context; - -extern void -cell_init_vbuf(struct cell_context *cell); - - -#endif /* CELL_VBUF_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c deleted file mode 100644 index 9cba537d9eb..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ /dev/null @@ -1,346 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "pipe/p_format.h" - -#include "../auxiliary/draw/draw_context.h" -#include "../auxiliary/draw/draw_private.h" - -#include "cell_context.h" -#include "rtasm/rtasm_ppc_spe.h" - - -/** - * Emit a 4x4 matrix transpose operation - * - * \param p Function that the transpose operation is to be appended to - * \param row0 Register containing row 0 of the source matrix - * \param row1 Register containing row 1 of the source matrix - * \param row2 Register containing row 2 of the source matrix - * \param row3 Register containing row 3 of the source matrix - * \param dest_ptr Register containing the address of the destination matrix - * \param shuf_ptr Register containing the address of the shuffled data - * \param count Number of colums to actually be written to the destination - * - * \note - * This function assumes that the registers named by \c row0, \c row1, - * \c row2, and \c row3 are scratch and can be modified by the generated code. - * Furthermore, these registers will be released, via calls to - * \c release_register, by this function. - * - * \note - * This function requires that four temporary are available on entry. 
- */ -static void -emit_matrix_transpose(struct spe_function *p, - unsigned row0, unsigned row1, unsigned row2, - unsigned row3, unsigned dest_ptr, - unsigned shuf_ptr, unsigned count) -{ - int shuf_hi = spe_allocate_available_register(p); - int shuf_lo = spe_allocate_available_register(p); - int t1 = spe_allocate_available_register(p); - int t2 = spe_allocate_available_register(p); - int t3; - int t4; - int col0; - int col1; - int col2; - int col3; - - - spe_lqd(p, shuf_hi, shuf_ptr, 3*16); - spe_lqd(p, shuf_lo, shuf_ptr, 4*16); - spe_shufb(p, t1, row0, row2, shuf_hi); - spe_shufb(p, t2, row0, row2, shuf_lo); - - - /* row0 and row2 are now no longer needed. Re-use those registers as - * temporaries. - */ - t3 = row0; - t4 = row2; - - spe_shufb(p, t3, row1, row3, shuf_hi); - spe_shufb(p, t4, row1, row3, shuf_lo); - - - /* row1 and row3 are now no longer needed. Re-use those registers as - * temporaries. - */ - col0 = row1; - col1 = row3; - - spe_shufb(p, col0, t1, t3, shuf_hi); - if (count > 1) { - spe_shufb(p, col1, t1, t3, shuf_lo); - } - - /* t1 and t3 are now no longer needed. Re-use those registers as - * temporaries. - */ - col2 = t1; - col3 = t3; - - if (count > 2) { - spe_shufb(p, col2, t2, t4, shuf_hi); - } - - if (count > 3) { - spe_shufb(p, col3, t2, t4, shuf_lo); - } - - - /* Store the results. Remember that the stqd instruction is encoded using - * the qword offset (stand-alone assemblers to the byte-offset to - * qword-offset conversion for you), so the byte-offset needs be divided by - * 16. - */ - switch (count) { - case 4: - spe_stqd(p, col3, dest_ptr, 3 * 16); - case 3: - spe_stqd(p, col2, dest_ptr, 2 * 16); - case 2: - spe_stqd(p, col1, dest_ptr, 1 * 16); - case 1: - spe_stqd(p, col0, dest_ptr, 0 * 16); - } - - - /* Release all of the temporary registers used. 
- */ - spe_release_register(p, col0); - spe_release_register(p, col1); - spe_release_register(p, col2); - spe_release_register(p, col3); - spe_release_register(p, shuf_hi); - spe_release_register(p, shuf_lo); - spe_release_register(p, t2); - spe_release_register(p, t4); -} - - -#if 0 -/* This appears to not be used currently */ -static void -emit_fetch(struct spe_function *p, - unsigned in_ptr, unsigned *offset, - unsigned out_ptr, unsigned shuf_ptr, - enum pipe_format format) -{ - const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0) - + (pf_size_z(format) != 0) + (pf_size_w(format) != 0); - const unsigned type = pf_type(format); - const unsigned bytes = pf_size_x(format); - - int v0 = spe_allocate_available_register(p); - int v1 = spe_allocate_available_register(p); - int v2 = spe_allocate_available_register(p); - int v3 = spe_allocate_available_register(p); - int tmp = spe_allocate_available_register(p); - int float_zero = -1; - int float_one = -1; - float scale_signed = 0.0; - float scale_unsigned = 0.0; - - spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16); - spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16); - spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16); - spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16); - offset[0] += 4; - - switch (bytes) { - case 1: - scale_signed = 1.0f / 127.0f; - scale_unsigned = 1.0f / 255.0f; - spe_lqd(p, tmp, shuf_ptr, 1 * 16); - spe_shufb(p, v0, v0, v0, tmp); - spe_shufb(p, v1, v1, v1, tmp); - spe_shufb(p, v2, v2, v2, tmp); - spe_shufb(p, v3, v3, v3, tmp); - break; - case 2: - scale_signed = 1.0f / 32767.0f; - scale_unsigned = 1.0f / 65535.0f; - spe_lqd(p, tmp, shuf_ptr, 2 * 16); - spe_shufb(p, v0, v0, v0, tmp); - spe_shufb(p, v1, v1, v1, tmp); - spe_shufb(p, v2, v2, v2, tmp); - spe_shufb(p, v3, v3, v3, tmp); - break; - case 4: - scale_signed = 1.0f / 2147483647.0f; - scale_unsigned = 1.0f / 4294967295.0f; - break; - default: - assert(0); - break; - } - - switch (type) { - case PIPE_FORMAT_TYPE_FLOAT: - break; - case 
PIPE_FORMAT_TYPE_UNORM: - spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16); - spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff); - spe_cuflt(p, v0, v0, 0); - spe_fm(p, v0, v0, tmp); - break; - case PIPE_FORMAT_TYPE_SNORM: - spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16); - spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff); - spe_csflt(p, v0, v0, 0); - spe_fm(p, v0, v0, tmp); - break; - case PIPE_FORMAT_TYPE_USCALED: - spe_cuflt(p, v0, v0, 0); - break; - case PIPE_FORMAT_TYPE_SSCALED: - spe_csflt(p, v0, v0, 0); - break; - } - - - if (count < 4) { - float_one = spe_allocate_available_register(p); - spe_il(p, float_one, 1); - spe_cuflt(p, float_one, float_one, 0); - - if (count < 3) { - float_zero = spe_allocate_available_register(p); - spe_il(p, float_zero, 0); - } - } - - spe_release_register(p, tmp); - - emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count); - - switch (count) { - case 1: - spe_stqd(p, float_zero, out_ptr, 1 * 16); - case 2: - spe_stqd(p, float_zero, out_ptr, 2 * 16); - case 3: - spe_stqd(p, float_one, out_ptr, 3 * 16); - } - - if (float_zero != -1) { - spe_release_register(p, float_zero); - } - - if (float_one != -1) { - spe_release_register(p, float_one); - } -} -#endif - - -void cell_update_vertex_fetch(struct draw_context *draw) -{ -#if 0 - struct cell_context *const cell = - (struct cell_context *) draw->driver_private; - struct spe_function *p = &cell->attrib_fetch; - unsigned function_index[PIPE_MAX_ATTRIBS]; - unsigned unique_attr_formats; - int out_ptr; - int in_ptr; - int shuf_ptr; - unsigned i; - unsigned j; - - - /* Determine how many unique input attribute formats there are. At the - * same time, store the index of the lowest numbered attribute that has - * the same format as any non-unique format. 
- */ - unique_attr_formats = 1; - function_index[0] = 0; - for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) { - const enum pipe_format curr_fmt = draw->vertex_element[i].src_format; - - for (j = 0; j < i; j++) { - if (curr_fmt == draw->vertex_element[j].src_format) { - break; - } - } - - if (j == i) { - unique_attr_formats++; - } - - function_index[i] = j; - } - - - /* Each fetch function can be a maximum of 34 instructions (note: this is - * actually a slight over-estimate). - */ - spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats); - - - /* Allocate registers for the function's input parameters. - */ - out_ptr = spe_allocate_register(p, 3); - in_ptr = spe_allocate_register(p, 4); - shuf_ptr = spe_allocate_register(p, 5); - - - /* Generate code for the individual attribute fetch functions. - */ - for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { - unsigned offset; - - if (function_index[i] == i) { - cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr - - (void *) p->store); - - offset = 0; - emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr, - draw->vertex_element[i].src_format); - spe_bi(p, 0, 0, 0); - - /* Round up to the next 16-byte boundary. - */ - if ((((unsigned) p->store) & 0x0f) != 0) { - const unsigned align = ((unsigned) p->store) & 0x0f; - p->store = (uint32_t *) (((void *) p->store) + align); - } - } else { - /* Use the same function entry-point as a previously seen attribute - * with the same format. - */ - cell->attrib_fetch_offsets[i] = - cell->attrib_fetch_offsets[function_index[i]]; - } - } -#else - assert(0); -#endif -} diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c deleted file mode 100644 index 3d389d6ea36..00000000000 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file cell_vertex_shader.c - * Vertex shader interface routines for Cell. - * - * \author Ian Romanick - */ - -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "util/u_math.h" - -#include "cell_context.h" -#include "cell_draw_arrays.h" -#include "cell_flush.h" -#include "cell_spu.h" -#include "cell_batch.h" - -#include "cell/common.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" - -/** - * Run the vertex shader on all vertices in the vertex queue. - * Called by the draw module when the vertx cache needs to be flushed. 
- */ -void -cell_vertex_shader_queue_flush(struct draw_context *draw) -{ -#if 0 - struct cell_context *const cell = - (struct cell_context *) draw->driver_private; - struct cell_command_vs *const vs = &cell_global.command[0].vs; - uint64_t *batch; - struct cell_array_info *array_info; - unsigned i, j; - struct cell_attribute_fetch_code *cf; - - assert(draw->vs.queue_nr != 0); - - /* XXX: do this on statechange: - */ - draw_update_vertex_fetch(draw); - cell_update_vertex_fetch(draw); - - - batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf)); - batch[0] = CELL_CMD_STATE_ATTRIB_FETCH; - cf = (struct cell_attribute_fetch_code *) (&batch[1]); - cf->base = (uint64_t) cell->attrib_fetch.store; - cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr - - (void *) cell->attrib_fetch.store)); - - - for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { - const enum pipe_format format = draw->vertex_element[i].src_format; - const unsigned count = ((pf_size_x(format) != 0) - + (pf_size_y(format) != 0) - + (pf_size_z(format) != 0) - + (pf_size_w(format) != 0)); - const unsigned size = pf_size_x(format) * count; - - batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); - - batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO; - - array_info = (struct cell_array_info *) &batch[1]; - assert(draw->vertex_fetch.src_ptr[i] != NULL); - array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; - array_info->attr = i; - array_info->pitch = draw->vertex_fetch.pitch[i]; - array_info->size = size; - array_info->function_offset = cell->attrib_fetch_offsets[i]; - } - - batch = cell_batch_alloc(cell, sizeof(batch[0]) - + sizeof(struct pipe_viewport_state)); - batch[0] = CELL_CMD_STATE_VIEWPORT; - (void) memcpy(&batch[1], &draw->viewport, - sizeof(struct pipe_viewport_state)); - - { - uint64_t uniforms = (uintptr_t) draw->user.constants; - - batch = cell_batch_alloc(cell, 2 *sizeof(batch[0])); - batch[0] = CELL_CMD_STATE_UNIFORMS; - batch[1] = uniforms; - } - - 
cell_batch_flush(cell); - - vs->opcode = CELL_CMD_VS_EXECUTE; - vs->nr_attrs = draw->vertex_fetch.nr_attrs; - - (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane)); - vs->nr_planes = draw->nr_planes; - - for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) { - const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i); - - for (j = 0; j < n; j++) { - vs->elts[j] = draw->vs.queue[i + j].elt; - vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; - } - - for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { - vs->elts[j] = vs->elts[0]; - vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex; - } - - vs->num_elts = n; - send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); - - cell_flush_int(cell, CELL_FLUSH_WAIT); - } - - draw->vs.post_nr = draw->vs.queue_nr; - draw->vs.queue_nr = 0; -#else - assert(0); -#endif -} diff --git a/src/gallium/drivers/cell/spu/.gitignore b/src/gallium/drivers/cell/spu/.gitignore deleted file mode 100644 index 2be9a2d3242..00000000000 --- a/src/gallium/drivers/cell/spu/.gitignore +++ /dev/null @@ -1 +0,0 @@ -g3d_spu diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile deleted file mode 100644 index 3cc52301da2..00000000000 --- a/src/gallium/drivers/cell/spu/Makefile +++ /dev/null @@ -1,83 +0,0 @@ -# Gallium3D Cell driver: SPU code - -# This makefile builds the g3d_spu.a file that's linked into the -# PPU code/library. - - -TOP = ../../../../.. 
-include $(TOP)/configs/current - - -PROG = g3d - -PROG_SPU = $(PROG)_spu -PROG_SPU_A = $(PROG)_spu.a -PROG_SPU_EMBED_O = $(PROG)_spu-embed.o - - -SOURCES = \ - spu_command.c \ - spu_dcache.c \ - spu_funcs.c \ - spu_main.c \ - spu_per_fragment_op.c \ - spu_render.c \ - spu_texture.c \ - spu_tile.c \ - spu_tri.c - -OLD_SOURCES = \ - spu_exec.c \ - spu_util.c \ - spu_vertex_fetch.c \ - spu_vertex_shader.c - - -SPU_OBJECTS = $(SOURCES:.c=.o) - -SPU_ASM_OUT = $(SOURCES:.c=.s) - - -INCLUDE_DIRS = \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/gallium/drivers - - -.c.o: - $(SPU_CC) $(SPU_CFLAGS) -c $< - -.c.s: - $(SPU_CC) $(SPU_CFLAGS) -O3 -S $< - - -# The .a file will be linked into the main/PPU executable -default: $(PROG_SPU_A) - -$(PROG_SPU_A): $(PROG_SPU_EMBED_O) - $(SPU_AR) $(SPU_AR_FLAGS) $(PROG_SPU_A) $(PROG_SPU_EMBED_O) - -$(PROG_SPU_EMBED_O): $(PROG_SPU) - $(SPU_EMBED) $(SPU_EMBED_FLAGS) $(PROG_SPU) $(PROG_SPU) $(PROG_SPU_EMBED_O) - -$(PROG_SPU): $(SPU_OBJECTS) - $(SPU_CC) -o $(PROG_SPU) $(SPU_OBJECTS) $(SPU_LFLAGS) - - - -asmfiles: $(SPU_ASM_OUT) - - -clean: - rm -f *~ *.o *.a *.d *.s $(PROG_SPU) - - - -depend: $(SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null - -include depend - diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h deleted file mode 100644 index d7ce0055248..00000000000 --- a/src/gallium/drivers/cell/spu/spu_colorpack.h +++ /dev/null @@ -1,145 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - - -#ifndef SPU_COLORPACK_H -#define SPU_COLORPACK_H - - -#include -#include - - -static INLINE unsigned int -spu_pack_R8G8B8A8(vector float rgba) -{ - vector unsigned int out = spu_convtu(rgba, 32); - - out = spu_shuffle(out, out, ((vector unsigned char) { - 0, 4, 8, 12, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 }) ); - - return spu_extract(out, 0); -} - - -static INLINE unsigned int -spu_pack_A8R8G8B8(vector float rgba) -{ - vector unsigned int out = spu_convtu(rgba, 32); - out = spu_shuffle(out, out, ((vector unsigned char) { - 12, 0, 4, 8, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}) ); - return spu_extract(out, 0); -} - - -static INLINE unsigned int -spu_pack_B8G8R8A8(vector float rgba) -{ - vector unsigned int out = spu_convtu(rgba, 32); - out = spu_shuffle(out, out, ((vector unsigned char) { - 8, 4, 0, 12, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0}) ); - return spu_extract(out, 0); -} - - -static INLINE unsigned int -spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle) -{ - vector unsigned int out = spu_convtu(rgba, 32); - out = spu_shuffle(out, out, shuffle); - return spu_extract(out, 0); -} - - -static INLINE vector float -spu_unpack_B8G8R8A8(uint color) -{ - vector unsigned int color_u4 = spu_splats(color); - color_u4 = spu_shuffle(color_u4, color_u4, - ((vector unsigned char) { - 2, 2, 2, 2, - 1, 1, 1, 1, - 0, 0, 0, 0, - 3, 3, 3, 3}) ); - return spu_convtf(color_u4, 32); -} - - -static INLINE vector float -spu_unpack_A8R8G8B8(uint color) -{ - vector unsigned int color_u4 = spu_splats(color); - color_u4 = spu_shuffle(color_u4, color_u4, - ((vector unsigned char) { - 1, 1, 1, 1, - 2, 2, 2, 2, - 3, 3, 3, 3, - 0, 0, 0, 0}) ); - return spu_convtf(color_u4, 32); -} - - -/** - * \param color_in - array of 32-bit packed ARGB colors - * \param color_out - returns float colors in RRRR, GGGG, BBBB, AAAA order - */ -static INLINE void -spu_unpack_A8R8G8B8_transpose4(const vector 
unsigned int color_in[4], - vector float color_out[4]) -{ - vector unsigned int c0; - - c0 = spu_shuffle(color_in[0], color_in[0], - ((vector unsigned char) { - 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); - color_out[0] = spu_convtf(c0, 32); - - c0 = spu_shuffle(color_in[1], color_in[1], - ((vector unsigned char) { - 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); - color_out[1] = spu_convtf(c0, 32); - - c0 = spu_shuffle(color_in[2], color_in[2], - ((vector unsigned char) { - 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); - color_out[2] = spu_convtf(c0, 32); - - c0 = spu_shuffle(color_in[3], color_in[3], - ((vector unsigned char) { - 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); - color_out[3] = spu_convtf(c0, 32); - - _transpose_matrix4x4(color_out, color_out); -} - - - -#endif /* SPU_COLORPACK_H */ diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c deleted file mode 100644 index 6f8ba9562d2..00000000000 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ /dev/null @@ -1,810 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * SPU command processing code - */ - - -#include -#include - -#include "pipe/p_defines.h" - -#include "spu_command.h" -#include "spu_main.h" -#include "spu_render.h" -#include "spu_per_fragment_op.h" -#include "spu_texture.h" -#include "spu_tile.h" -#include "spu_vertex_shader.h" -#include "spu_dcache.h" -#include "cell/common.h" - - -struct spu_vs_context draw; - - -/** - * Buffers containing dynamically generated SPU code: - */ -PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]; - - - -static INLINE int -align(int value, int alignment) -{ - return (value + alignment - 1) & ~(alignment - 1); -} - - - -/** - * Tell the PPU that this SPU has finished copying a buffer to - * local store and that it may be reused by the PPU. - * This is done by writting a 16-byte batch-buffer-status block back into - * main memory (in cell_context->buffer_status[]). 
- */ -static void -release_buffer(uint buffer) -{ - /* Evidently, using less than a 16-byte status doesn't work reliably */ - static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE, - CELL_BUFFER_STATUS_FREE, - CELL_BUFFER_STATUS_FREE, - CELL_BUFFER_STATUS_FREE}; - const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); - uint *dst = spu.init.buffer_status + index; - - ASSERT(buffer < CELL_NUM_BUFFERS); - - mfc_put((void *) &status, /* src in local memory */ - (unsigned int) dst, /* dst in main memory */ - sizeof(status), /* size */ - TAG_MISC, /* tag is unimportant */ - 0, /* tid */ - 0 /* rid */); -} - - -/** - * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory. - * There's a qword of status per SPU. - */ -static void -cmd_fence(struct cell_command_fence *fence_cmd) -{ - static const vector unsigned int status = {CELL_FENCE_SIGNALLED, - CELL_FENCE_SIGNALLED, - CELL_FENCE_SIGNALLED, - CELL_FENCE_SIGNALLED}; - uint *dst = (uint *) fence_cmd->fence; - dst += 4 * spu.init.id; /* main store/memory address, not local store */ - ASSERT_ALIGN16(dst); - mfc_put((void *) &status, /* src in local memory */ - (unsigned int) dst, /* dst in main memory */ - sizeof(status), /* size */ - TAG_FENCE, /* tag */ - 0, /* tid */ - 0 /* rid */); -} - - -static void -cmd_clear_surface(const struct cell_command_clear_surface *clear) -{ - D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); - - if (clear->surface == 0) { - spu.fb.color_clear_value = clear->value; - if (spu.init.debug_flags & CELL_DEBUG_CHECKER) { - uint x = (spu.init.id << 4) | (spu.init.id << 12) | - (spu.init.id << 20) | (spu.init.id << 28); - spu.fb.color_clear_value ^= x; - } - } - else { - spu.fb.depth_clear_value = clear->value; - } - -#define CLEAR_OPT 1 -#if CLEAR_OPT - - /* Simply set all tiles' status to CLEAR. - * When we actually begin rendering into a tile, we'll initialize it to - * the clear value. 
If any tiles go untouched during the frame, - * really_clear_tiles() will set them to the clear value. - */ - if (clear->surface == 0) { - memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); - } - else { - memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); - } - -#else - - /* - * This path clears the whole framebuffer to the clear color right now. - */ - - /* - printf("SPU: %s num=%d w=%d h=%d\n", - __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); - */ - - /* init a single tile to the clear value */ - if (clear->surface == 0) { - clear_c_tile(&spu.ctile); - } - else { - clear_z_tile(&spu.ztile); - } - - /* walk over my tiles, writing the 'clear' tile's data */ - { - const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - uint i; - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (clear->surface == 0) - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); - else - put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); - } - } - - if (spu.init.debug_flags & CELL_DEBUG_SYNC) { - wait_on_mask(1 << TAG_SURFACE_CLEAR); - } - -#endif /* CLEAR_OPT */ - - D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n"); -} - - -static void -cmd_release_verts(const struct cell_command_release_verts *release) -{ - D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf); - ASSERT(release->vertex_buf != ~0U); - release_buffer(release->vertex_buf); -} - - -/** - * Process a CELL_CMD_STATE_FRAGMENT_OPS command. - * This involves installing new fragment ops SPU code. - * If this function is never called, we'll use a regular C fallback function - * for fragment processing. 
- */ -static void -cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) -{ - D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n"); - - /* Copy state info (for fallback case only - this will eventually - * go away when the fallback case goes away) - */ - memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); - memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); - memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color)); - - /* Make sure the SPU knows which buffers it's expected to read when - * it's told to pull tiles. - */ - spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled); - - /* If we're forcing the fallback code to be used (for debug purposes), - * install that. Otherwise install the incoming SPU code. - */ - if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) { - static unsigned int warned = 0; - if (!warned) { - fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); - warned = 1; - } - /* The following two lines aren't really necessary if you - * know the debug flags won't change during a run, and if you - * know that the function pointers are initialized correctly. - * We set them here to allow a person to change the debug - * flags during a run (from inside a debugger). - */ - spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; - spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; - return; - } - - /* Make sure the SPU code buffer is large enough to hold the incoming code. - * Note that we *don't* use align_malloc() and align_free(), because - * those utility functions are *not* available in SPU code. - * */ - if (spu.fragment_ops_code_size < fops->total_code_size) { - if (spu.fragment_ops_code != NULL) { - free(spu.fragment_ops_code); - } - spu.fragment_ops_code_size = fops->total_code_size; - spu.fragment_ops_code = malloc(fops->total_code_size); - if (spu.fragment_ops_code == NULL) { - /* Whoops. 
*/ - fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size); - spu.fragment_ops_code = NULL; - spu.fragment_ops_code_size = 0; - spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; - spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; - return; - } - } - - /* Copy the SPU code from the command buffer to the spu buffer */ - memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size); - - /* Set the pointers for the front-facing and back-facing fragments - * to the specified offsets within the code. Note that if the - * front-facing and back-facing code are the same, they'll have - * the same offset. - */ - spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index]; - spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index]; -} - -static void -cmd_state_fragment_program(const struct cell_command_fragment_program *fp) -{ - D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n"); - /* Copy SPU code from batch buffer to spu buffer */ - memcpy(spu.fragment_program_code, fp->code, - SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); -#if 01 - /* Point function pointer at new code */ - spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; -#endif -} - - -static uint -cmd_state_fs_constants(const qword *buffer, uint pos) -{ - const uint num_const = spu_extract((vector unsigned int)buffer[pos+1], 0); - const float *constants = (const float *) &buffer[pos+2]; - uint i; - - D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const); - - /* Expand each float to float[4] for SOA execution */ - for (i = 0; i < num_const; i++) { - D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]); - spu.constants[i] = spu_splats(constants[i]); - } - - /* return new buffer pos (in 16-byte words) */ - return pos + 2 + (ROUNDUP16(num_const * sizeof(float)) / 16); -} - - 
-static void -cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) -{ - D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", - cmd->width, - cmd->height, - cmd->color_start, - cmd->color_format, - cmd->depth_format); - - ASSERT_ALIGN16(cmd->color_start); - ASSERT_ALIGN16(cmd->depth_start); - - spu.fb.color_start = cmd->color_start; - spu.fb.depth_start = cmd->depth_start; - spu.fb.color_format = cmd->color_format; - spu.fb.depth_format = cmd->depth_format; - spu.fb.width = cmd->width; - spu.fb.height = cmd->height; - spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; - spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; - - switch (spu.fb.depth_format) { - case PIPE_FORMAT_Z32_UNORM: - spu.fb.zsize = 4; - spu.fb.zscale = (float) 0xffffffffu; - break; - case PIPE_FORMAT_S8_UINT_Z24_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - spu.fb.zsize = 4; - spu.fb.zscale = (float) 0x00ffffffu; - break; - case PIPE_FORMAT_Z16_UNORM: - spu.fb.zsize = 2; - spu.fb.zscale = (float) 0xffffu; - break; - default: - spu.fb.zsize = 0; - break; - } -} - - -/** - * Tex texture mask_s/t and scale_s/t fields depend on the texture size and - * sampler wrap modes. 
- */ -static void -update_tex_masks(struct spu_texture *texture, - const struct pipe_sampler_state *sampler) -{ - uint i; - - for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { - int width = texture->level[i].width; - int height = texture->level[i].height; - - if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) - texture->level[i].mask_s = spu_splats(width - 1); - else - texture->level[i].mask_s = spu_splats(~0); - - if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT) - texture->level[i].mask_t = spu_splats(height - 1); - else - texture->level[i].mask_t = spu_splats(~0); - - if (sampler->normalized_coords) { - texture->level[i].scale_s = spu_splats((float) width); - texture->level[i].scale_t = spu_splats((float) height); - } - else { - texture->level[i].scale_s = spu_splats(1.0f); - texture->level[i].scale_t = spu_splats(1.0f); - } - } -} - - -static void -cmd_state_sampler(const struct cell_command_sampler *sampler) -{ - uint unit = sampler->unit; - - D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit); - - spu.sampler[unit] = sampler->state; - - switch (spu.sampler[unit].min_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear; - break; - case PIPE_TEX_FILTER_NEAREST: - spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest; - break; - default: - ASSERT(0); - } - - switch (spu.sampler[sampler->unit].mag_img_filter) { - case PIPE_TEX_FILTER_LINEAR: - spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear; - break; - case PIPE_TEX_FILTER_NEAREST: - spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest; - break; - default: - ASSERT(0); - } - - switch (spu.sampler[sampler->unit].min_mip_filter) { - case PIPE_TEX_MIPFILTER_NEAREST: - case PIPE_TEX_MIPFILTER_LINEAR: - spu.sample_texture_2d[unit] = sample_texture_2d_lod; - break; - case PIPE_TEX_MIPFILTER_NONE: - spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit]; - break; - default: - ASSERT(0); - } - - update_tex_masks(&spu.texture[unit], 
&spu.sampler[unit]); -} - - -static void -cmd_state_texture(const struct cell_command_texture *texture) -{ - const uint unit = texture->unit; - uint i; - - D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit); - - spu.texture[unit].max_level = 0; - spu.texture[unit].target = texture->target; - - for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { - uint width = texture->width[i]; - uint height = texture->height[i]; - uint depth = texture->depth[i]; - - D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i, - texture->start[i], texture->width[i], texture->height[i]); - - spu.texture[unit].level[i].start = texture->start[i]; - spu.texture[unit].level[i].width = width; - spu.texture[unit].level[i].height = height; - spu.texture[unit].level[i].depth = depth; - - spu.texture[unit].level[i].tiles_per_row = - (width + TILE_SIZE - 1) / TILE_SIZE; - - spu.texture[unit].level[i].bytes_per_image = - 4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth; - - spu.texture[unit].level[i].max_s = spu_splats((int) width - 1); - spu.texture[unit].level[i].max_t = spu_splats((int) height - 1); - - if (texture->start[i]) - spu.texture[unit].max_level = i; - } - - update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); -} - - -static void -cmd_state_vertex_info(const struct vertex_info *vinfo) -{ - D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); - ASSERT(vinfo->num_attribs >= 1); - ASSERT(vinfo->num_attribs <= 8); - memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); -} - - -static void -cmd_state_vs_array_info(const struct cell_array_info *vs_info) -{ - const unsigned attr = vs_info->attr; - - ASSERT(attr < PIPE_MAX_ATTRIBS); - draw.vertex_fetch.src_ptr[attr] = vs_info->base; - draw.vertex_fetch.pitch[attr] = vs_info->pitch; - draw.vertex_fetch.size[attr] = vs_info->size; - draw.vertex_fetch.code_offset[attr] = vs_info->function_offset; - draw.vertex_fetch.dirty = 1; -} - - -static void -cmd_state_attrib_fetch(const struct 
cell_attribute_fetch_code *code) -{ - mfc_get(attribute_fetch_code_buffer, - (unsigned int) code->base, /* src */ - code->size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - draw.vertex_fetch.code = attribute_fetch_code_buffer; -} - - -static void -cmd_finish(void) -{ - D_PRINTF(CELL_DEBUG_CMD, "FINISH\n"); - really_clear_tiles(0); - /* wait for all outstanding DMAs to finish */ - mfc_write_tag_mask(~0); - mfc_read_tag_status_all(); - /* send mbox message to PPU */ - spu_write_out_mbox(CELL_CMD_FINISH); -} - - -/** - * Execute a batch of commands which was sent to us by the PPU. - * See the cell_emit_state.c code to see where the commands come from. - * - * The opcode param encodes the location of the buffer and its size. - */ -static void -cmd_batch(uint opcode) -{ - const uint buf = (opcode >> 8) & 0xff; - uint size = (opcode >> 16); - PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16]; - const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); - uint pos; - - D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n", - buf, size, spu.init.buffers[buf]); - - ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); - - ASSERT_ALIGN16(spu.init.buffers[buf]); - - size = ROUNDUP16(size); - - ASSERT_ALIGN16(spu.init.buffers[buf]); - - mfc_get(buffer, /* dest */ - (unsigned int) spu.init.buffers[buf], /* src */ - size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - /* Tell PPU we're done copying the buffer to local store */ - D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf); - release_buffer(buf); - - /* - * Loop over commands in the batch buffer - */ - for (pos = 0; pos < usize; /* no incr */) { - switch (si_to_uint(buffer[pos])) { - /* - * rendering commands - */ - case CELL_CMD_CLEAR_SURFACE: - { - struct cell_command_clear_surface *clr - = (struct cell_command_clear_surface *) &buffer[pos]; - cmd_clear_surface(clr); - pos += sizeof(*clr) / 16; - } - 
break; - case CELL_CMD_RENDER: - { - struct cell_command_render *render - = (struct cell_command_render *) &buffer[pos]; - uint pos_incr; - cmd_render(render, &pos_incr); - pos += ((pos_incr+1)&~1) / 2; // should 'fix' cmd_render return - } - break; - /* - * state-update commands - */ - case CELL_CMD_STATE_FRAMEBUFFER: - { - struct cell_command_framebuffer *fb - = (struct cell_command_framebuffer *) &buffer[pos]; - cmd_state_framebuffer(fb); - pos += sizeof(*fb) / 16; - } - break; - case CELL_CMD_STATE_FRAGMENT_OPS: - { - struct cell_command_fragment_ops *fops - = (struct cell_command_fragment_ops *) &buffer[pos]; - cmd_state_fragment_ops(fops); - /* This is a variant-sized command */ - pos += ROUNDUP16(sizeof(*fops) + fops->total_code_size) / 16; - } - break; - case CELL_CMD_STATE_FRAGMENT_PROGRAM: - { - struct cell_command_fragment_program *fp - = (struct cell_command_fragment_program *) &buffer[pos]; - cmd_state_fragment_program(fp); - pos += sizeof(*fp) / 16; - } - break; - case CELL_CMD_STATE_FS_CONSTANTS: - pos = cmd_state_fs_constants(buffer, pos); - break; - case CELL_CMD_STATE_RASTERIZER: - { - struct cell_command_rasterizer *rast = - (struct cell_command_rasterizer *) &buffer[pos]; - spu.rasterizer = rast->rasterizer; - pos += sizeof(*rast) / 16; - } - break; - case CELL_CMD_STATE_SAMPLER: - { - struct cell_command_sampler *sampler - = (struct cell_command_sampler *) &buffer[pos]; - cmd_state_sampler(sampler); - pos += sizeof(*sampler) / 16; - } - break; - case CELL_CMD_STATE_TEXTURE: - { - struct cell_command_texture *texture - = (struct cell_command_texture *) &buffer[pos]; - cmd_state_texture(texture); - pos += sizeof(*texture) / 16; - } - break; - case CELL_CMD_STATE_VERTEX_INFO: - cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); - pos += 1 + ROUNDUP16(sizeof(struct vertex_info)) / 16; - break; - case CELL_CMD_STATE_VIEWPORT: - (void) memcpy(& draw.viewport, &buffer[pos+1], - sizeof(struct pipe_viewport_state)); - pos += 1 + 
ROUNDUP16(sizeof(struct pipe_viewport_state)) / 16; - break; - case CELL_CMD_STATE_UNIFORMS: - draw.constants = (const float (*)[4]) (uintptr_t)spu_extract((vector unsigned int)buffer[pos+1],0); - pos += 2; - break; - case CELL_CMD_STATE_VS_ARRAY_INFO: - cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); - pos += 1 + ROUNDUP16(sizeof(struct cell_array_info)) / 16; - break; - case CELL_CMD_STATE_BIND_VS: -#if 0 - spu_bind_vertex_shader(&draw, - (struct cell_shader_info *) &buffer[pos+1]); -#endif - pos += 1 + ROUNDUP16(sizeof(struct cell_shader_info)) / 16; - break; - case CELL_CMD_STATE_ATTRIB_FETCH: - cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) - &buffer[pos+1]); - pos += 1 + ROUNDUP16(sizeof(struct cell_attribute_fetch_code)) / 16; - break; - /* - * misc commands - */ - case CELL_CMD_FINISH: - cmd_finish(); - pos += 1; - break; - case CELL_CMD_FENCE: - { - struct cell_command_fence *fence_cmd = - (struct cell_command_fence *) &buffer[pos]; - cmd_fence(fence_cmd); - pos += sizeof(*fence_cmd) / 16; - } - break; - case CELL_CMD_RELEASE_VERTS: - { - struct cell_command_release_verts *release - = (struct cell_command_release_verts *) &buffer[pos]; - cmd_release_verts(release); - pos += sizeof(*release) / 16; - } - break; - case CELL_CMD_FLUSH_BUFFER_RANGE: { - struct cell_buffer_range *br = (struct cell_buffer_range *) - &buffer[pos+1]; - - spu_dcache_mark_dirty((unsigned) br->base, br->size); - pos += 1 + ROUNDUP16(sizeof(struct cell_buffer_range)) / 16; - break; - } - default: - printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, si_to_uint(buffer[pos])); - ASSERT(0); - break; - } - } - - D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n"); -} - - -#define PERF 0 - - -/** - * Main loop for SPEs: Get a command, execute it, repeat. 
- */ -void -command_loop(void) -{ - int exitFlag = 0; - uint t0, t1; - - D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); - - while (!exitFlag) { - unsigned opcode; - - D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); - - if (PERF) - spu_write_decrementer(~0); - - /* read/wait from mailbox */ - opcode = (unsigned int) spu_read_in_mbox(); - D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); - - if (PERF) - t0 = spu_read_decrementer(); - - switch (opcode & CELL_CMD_OPCODE_MASK) { - case CELL_CMD_EXIT: - D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); - exitFlag = 1; - break; - case CELL_CMD_VS_EXECUTE: -#if 0 - spu_execute_vertex_shader(&draw, &cmd.vs); -#endif - break; - case CELL_CMD_BATCH: - cmd_batch(opcode); - break; - default: - printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); - } - - if (PERF) { - t1 = spu_read_decrementer(); - printf("wait mbox time: %gms batch time: %gms\n", - (~0u - t0) * spu.init.inv_timebase, - (t0 - t1) * spu.init.inv_timebase); - } - } - - D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); - - if (spu.init.debug_flags & CELL_DEBUG_CACHE) - spu_dcache_report(); -} - -/* Initialize this module; we manage the fragment ops buffer here. */ -void -spu_command_init(void) -{ - /* Install default/fallback fragment processing function. - * This will normally be overriden by a code-gen'd function - * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set. - */ - spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; - spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; - - /* Set up the basic empty buffer for code-gen'ed fragment ops */ - spu.fragment_ops_code = NULL; - spu.fragment_ops_code_size = 0; -} - -void -spu_command_close(void) -{ - /* Deallocate the code-gen buffer for fragment ops, and reset the - * fragment ops functions to their initial setting (just to leave - * things in a good state). 
- */ - if (spu.fragment_ops_code != NULL) { - free(spu.fragment_ops_code); - } - spu_command_init(); -} diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h deleted file mode 100644 index 83dcdade288..00000000000 --- a/src/gallium/drivers/cell/spu/spu_command.h +++ /dev/null @@ -1,35 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -extern void -command_loop(void); - -extern void -spu_command_init(void); - -extern void -spu_command_close(void); diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c deleted file mode 100644 index a6d67634fd8..00000000000 --- a/src/gallium/drivers/cell/spu/spu_dcache.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "cell/common.h" -#include "spu_main.h" -#include "spu_dcache.h" - -#define CACHELINE_LOG2SIZE 7 -#define LINE_SIZE (1U << 7) -#define ALIGN_MASK (~(LINE_SIZE - 1)) - -#define CACHE_NAME data -#define CACHED_TYPE qword -#define CACHE_TYPE CACHE_TYPE_RO -#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0) -#define CACHE_LOG2NNWAY 2 -#define CACHE_LOG2NSETS 6 -#ifdef DEBUG -#define CACHE_STATS 1 -#endif -#include - -/* Yes folks, this is ugly. - */ -#undef CACHE_NWAY -#undef CACHE_NSETS -#define CACHE_NAME data -#define CACHE_NWAY 4 -#define CACHE_NSETS (1U << 6) - - -/** - * Fetch between arbitrary number of bytes from an unaligned address - * - * \param dst Destination data buffer - * \param ea Main memory effective address of source data - * \param size Number of bytes to read - * - * \warning - * As is hinted by the type of the \c dst pointer, this function writes - * multiples of 16-bytes. - */ -void -spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size) -{ - const int shift = ea & 0x0f; - const unsigned read_size = ROUNDUP16(size + shift); - const unsigned last_read = ROUNDUP16(ea + size); - const qword *const last_write = dst + (ROUNDUP16(size) / 16); - unsigned i; - - - if (shift == 0) { - /* Data is already aligned. Fetch directly into the destination buffer. - */ - for (i = 0; i < size; i += 16) { - *(dst++) = cache_rd(data, ea + i); - } - } else { - qword hi; - - - /* Please exercise extreme caution when modifying this code. This code - * must not read past the end of the page containing the source data, - * and it must not write more than ((size + 15) / 16) qwords to the - * destination buffer. 
- */ - ea &= ~0x0f; - hi = cache_rd(data, ea); - for (i = 16; i < read_size; i += 16) { - qword lo = cache_rd(data, ea + i); - - *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), - (qword) spu_rlmaskqwbyte(lo, shift - 16)); - hi = lo; - } - - if (dst != last_write) { - *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0)); - } - } - - ASSERT((ea + i) == last_read); - ASSERT(dst == last_write); -} - - -/** - * Notify the cache that a range of main memory may have been modified - */ -void -spu_dcache_mark_dirty(unsigned ea, unsigned size) -{ - unsigned i; - const unsigned aligned_start = (ea & ALIGN_MASK); - const unsigned aligned_end = (ea + size + (LINE_SIZE - 1)) - & ALIGN_MASK; - - - for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { - const unsigned entry = __cache_dir[i]; - const unsigned addr = entry & ~0x0f; - - __cache_dir[i] = ((addr >= aligned_start) && (addr < aligned_end)) - ? (entry & ~CACHELINE_VALID) : entry; - } -} - - -/** - * Print cache utilization report - */ -void -spu_dcache_report(void) -{ -#ifdef CACHE_STATS - if (spu.init.id == 0) { - printf("SPU 0: Texture cache report:\n"); - cache_pr_stats(data); - } -#endif -} - - diff --git a/src/gallium/drivers/cell/spu/spu_dcache.h b/src/gallium/drivers/cell/spu/spu_dcache.h deleted file mode 100644 index 39a19eb31b5..00000000000 --- a/src/gallium/drivers/cell/spu/spu_dcache.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef SPU_DCACHE_H -#define SPU_DCACHE_H - -extern void -spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size); - -extern void -spu_dcache_mark_dirty(unsigned ea, unsigned size); - -extern void -spu_dcache_report(void); - -#endif /* SPU_DCACHE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c deleted file mode 100644 index e4ebeb595ce..00000000000 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ /dev/null @@ -1,1870 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * TGSI interpretor/executor. - * - * Flow control information: - * - * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) - * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special - * care since a condition may be true for some quad components but false - * for other components. - * - * We basically execute all statements (even if they're in the part of - * an IF/ELSE clause that's "not taken") and use a special mask to - * control writing to destination registers. This is the ExecMask. - * See store_dest(). - * - * The ExecMask is computed from three other masks (CondMask, LoopMask and - * ContMask) which are controlled by the flow control instructions (namely: - * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 
- * - * - * Authors: - * Michal Krol - * Brian Paul - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/tgsi_parse.h" -#include "tgsi/tgsi_util.h" -#include "spu_exec.h" -#include "spu_main.h" -#include "spu_vertex_shader.h" -#include "spu_dcache.h" -#include "cell/common.h" - -#define TILE_TOP_LEFT 0 -#define TILE_TOP_RIGHT 1 -#define TILE_BOTTOM_LEFT 2 -#define TILE_BOTTOM_RIGHT 3 - -/* - * Shorthand locations of various utility registers (_I = Index, _C = Channel) - */ -#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I -#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C -#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I -#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C -#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I -#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C -#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I -#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C -#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I -#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C -#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I -#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C -#define TEMP_128_I TGSI_EXEC_TEMP_128_I -#define TEMP_128_C TGSI_EXEC_TEMP_128_C -#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I -#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C -#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I -#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C -#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I -#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C -#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I -#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C -#define TEMP_R0 TGSI_EXEC_TEMP_R0 - -#define FOR_EACH_CHANNEL(CHAN)\ - for (CHAN = 0; CHAN < 4; CHAN++) - -#define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) - -#define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) - -#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ - FOR_EACH_CHANNEL( 
CHAN )\ - if (IS_CHANNEL_ENABLED( INST, CHAN )) - -#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ - FOR_EACH_CHANNEL( CHAN )\ - if (IS_CHANNEL_ENABLED2( INST, CHAN )) - - -/** The execution mask depends on the conditional mask and the loop mask */ -#define UPDATE_EXEC_MASK(MACH) \ - MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask - - -#define CHAN_X 0 -#define CHAN_Y 1 -#define CHAN_Z 2 -#define CHAN_W 3 - - - -/** - * Initialize machine state by expanding tokens to full instructions, - * allocating temporary storage, setting up constants, etc. - * After this, we can call spu_exec_machine_run() many times. - */ -void -spu_exec_machine_init(struct spu_exec_machine *mach, - uint numSamplers, - struct spu_sampler *samplers, - unsigned processor) -{ - const qword zero = si_il(0); - const qword not_zero = si_il(~0); - - (void) numSamplers; - mach->Samplers = samplers; - mach->Processor = processor; - mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; - - /* Setup constants. 
*/ - mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; - mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; - mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); - mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); - - mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); - mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); - mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); - mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); -} - - -static INLINE qword -micro_abs(qword src) -{ - return si_rotmi(si_shli(src, 1), -1); -} - -static INLINE qword -micro_ceil(qword src) -{ - return (qword) _ceilf4((vec_float4) src); -} - -static INLINE qword -micro_cos(qword src) -{ - return (qword) _cosf4((vec_float4) src); -} - -static const qword br_shuf = { - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, - TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, - TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, -}; - -static const qword bl_shuf = { - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, - TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, - TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, -}; - -static const qword tl_shuf = { - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, - TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, - TILE_TOP_LEFT + 2, TILE_TOP_LEFT 
+ 3, -}; - -static qword -micro_ddx(qword src) -{ - qword bottom_right = si_shufb(src, src, br_shuf); - qword bottom_left = si_shufb(src, src, bl_shuf); - - return si_fs(bottom_right, bottom_left); -} - -static qword -micro_ddy(qword src) -{ - qword top_left = si_shufb(src, src, tl_shuf); - qword bottom_left = si_shufb(src, src, bl_shuf); - - return si_fs(top_left, bottom_left); -} - -static INLINE qword -micro_div(qword src0, qword src1) -{ - return (qword) _divf4((vec_float4) src0, (vec_float4) src1); -} - -static qword -micro_flr(qword src) -{ - return (qword) _floorf4((vec_float4) src); -} - -static qword -micro_frc(qword src) -{ - return si_fs(src, (qword) _floorf4((vec_float4) src)); -} - -static INLINE qword -micro_ge(qword src0, qword src1) -{ - return si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); -} - -static qword -micro_lg2(qword src) -{ - return (qword) _log2f4((vec_float4) src); -} - -static INLINE qword -micro_lt(qword src0, qword src1) -{ - const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); - - return si_xori(tmp, 0xff); -} - -static INLINE qword -micro_max(qword src0, qword src1) -{ - return si_selb(src1, src0, si_fcgt(src0, src1)); -} - -static INLINE qword -micro_min(qword src0, qword src1) -{ - return si_selb(src0, src1, si_fcgt(src0, src1)); -} - -static qword -micro_neg(qword src) -{ - return si_xor(src, (qword) spu_splats(0x80000000)); -} - -static qword -micro_set_sign(qword src) -{ - return si_or(src, (qword) spu_splats(0x80000000)); -} - -static qword -micro_pow(qword src0, qword src1) -{ - return (qword) _powf4((vec_float4) src0, (vec_float4) src1); -} - -static qword -micro_rnd(qword src) -{ - const qword half = (qword) spu_splats(0.5f); - - /* May be able to use _roundf4. There may be some difference, though. 
- */ - return (qword) _floorf4((vec_float4) si_fa(src, half)); -} - -static INLINE qword -micro_ishr(qword src0, qword src1) -{ - return si_rotma(src0, si_sfi(src1, 0)); -} - -static qword -micro_trunc(qword src) -{ - return (qword) _truncf4((vec_float4) src); -} - -static qword -micro_sin(qword src) -{ - return (qword) _sinf4((vec_float4) src); -} - -static INLINE qword -micro_sqrt(qword src) -{ - return (qword) _sqrtf4((vec_float4) src); -} - -static void -fetch_src_file_channel( - const struct spu_exec_machine *mach, - const uint file, - const uint swizzle, - const union spu_exec_channel *index, - union spu_exec_channel *chan ) -{ - switch( swizzle ) { - case TGSI_SWIZZLE_X: - case TGSI_SWIZZLE_Y: - case TGSI_SWIZZLE_Z: - case TGSI_SWIZZLE_W: - switch( file ) { - case TGSI_FILE_CONSTANT: { - unsigned i; - - for (i = 0; i < 4; i++) { - const float *ptr = mach->Consts[index->i[i]]; - float tmp[4]; - - spu_dcache_fetch_unaligned((qword *) tmp, - (uintptr_t)(ptr + swizzle), - sizeof(float)); - - chan->f[i] = tmp[0]; - } - break; - } - - case TGSI_FILE_INPUT: - chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_TEMPORARY: - chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_IMMEDIATE: - ASSERT( index->i[0] < (int) mach->ImmLimit ); - ASSERT( index->i[1] < (int) mach->ImmLimit ); - ASSERT( index->i[2] < (int) mach->ImmLimit ); - ASSERT( index->i[3] < (int) mach->ImmLimit ); - - chan->f[0] = mach->Imms[index->i[0]][swizzle]; - chan->f[1] = mach->Imms[index->i[1]][swizzle]; - chan->f[2] = mach->Imms[index->i[2]][swizzle]; - chan->f[3] = 
mach->Imms[index->i[3]][swizzle]; - break; - - case TGSI_FILE_ADDRESS: - chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_OUTPUT: - /* vertex/fragment output vars can be read too */ - chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; - break; - - default: - ASSERT( 0 ); - } - break; - - default: - ASSERT( 0 ); - } -} - -static void -fetch_source( - const struct spu_exec_machine *mach, - union spu_exec_channel *chan, - const struct tgsi_full_src_register *reg, - const uint chan_index ) -{ - union spu_exec_channel index; - uint swizzle; - - index.i[0] = - index.i[1] = - index.i[2] = - index.i[3] = reg->Register.Index; - - if (reg->Register.Indirect) { - union spu_exec_channel index2; - union spu_exec_channel indir_index; - - index2.i[0] = - index2.i[1] = - index2.i[2] = - index2.i[3] = reg->Indirect.Index; - - swizzle = tgsi_util_get_src_register_swizzle(®->Indirect, - CHAN_X); - fetch_src_file_channel( - mach, - reg->Indirect.File, - swizzle, - &index2, - &indir_index ); - - index.q = si_a(index.q, indir_index.q); - } - - if( reg->Register.Dimension ) { - switch( reg->Register.File ) { - case TGSI_FILE_INPUT: - index.q = si_mpyi(index.q, 17); - break; - case TGSI_FILE_CONSTANT: - index.q = si_shli(index.q, 12); - break; - default: - ASSERT( 0 ); - } - - index.i[0] += reg->Dimension.Index; - index.i[1] += reg->Dimension.Index; - index.i[2] += reg->Dimension.Index; - index.i[3] += reg->Dimension.Index; - - if (reg->Dimension.Indirect) { - union spu_exec_channel index2; - union spu_exec_channel indir_index; - - index2.i[0] = - index2.i[1] = - index2.i[2] = - 
index2.i[3] = reg->DimIndirect.Index; - - swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); - fetch_src_file_channel( - mach, - reg->DimIndirect.File, - swizzle, - &index2, - &indir_index ); - - index.q = si_a(index.q, indir_index.q); - } - } - - swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); - fetch_src_file_channel( - mach, - reg->Register.File, - swizzle, - &index, - chan ); - - switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { - case TGSI_UTIL_SIGN_CLEAR: - chan->q = micro_abs(chan->q); - break; - - case TGSI_UTIL_SIGN_SET: - chan->q = micro_set_sign(chan->q); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - chan->q = micro_neg(chan->q); - break; - - case TGSI_UTIL_SIGN_KEEP: - break; - } - - if (reg->RegisterExtMod.Complement) { - chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); - } -} - -static void -store_dest( - struct spu_exec_machine *mach, - const union spu_exec_channel *chan, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - uint chan_index ) -{ - union spu_exec_channel *dst; - - switch( reg->Register.File ) { - case TGSI_FILE_NULL: - return; - - case TGSI_FILE_OUTPUT: - dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] - + reg->Register.Index].xyzw[chan_index]; - break; - - case TGSI_FILE_TEMPORARY: - dst = &mach->Temps[reg->Register.Index].xyzw[chan_index]; - break; - - case TGSI_FILE_ADDRESS: - dst = &mach->Addrs[reg->Register.Index].xyzw[chan_index]; - break; - - default: - ASSERT( 0 ); - return; - } - - switch (inst->Instruction.Saturate) - { - case TGSI_SAT_NONE: - if (mach->ExecMask & 0x1) - dst->i[0] = chan->i[0]; - if (mach->ExecMask & 0x2) - dst->i[1] = chan->i[1]; - if (mach->ExecMask & 0x4) - dst->i[2] = chan->i[2]; - if (mach->ExecMask & 0x8) - dst->i[3] = chan->i[3]; - break; - - case TGSI_SAT_ZERO_ONE: - /* XXX need to obey ExecMask here */ - dst->q = micro_max(chan->q, 
mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q); - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - ASSERT( 0 ); - break; - - default: - ASSERT( 0 ); - } -} - -#define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->Src[INDEX], CHAN) - -#define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN ) - - -/** - * Execute ARB-style KIL which is predicated by a src register. - * Kill fragment if any of the four values is less than zero. - */ -static void -exec_kil(struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - uint uniquemask; - uint chan_index; - uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ - union spu_exec_channel r[1]; - - /* This mask stores component bits that were already tested. */ - uniquemask = 0; - - for (chan_index = 0; chan_index < 4; chan_index++) - { - uint swizzle; - uint i; - - /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_swizzle ( - &inst->Src[0], - chan_index); - - /* check if the component has not been already tested */ - if (uniquemask & (1 << swizzle)) - continue; - uniquemask |= 1 << swizzle; - - FETCH(&r[0], 0, chan_index); - for (i = 0; i < 4; i++) - if (r[0].f[i] < 0.0f) - kilmask |= 1 << i; - } - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; -} - -/** - * Execute NVIDIA-style KIL which is predicated by a condition code. - * Kill fragment if the condition code is TRUE. - */ -static void -exec_kilp(struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ - - /* TODO: build kilmask from CC mask */ - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; -} - -/* - * Fetch a texel using STR texture coordinates. 
- */ -static void -fetch_texel( struct spu_sampler *sampler, - const union spu_exec_channel *s, - const union spu_exec_channel *t, - const union spu_exec_channel *p, - float lodbias, /* XXX should be float[4] */ - union spu_exec_channel *r, - union spu_exec_channel *g, - union spu_exec_channel *b, - union spu_exec_channel *a ) -{ - qword rgba[4]; - qword out[4]; - - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, - (float (*)[4]) rgba); - - _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba); - r->q = out[0]; - g->q = out[1]; - b->q = out[2]; - a->q = out[3]; -} - - -static void -exec_tex(struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst, - boolean biasLod, boolean projected) -{ - const uint unit = inst->Src[1].Register.Index; - union spu_exec_channel r[8]; - uint chan_index; - float lodBias; - - /* printf("Sampler %u unit %u\n", sampler, unit); */ - - switch (inst->InstructionExtTexture.Texture) { - case TGSI_TEXTURE_1D: - - FETCH(&r[0], 0, CHAN_X); - - if (projected) { - FETCH(&r[1], 0, CHAN_W); - r[0].q = micro_div(r[0].q, r[1].q); - } - - if (biasLod) { - FETCH(&r[1], 0, CHAN_W); - lodBias = r[2].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ - break; - - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 0, CHAN_Z); - - if (projected) { - FETCH(&r[3], 0, CHAN_W); - r[0].q = micro_div(r[0].q, r[3].q); - r[1].q = micro_div(r[1].q, r[3].q); - r[2].q = micro_div(r[2].q, r[3].q); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, /* inputs */ - &r[0], &r[1], &r[2], &r[3]); /* outputs */ - break; - - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: - - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 
0, CHAN_Z); - - if (projected) { - FETCH(&r[3], 0, CHAN_W); - r[0].q = micro_div(r[0].q, r[3].q); - r[1].q = micro_div(r[1].q, r[3].q); - r[2].q = micro_div(r[2].q, r[3].q); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, - &r[0], &r[1], &r[2], &r[3]); - break; - - default: - ASSERT (0); - } - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); - } -} - - - -static void -constant_interpolation( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - unsigned i; - - for( i = 0; i < QUAD_SIZE; i++ ) { - mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; - } -} - -static void -linear_interpolation( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - mach->Inputs[attrib].xyzw[chan].f[0] = a0; - mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; - mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; - mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; -} - -static void -perspective_interpolation( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - const float *w = mach->QuadPos.xyzw[3].f; - /* divide by W here */ - mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; - mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; - 
mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; - mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; -} - - -typedef void (* interpolation_func)( - struct spu_exec_machine *mach, - unsigned attrib, - unsigned chan ); - -static void -exec_declaration(struct spu_exec_machine *mach, - const struct tgsi_full_declaration *decl) -{ - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - interpolation_func interp; - - first = decl->Range.First; - last = decl->Range.Last; - mask = decl->Declaration.UsageMask; - - switch( decl->Declaration.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - interp = constant_interpolation; - break; - - case TGSI_INTERPOLATE_LINEAR: - interp = linear_interpolation; - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - interp = perspective_interpolation; - break; - - default: - ASSERT( 0 ); - } - - if( mask == TGSI_WRITEMASK_XYZW ) { - unsigned i, j; - - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - interp( mach, i, j ); - } - } - } - else { - unsigned i, j; - - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - interp( mach, i, j ); - } - } - } - } - } - } -} - -static void -exec_instruction( - struct spu_exec_machine *mach, - const struct tgsi_full_instruction *inst, - int *pc ) -{ - uint chan_index; - union spu_exec_channel r[8]; - - (*pc)++; - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = si_cflts(r[0].q, 0); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MOV: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_LIT: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - - if 
(IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_X ); - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - STORE( &r[0], 0, CHAN_Y ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[1], 0, CHAN_Y ); - r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - - FETCH( &r[2], 0, CHAN_W ); - r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); - r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); - r[1].q = micro_pow(r[1].q, r[2].q); - - /* r0 = (r0 > 0.0) ? r1 : 0.0 - */ - r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); - r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, - r[0].q); - STORE( &r[0], 0, CHAN_Z ); - } - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_RCP: - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_RSQ: - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_sqrt(r[0].q); - r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_EXP: - ASSERT (0); - break; - - case TGSI_OPCODE_LOG: - ASSERT (0); - break; - - case TGSI_OPCODE_MUL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) - { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = si_fm(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_ADD: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_fa(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case 
TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - - FETCH( &r[1], 0, CHAN_Z ); - FETCH( &r[2], 1, CHAN_Z ); - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FETCH(&r[1], 0, CHAN_W); - FETCH(&r[2], 1, CHAN_W); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DST: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - FETCH( &r[0], 0, CHAN_Y ); - FETCH( &r[1], 1, CHAN_Y); - r[0].q = si_fm(r[0].q, r[1].q); - STORE( &r[0], 0, CHAN_Y ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_Z ); - STORE( &r[0], 0, CHAN_Z ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - FETCH( &r[0], 1, CHAN_W ); - STORE( &r[0], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MIN: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = micro_min(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_MAX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = micro_max(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLT: - /* 
TGSI_OPCODE_SETLT */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = micro_ge(r[0].q, r[1].q); - r[0].q = si_xori(r[0].q, 0xff); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = micro_ge(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MAD: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - FETCH( &r[2], 2, chan_index ); - r[0].q = si_fma(r[0].q, r[1].q, r[2].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SUB: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - r[0].q = si_fs(r[0].q, r[1].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_LRP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - - r[1].q = si_fs(r[1].q, r[2].q); - r[0].q = si_fma(r[0].q, r[1].q, r[2].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_CND: - ASSERT (0); - break; - - case TGSI_OPCODE_DP2A: - ASSERT (0); - break; - - case TGSI_OPCODE_FRC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_frc(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_CLAMP: - ASSERT (0); - break; - - case TGSI_OPCODE_FLR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_flr(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_ROUND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_rnd(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - 
case TGSI_OPCODE_EX2: - FETCH(&r[0], 0, CHAN_X); - - r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_LG2: - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_lg2(r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_POW: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - r[0].q = micro_pow(r[0].q, r[1].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_XPD: - /* TGSI_OPCODE_XPD */ - FETCH(&r[0], 0, CHAN_Y); - FETCH(&r[1], 1, CHAN_Z); - FETCH(&r[3], 0, CHAN_Z); - FETCH(&r[4], 1, CHAN_Y); - - /* r2 = (r0 * r1) - (r3 * r5) - */ - r[2].q = si_fm(r[3].q, r[5].q); - r[2].q = si_fms(r[0].q, r[1].q, r[2].q); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[2], 0, CHAN_X ); - } - - FETCH(&r[2], 1, CHAN_X); - FETCH(&r[5], 0, CHAN_X); - - /* r3 = (r3 * r2) - (r1 * r5) - */ - r[1].q = si_fm(r[1].q, r[5].q); - r[3].q = si_fms(r[3].q, r[2].q, r[1].q); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - STORE( &r[3], 0, CHAN_Y ); - } - - /* r5 = (r5 * r4) - (r0 * r2) - */ - r[0].q = si_fm(r[0].q, r[2].q); - r[5].q = si_fms(r[5].q, r[4].q, r[0].q); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[5], 0, CHAN_Z ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_ABS: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - - r[0].q = micro_abs(r[0].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_RCC: - ASSERT (0); - break; - - case TGSI_OPCODE_DPH: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - 
FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FETCH(&r[1], 1, CHAN_W); - - r[0].q = si_fa(r[0].q, r[1].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_COS: - FETCH(&r[0], 0, CHAN_X); - - r[0].q = micro_cos(r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_ddx(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDY: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_ddy(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_KILP: - exec_kilp (mach, inst); - break; - - case TGSI_OPCODE_KIL: - exec_kil (mach, inst); - break; - - case TGSI_OPCODE_PK2H: - ASSERT (0); - break; - - case TGSI_OPCODE_PK2US: - ASSERT (0); - break; - - case TGSI_OPCODE_PK4B: - ASSERT (0); - break; - - case TGSI_OPCODE_PK4UB: - ASSERT (0); - break; - - case TGSI_OPCODE_RFL: - ASSERT (0); - break; - - case TGSI_OPCODE_SEQ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = si_fceq(r[0].q, r[1].q); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SFL: - ASSERT (0); - break; - - case TGSI_OPCODE_SGT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_fcgt(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SIN: - FETCH( &r[0], 0, CHAN_X ); - r[0].q = micro_sin(r[0].q); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, 
chan_index ); - - r[0].q = si_fcgt(r[0].q, r[1].q); - r[0].q = si_xori(r[0].q, 0xff); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SNE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = si_fceq(r[0].q, r[1].q); - r[0].q = si_xori(r[0].q, 0xff); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_STR: - ASSERT (0); - break; - - case TGSI_OPCODE_TEX: - /* simple texture lookup */ - /* src[0] = texcoord */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, FALSE); - break; - - case TGSI_OPCODE_TXB: - /* Texture lookup with lod bias */ - /* src[0] = texcoord (src[0].w = load bias) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); - break; - - case TGSI_OPCODE_TXD: - /* Texture lookup with explict partial derivatives */ - /* src[0] = texcoord */ - /* src[1] = d[strq]/dx */ - /* src[2] = d[strq]/dy */ - /* src[3] = sampler unit */ - ASSERT (0); - break; - - case TGSI_OPCODE_TXL: - /* Texture lookup with explit LOD */ - /* src[0] = texcoord (src[0].w = load bias) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); - break; - - case TGSI_OPCODE_TXP: - /* Texture lookup with projection */ - /* src[0] = texcoord (src[0].w = projection) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, TRUE); - break; - - case TGSI_OPCODE_UP2H: - ASSERT (0); - break; - - case TGSI_OPCODE_UP2US: - ASSERT (0); - break; - - case TGSI_OPCODE_UP4B: - ASSERT (0); - break; - - case TGSI_OPCODE_UP4UB: - ASSERT (0); - break; - - case TGSI_OPCODE_X2D: - ASSERT (0); - break; - - case TGSI_OPCODE_ARA: - ASSERT (0); - break; - - case TGSI_OPCODE_ARR: - ASSERT (0); - break; - - case TGSI_OPCODE_BRA: - ASSERT (0); - break; - - case TGSI_OPCODE_CAL: - /* skip the call if no execution channels are enabled */ - if (mach->ExecMask) { - /* do the call */ - - /* push the Cond, Loop, Cont stacks */ - ASSERT(mach->CondStackTop < 
TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; - ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; - - ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); - mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; - - /* note that PC was already incremented above */ - mach->CallStack[mach->CallStackTop++] = *pc; - *pc = inst->InstructionExtLabel.Label; - } - break; - - case TGSI_OPCODE_RET: - mach->FuncMask &= ~mach->ExecMask; - UPDATE_EXEC_MASK(mach); - - if (mach->ExecMask == 0x0) { - /* really return now (otherwise, keep executing */ - - if (mach->CallStackTop == 0) { - /* returning from main() */ - *pc = -1; - return; - } - *pc = mach->CallStack[--mach->CallStackTop]; - - /* pop the Cond, Loop, Cont stacks */ - ASSERT(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - ASSERT(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - ASSERT(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - ASSERT(mach->FuncStackTop > 0); - mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; - - UPDATE_EXEC_MASK(mach); - } - break; - - case TGSI_OPCODE_SSG: - ASSERT (0); - break; - - case TGSI_OPCODE_CMP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - - /* r0 = (r0 < 0.0) ? 
r1 : r2 - */ - r[3].q = si_xor(r[3].q, r[3].q); - r[0].q = micro_lt(r[0].q, r[3].q); - r[0].q = si_selb(r[1].q, r[2].q, r[0].q); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_SCS: - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( &r[0], 0, CHAN_X ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { - r[1].q = micro_cos(r[0].q); - STORE( &r[1], 0, CHAN_X ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - r[1].q = micro_sin(r[0].q); - STORE( &r[1], 0, CHAN_Y ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_NRM: - ASSERT (0); - break; - - case TGSI_OPCODE_DIV: - ASSERT( 0 ); - break; - - case TGSI_OPCODE_DP2: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - r[0].q = si_fm(r[0].q, r[1].q); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - r[0].q = si_fma(r[1].q, r[2].q, r[0].q); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_IF: - /* push CondMask */ - ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; - FETCH( &r[0], 0, CHAN_X ); - /* update CondMask */ - if( ! r[0].u[0] ) { - mach->CondMask &= ~0x1; - } - if( ! r[0].u[1] ) { - mach->CondMask &= ~0x2; - } - if( ! r[0].u[2] ) { - mach->CondMask &= ~0x4; - } - if( ! 
r[0].u[3] ) { - mach->CondMask &= ~0x8; - } - UPDATE_EXEC_MASK(mach); - /* Todo: If CondMask==0, jump to ELSE */ - break; - - case TGSI_OPCODE_ELSE: - /* invert CondMask wrt previous mask */ - { - uint prevMask; - ASSERT(mach->CondStackTop > 0); - prevMask = mach->CondStack[mach->CondStackTop - 1]; - mach->CondMask = ~mach->CondMask & prevMask; - UPDATE_EXEC_MASK(mach); - /* Todo: If CondMask==0, jump to ENDIF */ - } - break; - - case TGSI_OPCODE_ENDIF: - /* pop CondMask */ - ASSERT(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_END: - /* halt execution */ - *pc = -1; - break; - - case TGSI_OPCODE_PUSHA: - ASSERT (0); - break; - - case TGSI_OPCODE_POPA: - ASSERT (0); - break; - - case TGSI_OPCODE_CEIL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_ceil(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_I2F: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = si_csflt(r[0].q, 0); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_NOT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = si_xorbi(r[0].q, 0xff); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_TRUNC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - r[0].q = micro_trunc(r[0].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SHL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - - r[0].q = si_shl(r[0].q, r[1].q); - - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_ISHR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = micro_ishr(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case 
TGSI_OPCODE_AND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_and(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_OR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_or(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MOD: - ASSERT (0); - break; - - case TGSI_OPCODE_XOR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - r[0].q = si_xor(r[0].q, r[1].q); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SAD: - ASSERT (0); - break; - - case TGSI_OPCODE_TXF: - ASSERT (0); - break; - - case TGSI_OPCODE_TXQ: - ASSERT (0); - break; - - case TGSI_OPCODE_EMIT: - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; - break; - - case TGSI_OPCODE_ENDPRIM: - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; - break; - - case TGSI_OPCODE_BGNLOOP: - /* push LoopMask and ContMasks */ - ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; - break; - - case TGSI_OPCODE_ENDLOOP: - /* Restore ContMask, but don't pop */ - ASSERT(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; - if (mach->LoopMask) { - /* repeat loop: jump to instruction just past BGNLOOP */ - *pc = inst->InstructionExtLabel.Label + 1; - } - else { - /* exit loop: pop LoopMask */ - ASSERT(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - /* pop ContMask */ - 
ASSERT(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - } - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_BRK: - /* turn off loop channels for each enabled exec channel */ - mach->LoopMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_CONT: - /* turn off cont channels for each enabled exec channel */ - mach->ContMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_BGNSUB: - /* no-op */ - break; - - case TGSI_OPCODE_ENDSUB: - /* no-op */ - break; - - case TGSI_OPCODE_NOP: - break; - - default: - ASSERT( 0 ); - } -} - - -/** - * Run TGSI interpreter. - * \return bitmask of "alive" quad components - */ -uint -spu_exec_machine_run( struct spu_exec_machine *mach ) -{ - uint i; - int pc = 0; - - mach->CondMask = 0xf; - mach->LoopMask = 0xf; - mach->ContMask = 0xf; - mach->FuncMask = 0xf; - mach->ExecMask = 0xf; - - mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */ - ASSERT(mach->CondStackTop == 0); - ASSERT(mach->LoopStackTop == 0); - ASSERT(mach->ContStackTop == 0); - ASSERT(mach->CallStackTop == 0); - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; - - if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; - mach->Primitives[0] = 0; - } - - - /* execute declarations (interpolants) */ - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - for (i = 0; i < mach->NumDeclarations; i++) { - PIPE_ALIGN_VAR(16) - union { - struct tgsi_full_declaration decl; - qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; - } d; - unsigned ea = (unsigned) (mach->Declarations + pc); - - spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); - - exec_declaration( mach, &d.decl ); - } - } - - /* execute 
instructions, until pc is set to -1 */ - while (pc != -1) { - PIPE_ALIGN_VAR(16) - union { - struct tgsi_full_instruction inst; - qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; - } i; - unsigned ea = (unsigned) (mach->Instructions + pc); - - spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); - exec_instruction( mach, & i.inst, &pc ); - } - -#if 0 - /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ - if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { - /* - * Scale back depth component. - */ - for (i = 0; i < 4; i++) - mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; - } -#endif - - return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; -} - - diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h deleted file mode 100644 index 68f4479e53d..00000000000 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ /dev/null @@ -1,173 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#if !defined SPU_EXEC_H -#define SPU_EXEC_H - -#include "pipe/p_compiler.h" - -#include "spu_tgsi_exec.h" - -#if defined __cplusplus -extern "C" { -#endif - -/** - * Registers may be treated as float, signed int or unsigned int. - */ -union spu_exec_channel -{ - float f[QUAD_SIZE]; - int i[QUAD_SIZE]; - unsigned u[QUAD_SIZE]; - qword q; -}; - -/** - * A vector[RGBA] of channels[4 pixels] - */ -struct spu_exec_vector -{ - union spu_exec_channel xyzw[NUM_CHANNELS]; -}; - -/** - * For fragment programs, information for computing fragment input - * values from plane equation of the triangle/line. - */ -struct spu_interp_coef -{ - float a0[NUM_CHANNELS]; /* in an xyzw layout */ - float dadx[NUM_CHANNELS]; - float dady[NUM_CHANNELS]; -}; - - -struct softpipe_tile_cache; /**< Opaque to TGSI */ - -/** - * Information for sampling textures, which must be implemented - * by code outside the TGSI executor. - */ -struct spu_sampler -{ - const struct pipe_sampler_state *state; - struct pipe_resource *texture; - /** Get samples for four fragments in a quad */ - void (*get_samples)(struct spu_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); - void *pipe; /*XXX temporary*/ - struct softpipe_tile_cache *cache; -}; - - -/** - * Run-time virtual machine state for executing TGSI shader. 
- */ -struct spu_exec_machine -{ - /* - * 32 program temporaries - * 4 internal temporaries - * 1 address - */ - PIPE_ALIGN_VAR(16) - struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS - + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; - - struct spu_exec_vector *Addrs; - - struct spu_sampler *Samplers; - - float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; - unsigned ImmLimit; - float (*Consts)[4]; - struct spu_exec_vector *Inputs; - struct spu_exec_vector *Outputs; - unsigned Processor; - - /* GEOMETRY processor only. */ - unsigned *Primitives; - - /* FRAGMENT processor only. */ - const struct spu_interp_coef *InterpCoefs; - struct spu_exec_vector QuadPos; - - /* Conditional execution masks */ - uint CondMask; /**< For IF/ELSE/ENDIF */ - uint LoopMask; /**< For BGNLOOP/ENDLOOP */ - uint ContMask; /**< For loop CONT statements */ - uint FuncMask; /**< For function calls */ - uint ExecMask; /**< = CondMask & LoopMask */ - - /** Condition mask stack (for nested conditionals) */ - uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; - int CondStackTop; - - /** Loop mask stack (for nested loops) */ - uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; - int LoopStackTop; - - /** Loop continue mask stack (see comments in tgsi_exec.c) */ - uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; - int ContStackTop; - - /** Function execution mask stack (for executing subroutine code) */ - uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; - int FuncStackTop; - - /** Function call stack for saving/restoring the program counter */ - uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; - int CallStackTop; - - struct tgsi_full_instruction *Instructions; - uint NumInstructions; - - struct tgsi_full_declaration *Declarations; - uint NumDeclarations; -}; - - -extern void -spu_exec_machine_init(struct spu_exec_machine *mach, - uint numSamplers, - struct spu_sampler *samplers, - unsigned processor); - -extern uint -spu_exec_machine_run( struct spu_exec_machine *mach ); - - -#if defined __cplusplus -} /* extern "C" */ -#endif - -#endif /* SPU_EXEC_H 
*/ diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c deleted file mode 100644 index 98919c43ffc..00000000000 --- a/src/gallium/drivers/cell/spu/spu_funcs.c +++ /dev/null @@ -1,173 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * SPU functions accessed by shaders. 
- * - * Authors: Brian Paul - */ - - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cell/common.h" -#include "spu_main.h" -#include "spu_funcs.h" -#include "spu_texture.h" - - -/** For "return"-ing four vectors */ -struct vec_4x4 -{ - vector float v[4]; -}; - - -static vector float -spu_cos(vector float x) -{ - return _cos14_v(x); -} - -static vector float -spu_sin(vector float x) -{ - return _sin14_v(x); -} - -static vector float -spu_pow(vector float x, vector float y) -{ - return _powf4(x, y); -} - -static vector float -spu_exp2(vector float x) -{ - return _exp2f4(x); -} - -static vector float -spu_log2(vector float x) -{ - return _log2f4(x); -} - - -static struct vec_4x4 -spu_tex_2d(vector float s, vector float t, vector float r, vector float q, - unsigned unit) -{ - struct vec_4x4 colors; - (void) r; - (void) q; - spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); - return colors; -} - -static struct vec_4x4 -spu_tex_3d(vector float s, vector float t, vector float r, vector float q, - unsigned unit) -{ - struct vec_4x4 colors; - (void) r; - (void) q; - spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); - return colors; -} - -static struct vec_4x4 -spu_tex_cube(vector float s, vector float t, vector float r, vector float q, - unsigned unit) -{ - struct vec_4x4 colors; - (void) q; - sample_texture_cube(s, t, r, unit, colors.v); - return colors; -} - - -/** - * Add named function to list of "exported" functions that will be - * made available to the PPU-hosted code generator. - */ -static void -export_func(struct cell_spu_function_info *spu_functions, - const char *name, void *addr) -{ - uint n = spu_functions->num; - ASSERT(strlen(name) < 16); - strcpy(spu_functions->names[n], name); - spu_functions->addrs[n] = (uint) addr; - spu_functions->num++; - ASSERT(spu_functions->num <= 16); -} - - -/** - * Return info about the SPU's function to the PPU / main memory. 
- * The PPU needs to know the address of some SPU-side functions so - * that we can generate shader code with function calls. - */ -void -return_function_info(void) -{ - PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs; - int tag = TAG_MISC; - - ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ - - funcs.num = 0; - export_func(&funcs, "spu_cos", &spu_cos); - export_func(&funcs, "spu_sin", &spu_sin); - export_func(&funcs, "spu_pow", &spu_pow); - export_func(&funcs, "spu_exp2", &spu_exp2); - export_func(&funcs, "spu_log2", &spu_log2); - export_func(&funcs, "spu_tex_2d", &spu_tex_2d); - export_func(&funcs, "spu_tex_3d", &spu_tex_3d); - export_func(&funcs, "spu_tex_cube", &spu_tex_cube); - - /* Send the function info back to the PPU / main memory */ - mfc_put((void *) &funcs, /* src in local store */ - (unsigned int) spu.init.spu_functions, /* dst in main memory */ - sizeof(funcs), /* bytes */ - tag, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << tag); -} - - - diff --git a/src/gallium/drivers/cell/spu/spu_funcs.h b/src/gallium/drivers/cell/spu/spu_funcs.h deleted file mode 100644 index 3adb6ae99f9..00000000000 --- a/src/gallium/drivers/cell/spu/spu_funcs.h +++ /dev/null @@ -1,35 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef SPU_FUNCS_H -#define SPU_FUNCS_H - -extern void -return_function_info(void); - -#endif - diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c deleted file mode 100644 index 97c86d194da..00000000000 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ /dev/null @@ -1,117 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/* main() for Cell SPU code */ - - -#include -#include - -#include "pipe/p_defines.h" - -#include "spu_funcs.h" -#include "spu_command.h" -#include "spu_main.h" -#include "spu_per_fragment_op.h" -#include "spu_texture.h" -//#include "spu_test.h" -#include "cell/common.h" - - -/* -helpful headers: -/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h -/opt/cell/sdk/usr/include/libmisc.h -*/ - -struct spu_global spu; - - -static void -one_time_init(void) -{ - memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); - memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); - invalidate_tex_cache(); -} - -/* In some versions of the SDK the SPE main takes 'unsigned long' as a - * parameter. In others it takes 'unsigned long long'. 
Use a define to - * select between the two. - */ -#ifdef SPU_MAIN_PARAM_LONG_LONG -typedef unsigned long long main_param_t; -#else -typedef unsigned long main_param_t; -#endif - -/** - * SPE entrypoint. - */ -int -main(main_param_t speid, main_param_t argp) -{ - int tag = 0; - - (void) speid; - - ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); - ASSERT(sizeof(struct cell_command_render) % 8 == 0); - ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0); - ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0); - - one_time_init(); - spu_command_init(); - - D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid); - D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); - - /* get initialization data */ - mfc_get(&spu.init, /* dest */ - (unsigned int) argp, /* src */ - sizeof(struct cell_init_info), /* bytes */ - tag, - 0, /* tid */ - 0 /* rid */); - wait_on_mask( 1 << tag ); - - if (spu.init.id == 0) { - return_function_info(); - } - -#if 0 - if (spu.init.id==0) - spu_test_misc(spu.init.id); -#endif - - command_loop(); - - spu_command_close(); - - return 0; -} diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h deleted file mode 100644 index a9d72f84d56..00000000000 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ /dev/null @@ -1,269 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef SPU_MAIN_H -#define SPU_MAIN_H - - -#include - -#include "cell/common.h" -#include "draw/draw_vertex.h" -#include "pipe/p_state.h" - - -#if DEBUG -/* These debug macros use the unusual construction ", ##__VA_ARGS__" - * which expands to the expected comma + args if variadic arguments - * are supplied, but swallows the comma if there are no variadic - * arguments (which avoids syntax errors that would otherwise occur). - */ -#define D_PRINTF(flag, format,...) \ - if (spu.init.debug_flags & (flag)) \ - printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) -#else -#define D_PRINTF(...) -#endif - - -/** - * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels. - * The data may be addressed through several different types. 
- */ -typedef union { - ushort us[TILE_SIZE][TILE_SIZE]; - uint ui[TILE_SIZE][TILE_SIZE]; - vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4]; - vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2]; -} tile_t; - - -#define TILE_STATUS_CLEAR 1 -#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */ -#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */ -#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */ -#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ - - -/** Function for sampling textures */ -typedef void (*spu_sample_texture_2d_func)(vector float s, - vector float t, - uint unit, uint level, uint face, - vector float colors[4]); - - -/** Function for performing per-fragment ops */ -typedef void (*spu_fragment_ops_func)(uint x, uint y, - tile_t *colorTile, - tile_t *depthStencilTile, - vector float fragZ, - vector float fragRed, - vector float fragGreen, - vector float fragBlue, - vector float fragAlpha, - vector unsigned int mask); - -/** Function for running fragment program */ -typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, - vector float *outputs, - vector float *constants); - - -PIPE_ALIGN_TYPE(16, -struct spu_framebuffer -{ - void *color_start; /**< addr of color surface in main memory */ - void *depth_start; /**< addr of depth surface in main memory */ - enum pipe_format color_format; - enum pipe_format depth_format; - uint width; /**< width in pixels */ - uint height; /**< height in pixels */ - uint width_tiles; /**< width in tiles */ - uint height_tiles; /**< width in tiles */ - - uint color_clear_value; - uint depth_clear_value; - - uint zsize; /**< 0, 2 or 4 bytes per Z */ - float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ -}); - - -/** per-texture level info */ -PIPE_ALIGN_TYPE(16, -struct spu_texture_level -{ - void *start; - ushort width; - ushort height; - ushort depth; - ushort tiles_per_row; - uint bytes_per_image; - /** texcoord 
scale factors */ - vector float scale_s; - vector float scale_t; - vector float scale_r; - /** texcoord masks (if REPEAT then size-1, else ~0) */ - vector signed int mask_s; - vector signed int mask_t; - vector signed int mask_r; - /** texcoord clamp limits */ - vector signed int max_s; - vector signed int max_t; - vector signed int max_r; -}); - - -PIPE_ALIGN_TYPE(16, -struct spu_texture -{ - struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; - uint max_level; - uint target; /**< PIPE_TEXTURE_x */ -}); - - -/** - * All SPU global/context state will be in a singleton object of this type: - */ -PIPE_ALIGN_TYPE(16, -struct spu_global -{ - /** One-time init/constant info */ - struct cell_init_info init; - - /* - * Current state - */ - struct spu_framebuffer fb; - struct pipe_depth_stencil_alpha_state depth_stencil_alpha; - struct pipe_blend_state blend; - struct pipe_blend_color blend_color; - struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; - struct pipe_rasterizer_state rasterizer; - struct spu_texture texture[PIPE_MAX_SAMPLERS]; - struct vertex_info vertex_info; - - /** Current color and Z tiles */ - PIPE_ALIGN_VAR(16) tile_t ctile; - PIPE_ALIGN_VAR(16) tile_t ztile; - - /** Read depth/stencil tiles? 
*/ - boolean read_depth_stencil; - - /** Current tiles' status */ - ubyte cur_ctile_status; - ubyte cur_ztile_status; - - /** Status of all tiles in framebuffer */ - PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; - PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE]; - - /** Current fragment ops machine code, at 8-byte boundary */ - uint *fragment_ops_code; - uint fragment_ops_code_size; - /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */ - spu_fragment_ops_func fragment_ops[2]; - - /** Current fragment program machine code, at 8-byte boundary */ - PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; - /** Current fragment ops function */ - spu_fragment_program_func fragment_program; - - /** Current texture sampler function */ - spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS]; - spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS]; - spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS]; - - /** Fragment program constants */ - vector float constants[4 * CELL_MAX_CONSTANTS]; - -}); - - -extern struct spu_global spu; - - - -/* DMA TAGS */ - -#define TAG_SURFACE_CLEAR 10 -#define TAG_VERTEX_BUFFER 11 -#define TAG_READ_TILE_COLOR 12 -#define TAG_READ_TILE_Z 13 -#define TAG_WRITE_TILE_COLOR 14 -#define TAG_WRITE_TILE_Z 15 -#define TAG_INDEX_BUFFER 16 -#define TAG_BATCH_BUFFER 17 -#define TAG_MISC 18 -#define TAG_DCACHE0 20 -#define TAG_DCACHE1 21 -#define TAG_DCACHE2 22 -#define TAG_DCACHE3 23 -#define TAG_FENCE 24 - - -static INLINE void -wait_on_mask(unsigned tagMask) -{ - mfc_write_tag_mask( tagMask ); - /* wait for completion of _any_ DMAs specified by tagMask */ - mfc_read_tag_status_any(); -} - - -static INLINE void -wait_on_mask_all(unsigned tagMask) -{ - mfc_write_tag_mask( tagMask ); - /* wait for completion of _any_ DMAs specified by tagMask */ - mfc_read_tag_status_all(); -} - - - - - 
-static INLINE void -memset16(ushort *d, ushort value, uint count) -{ - uint i; - for (i = 0; i < count; i++) - d[i] = value; -} - - -static INLINE void -memset32(uint *d, uint value, uint count) -{ - uint i; - for (i = 0; i < count; i++) - d[i] = value; -} - - -#endif /* SPU_MAIN_H */ diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c deleted file mode 100644 index 2415226a244..00000000000 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ /dev/null @@ -1,631 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -/** - * \author Brian Paul - */ - - -#include -#include "pipe/p_format.h" -#include "spu_main.h" -#include "spu_colorpack.h" -#include "spu_per_fragment_op.h" - - -#define LINEAR_QUAD_LAYOUT 1 - - -static INLINE vector float -spu_min(vector float a, vector float b) -{ - vector unsigned int m; - m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */ - return spu_sel(a, b, m); -} - - -static INLINE vector float -spu_max(vector float a, vector float b) -{ - vector unsigned int m; - m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */ - return spu_sel(b, a, m); -} - - -/** - * Called by rasterizer for each quad after the shader has run. Do - * all the per-fragment operations including alpha test, z test, - * stencil test, blend, colormask and logicops. This is a - * fallback/debug function. In reality we'll use a generated function - * produced by the PPU. But this function is useful for - * debug/validation. - */ -void -spu_fallback_fragment_ops(uint x, uint y, - tile_t *colorTile, - tile_t *depthStencilTile, - vector float fragZ, - vector float fragR, - vector float fragG, - vector float fragB, - vector float fragA, - vector unsigned int mask) -{ - vector float frag_aos[4]; - unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */ - unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */ - - /* - * Do alpha test - */ - if (spu.depth_stencil_alpha.alpha.enabled) { - vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref_value); - vector unsigned int amask; - - switch (spu.depth_stencil_alpha.alpha.func) { - case PIPE_FUNC_LESS: - amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */ - break; - case PIPE_FUNC_GREATER: - amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */ - break; - case PIPE_FUNC_GEQUAL: - amask = spu_cmpgt(ref, fragA); - amask = spu_nor(amask, amask); - break; - case PIPE_FUNC_LEQUAL: - amask = spu_cmpgt(fragA, ref); - amask = spu_nor(amask, 
amask); - break; - case PIPE_FUNC_EQUAL: - amask = spu_cmpeq(ref, fragA); - break; - case PIPE_FUNC_NOTEQUAL: - amask = spu_cmpeq(ref, fragA); - amask = spu_nor(amask, amask); - break; - case PIPE_FUNC_ALWAYS: - amask = spu_splats(0xffffffffU); - break; - case PIPE_FUNC_NEVER: - amask = spu_splats( 0x0U); - break; - default: - ; - } - - mask = spu_and(mask, amask); - } - - - /* - * Z and/or stencil testing... - */ - if (spu.depth_stencil_alpha.depth.enabled || - spu.depth_stencil_alpha.stencil[0].enabled) { - - /* get four Z/Stencil values from tile */ - vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU); - vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2]; - vector unsigned int ifbZ = spu_and(ifbZS, mask24); - vector unsigned int ifbS = spu_andc(ifbZS, mask24); - - if (spu.depth_stencil_alpha.stencil[0].enabled) { - /* do stencil test */ - ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24_UNORM_S8_UINT); - - } - else if (spu.depth_stencil_alpha.depth.enabled) { - /* do depth test */ - - ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || - spu.fb.depth_format == PIPE_FORMAT_Z24X8_UNORM); - - vector unsigned int ifragZ; - vector unsigned int zmask; - - /* convert four fragZ from float to uint */ - fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff)); - ifragZ = spu_convtu(fragZ, 0); - - /* do depth comparison, setting zmask with results */ - switch (spu.depth_stencil_alpha.depth.func) { - case PIPE_FUNC_LESS: - zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */ - break; - case PIPE_FUNC_GREATER: - zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */ - break; - case PIPE_FUNC_GEQUAL: - zmask = spu_cmpgt(ifbZ, ifragZ); - zmask = spu_nor(zmask, zmask); - break; - case PIPE_FUNC_LEQUAL: - zmask = spu_cmpgt(ifragZ, ifbZ); - zmask = spu_nor(zmask, zmask); - break; - case PIPE_FUNC_EQUAL: - zmask = spu_cmpeq(ifbZ, ifragZ); - break; - case PIPE_FUNC_NOTEQUAL: - zmask = spu_cmpeq(ifbZ, ifragZ); - zmask = spu_nor(zmask, 
zmask); - break; - case PIPE_FUNC_ALWAYS: - zmask = spu_splats(0xffffffffU); - break; - case PIPE_FUNC_NEVER: - zmask = spu_splats( 0x0U); - break; - default: - ; - } - - mask = spu_and(mask, zmask); - - /* merge framebuffer Z and fragment Z according to the mask */ - ifbZ = spu_or(spu_and(ifragZ, mask), - spu_andc(ifbZ, mask)); - } - - if (spu_extract(spu_orx(mask), 0)) { - /* put new fragment Z/Stencil values back into Z/Stencil tile */ - depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS); - - spu.cur_ztile_status = TILE_STATUS_DIRTY; - } - } - - - /* - * If we'll need the current framebuffer/tile colors for blending - * or logicop or colormask, fetch them now. - */ - if (spu.blend.rt[0].blend_enable || - spu.blend.logicop_enable || - spu.blend.rt[0].colormask != 0xf) { - -#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ - fbc0 = colorTile->ui[y][x*2+0]; - fbc1 = colorTile->ui[y][x*2+1]; - fbc2 = colorTile->ui[y][x*2+2]; - fbc3 = colorTile->ui[y][x*2+3]; -#else - fbc0 = colorTile->ui[y+0][x+0]; - fbc1 = colorTile->ui[y+0][x+1]; - fbc2 = colorTile->ui[y+1][x+0]; - fbc3 = colorTile->ui[y+1][x+1]; -#endif - } - - - /* - * Do blending - */ - if (spu.blend.rt[0].blend_enable) { - /* blending terms, misc regs */ - vector float term1r, term1g, term1b, term1a; - vector float term2r, term2g, term2b, term2a; - vector float one, tmp; - - vector float fbRGBA[4]; /* current framebuffer colors */ - - /* convert framebuffer colors from packed int to vector float */ - { - vector float temp[4]; /* float colors in AOS form */ - switch (spu.fb.color_format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - temp[0] = spu_unpack_B8G8R8A8(fbc0); - temp[1] = spu_unpack_B8G8R8A8(fbc1); - temp[2] = spu_unpack_B8G8R8A8(fbc2); - temp[3] = spu_unpack_B8G8R8A8(fbc3); - break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - temp[0] = spu_unpack_A8R8G8B8(fbc0); - temp[1] = spu_unpack_A8R8G8B8(fbc1); - temp[2] = spu_unpack_A8R8G8B8(fbc2); - temp[3] = spu_unpack_A8R8G8B8(fbc3); - break; - default: - 
ASSERT(0); - } - _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */ - } - - /* - * Compute Src RGB terms (fragment color * factor) - */ - switch (spu.blend.rt[0].rgb_src_factor) { - case PIPE_BLENDFACTOR_ONE: - term1r = fragR; - term1g = fragG; - term1b = fragB; - break; - case PIPE_BLENDFACTOR_ZERO: - term1r = - term1g = - term1b = spu_splats(0.0f); - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - term1r = spu_mul(fragR, fragR); - term1g = spu_mul(fragG, fragG); - term1b = spu_mul(fragB, fragB); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - term1r = spu_mul(fragR, fragA); - term1g = spu_mul(fragG, fragA); - term1b = spu_mul(fragB, fragA); - break; - case PIPE_BLENDFACTOR_DST_COLOR: - term1r = spu_mul(fragR, fbRGBA[0]); - term1g = spu_mul(fragG, fbRGBA[1]); - term1b = spu_mul(fragB, fbRGBA[1]); - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - term1r = spu_mul(fragR, fbRGBA[3]); - term1g = spu_mul(fragG, fbRGBA[3]); - term1b = spu_mul(fragB, fbRGBA[3]); - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0])); - term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1])); - term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2])); - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3])); - term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3])); - term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3])); - break; - /* XXX more cases */ - default: - ASSERT(0); - } - - /* - * Compute Src Alpha term (fragment alpha * factor) - */ - switch (spu.blend.rt[0].alpha_src_factor) { - case PIPE_BLENDFACTOR_ONE: - term1a = fragA; - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - term1a = spu_splats(0.0f); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - term1a = spu_mul(fragA, fragA); - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - term1a = spu_mul(fragA, fbRGBA[3]); - break; - case 
PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: - term1a = spu_mul(fragR, spu_splats(spu.blend_color.color[3])); - break; - /* XXX more cases */ - default: - ASSERT(0); - } - - /* - * Compute Dest RGB terms (framebuffer color * factor) - */ - switch (spu.blend.rt[0].rgb_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - term2r = fbRGBA[0]; - term2g = fbRGBA[1]; - term2b = fbRGBA[2]; - break; - case PIPE_BLENDFACTOR_ZERO: - term2r = - term2g = - term2b = spu_splats(0.0f); - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - term2r = spu_mul(fbRGBA[0], fragR); - term2g = spu_mul(fbRGBA[1], fragG); - term2b = spu_mul(fbRGBA[2], fragB); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - term2r = spu_mul(fbRGBA[0], fragA); - term2g = spu_mul(fbRGBA[1], fragA); - term2b = spu_mul(fbRGBA[2], fragA); - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - one = spu_splats(1.0f); - tmp = spu_sub(one, fragA); - term2r = spu_mul(fbRGBA[0], tmp); - term2g = spu_mul(fbRGBA[1], tmp); - term2b = spu_mul(fbRGBA[2], tmp); - break; - case PIPE_BLENDFACTOR_DST_COLOR: - term2r = spu_mul(fbRGBA[0], fbRGBA[0]); - term2g = spu_mul(fbRGBA[1], fbRGBA[1]); - term2b = spu_mul(fbRGBA[2], fbRGBA[2]); - break; - case PIPE_BLENDFACTOR_DST_ALPHA: - term2r = spu_mul(fbRGBA[0], fbRGBA[3]); - term2g = spu_mul(fbRGBA[1], fbRGBA[3]); - term2b = spu_mul(fbRGBA[2], fbRGBA[3]); - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0])); - term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1])); - term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2])); - break; - case PIPE_BLENDFACTOR_CONST_ALPHA: - term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3])); - term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3])); - term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3])); - break; - /* XXX more cases */ - default: - ASSERT(0); - } - - /* - * Compute Dest Alpha term (framebuffer alpha * 
factor) - */ - switch (spu.blend.rt[0].alpha_dst_factor) { - case PIPE_BLENDFACTOR_ONE: - term2a = fbRGBA[3]; - break; - case PIPE_BLENDFACTOR_SRC_COLOR: - term2a = spu_splats(0.0f); - break; - case PIPE_BLENDFACTOR_SRC_ALPHA: - term2a = spu_mul(fbRGBA[3], fragA); - break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - one = spu_splats(1.0f); - tmp = spu_sub(one, fragA); - term2a = spu_mul(fbRGBA[3], tmp); - break; - case PIPE_BLENDFACTOR_DST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_DST_ALPHA: - term2a = spu_mul(fbRGBA[3], fbRGBA[3]); - break; - case PIPE_BLENDFACTOR_CONST_COLOR: - /* fall-through */ - case PIPE_BLENDFACTOR_CONST_ALPHA: - term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3])); - break; - /* XXX more cases */ - default: - ASSERT(0); - } - - /* - * Combine Src/Dest RGB terms - */ - switch (spu.blend.rt[0].rgb_func) { - case PIPE_BLEND_ADD: - fragR = spu_add(term1r, term2r); - fragG = spu_add(term1g, term2g); - fragB = spu_add(term1b, term2b); - break; - case PIPE_BLEND_SUBTRACT: - fragR = spu_sub(term1r, term2r); - fragG = spu_sub(term1g, term2g); - fragB = spu_sub(term1b, term2b); - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - fragR = spu_sub(term2r, term1r); - fragG = spu_sub(term2g, term1g); - fragB = spu_sub(term2b, term1b); - break; - case PIPE_BLEND_MIN: - fragR = spu_min(term1r, term2r); - fragG = spu_min(term1g, term2g); - fragB = spu_min(term1b, term2b); - break; - case PIPE_BLEND_MAX: - fragR = spu_max(term1r, term2r); - fragG = spu_max(term1g, term2g); - fragB = spu_max(term1b, term2b); - break; - default: - ASSERT(0); - } - - /* - * Combine Src/Dest A term - */ - switch (spu.blend.rt[0].alpha_func) { - case PIPE_BLEND_ADD: - fragA = spu_add(term1a, term2a); - break; - case PIPE_BLEND_SUBTRACT: - fragA = spu_sub(term1a, term2a); - break; - case PIPE_BLEND_REVERSE_SUBTRACT: - fragA = spu_sub(term2a, term1a); - break; - case PIPE_BLEND_MIN: - fragA = spu_min(term1a, term2a); - break; - case PIPE_BLEND_MAX: - fragA = 
spu_max(term1a, term2a); - break; - default: - ASSERT(0); - } - } - - - /* - * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA. - */ -#if 0 - /* original code */ - { - vector float frag_soa[4]; - frag_soa[0] = fragR; - frag_soa[1] = fragG; - frag_soa[2] = fragB; - frag_soa[3] = fragA; - _transpose_matrix4x4(frag_aos, frag_soa); - } -#else - /* short-cut relying on function parameter layout: */ - _transpose_matrix4x4(frag_aos, &fragR); - (void) fragG; - (void) fragB; -#endif - - /* - * Pack fragment float colors into 32-bit RGBA words. - */ - switch (spu.fb.color_format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - fragc0 = spu_pack_A8R8G8B8(frag_aos[0]); - fragc1 = spu_pack_A8R8G8B8(frag_aos[1]); - fragc2 = spu_pack_A8R8G8B8(frag_aos[2]); - fragc3 = spu_pack_A8R8G8B8(frag_aos[3]); - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - fragc0 = spu_pack_B8G8R8A8(frag_aos[0]); - fragc1 = spu_pack_B8G8R8A8(frag_aos[1]); - fragc2 = spu_pack_B8G8R8A8(frag_aos[2]); - fragc3 = spu_pack_B8G8R8A8(frag_aos[3]); - break; - default: - fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n"); - ASSERT(0); - } - - - /* - * Do color masking - */ - if (spu.blend.rt[0].colormask != 0xf) { - uint cmask = 0x0; /* each byte corresponds to a color channel */ - - /* Form bitmask depending on color buffer format and colormask bits */ - switch (spu.fb.color_format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - if (spu.blend.rt[0].colormask & PIPE_MASK_R) - cmask |= 0x00ff0000; /* red */ - if (spu.blend.rt[0].colormask & PIPE_MASK_G) - cmask |= 0x0000ff00; /* green */ - if (spu.blend.rt[0].colormask & PIPE_MASK_B) - cmask |= 0x000000ff; /* blue */ - if (spu.blend.rt[0].colormask & PIPE_MASK_A) - cmask |= 0xff000000; /* alpha */ - break; - case PIPE_FORMAT_A8R8G8B8_UNORM: - if (spu.blend.rt[0].colormask & PIPE_MASK_R) - cmask |= 0x0000ff00; /* red */ - if (spu.blend.rt[0].colormask & PIPE_MASK_G) - cmask |= 0x00ff0000; /* green */ - if (spu.blend.rt[0].colormask & PIPE_MASK_B) - cmask |= 
0xff000000; /* blue */ - if (spu.blend.rt[0].colormask & PIPE_MASK_A) - cmask |= 0x000000ff; /* alpha */ - break; - default: - ASSERT(0); - } - - /* - * Apply color mask to the 32-bit packed colors. - * if (cmask[i]) - * frag color[i] = frag color[i]; - * else - * frag color[i] = framebuffer color[i]; - */ - fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask); - fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask); - fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask); - fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask); - } - - - /* - * Do logic ops - */ - if (spu.blend.logicop_enable) { - /* XXX to do */ - /* apply logicop to 32-bit packed colors (fragcx and fbcx) */ - } - - - /* - * If mask is non-zero, mark tile as dirty. - */ - if (spu_extract(spu_orx(mask), 0)) { - spu.cur_ctile_status = TILE_STATUS_DIRTY; - } - else { - /* write no fragments */ - return; - } - - - /* - * Write new fragment/quad colors to the framebuffer/tile. - * Only write pixels where the corresponding mask word is set. - */ -#if LINEAR_QUAD_LAYOUT - /* - * Quad layout: - * +--+--+--+--+ - * |p0|p1|p2|p3|... - * +--+--+--+--+ - */ - if (spu_extract(mask, 0)) - colorTile->ui[y][x*2] = fragc0; - if (spu_extract(mask, 1)) - colorTile->ui[y][x*2+1] = fragc1; - if (spu_extract(mask, 2)) - colorTile->ui[y][x*2+2] = fragc2; - if (spu_extract(mask, 3)) - colorTile->ui[y][x*2+3] = fragc3; -#else - /* - * Quad layout: - * +--+--+ - * |p0|p1|... - * +--+--+ - * |p2|p3|... 
- * +--+--+ - */ - if (spu_extract(mask, 0)) - colorTile->ui[y+0][x+0] = fragc0; - if (spu_extract(mask, 1)) - colorTile->ui[y+0][x+1] = fragc1; - if (spu_extract(mask, 2)) - colorTile->ui[y+1][x+0] = fragc2; - if (spu_extract(mask, 3)) - colorTile->ui[y+1][x+1] = fragc3; -#endif -} diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h deleted file mode 100644 index f817abf0463..00000000000 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h +++ /dev/null @@ -1,44 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef SPU_PER_FRAGMENT_OP -#define SPU_PER_FRAGMENT_OP - - -extern void -spu_fallback_fragment_ops(uint x, uint y, - tile_t *colorTile, - tile_t *depthStencilTile, - vector float fragZ, - vector float fragRed, - vector float fragGreen, - vector float fragBlue, - vector float fragAlpha, - vector unsigned int mask); - - -#endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c deleted file mode 100644 index 14987e3c3a2..00000000000 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ /dev/null @@ -1,356 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#include -#include -#include - -#include "spu_main.h" -#include "spu_render.h" -#include "spu_shuffle.h" -#include "spu_tri.h" -#include "spu_tile.h" -#include "cell/common.h" -#include "util/u_memory.h" - - -/** - * Given a rendering command's bounding box (in pixels) compute the - * location of the corresponding screen tile bounding box. - */ -static INLINE void -tile_bounding_box(const struct cell_command_render *render, - uint *txmin, uint *tymin, - uint *box_num_tiles, uint *box_width_tiles) -{ -#if 0 - /* Debug: full-window bounding box */ - uint txmax = spu.fb.width_tiles - 1; - uint tymax = spu.fb.height_tiles - 1; - *txmin = 0; - *tymin = 0; - *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - *box_width_tiles = spu.fb.width_tiles; - (void) render; - (void) txmax; - (void) tymax; -#else - uint txmax, tymax, box_height_tiles; - - *txmin = (uint) render->xmin / TILE_SIZE; - *tymin = (uint) render->ymin / TILE_SIZE; - txmax = (uint) render->xmax / TILE_SIZE; - tymax = (uint) render->ymax / TILE_SIZE; - if (txmax >= spu.fb.width_tiles) - txmax = spu.fb.width_tiles-1; - if (tymax >= spu.fb.height_tiles) - tymax = spu.fb.height_tiles-1; - *box_width_tiles = txmax - *txmin + 1; - box_height_tiles = tymax - *tymin + 1; - *box_num_tiles = *box_width_tiles * box_height_tiles; -#endif -#if 0 - printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id, - render->xmin, render->ymin, render->xmax, render->ymax); - printf("SPU %u: tiles: %u, %u .. 
%u, %u\n", - spu.init.id, *txmin, *tymin, txmax, tymax); - ASSERT(render->xmin <= render->xmax); - ASSERT(render->ymin <= render->ymax); -#endif -} - - -/** Check if the tile at (tx,ty) belongs to this SPU */ -static INLINE boolean -my_tile(uint tx, uint ty) -{ - return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id; -} - - -/** - * Start fetching non-clear color/Z tiles from main memory - */ -static INLINE void -get_cz_tiles(uint tx, uint ty) -{ - if (spu.read_depth_stencil) { - if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { - //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); - get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); - spu.cur_ztile_status = TILE_STATUS_GETTING; - } - } - - if (spu.cur_ctile_status != TILE_STATUS_CLEAR) { - //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty); - get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0); - spu.cur_ctile_status = TILE_STATUS_GETTING; - } -} - - -/** - * Start putting dirty color/Z tiles back to main memory - */ -static INLINE void -put_cz_tiles(uint tx, uint ty) -{ - if (spu.cur_ztile_status == TILE_STATUS_DIRTY) { - /* tile was modified and needs to be written back */ - //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty); - put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1); - spu.cur_ztile_status = TILE_STATUS_DEFINED; - } - else if (spu.cur_ztile_status == TILE_STATUS_GETTING) { - /* tile was never used */ - spu.cur_ztile_status = TILE_STATUS_DEFINED; - //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty); - } - - if (spu.cur_ctile_status == TILE_STATUS_DIRTY) { - /* tile was modified and needs to be written back */ - //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty); - put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0); - spu.cur_ctile_status = TILE_STATUS_DEFINED; - } - else if (spu.cur_ctile_status == TILE_STATUS_GETTING) { - /* tile was never used */ - spu.cur_ctile_status = TILE_STATUS_DEFINED; - 
//printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty); - } -} - - -/** - * Wait for 'put' of color/z tiles to complete. - */ -static INLINE void -wait_put_cz_tiles(void) -{ - wait_on_mask(1 << TAG_WRITE_TILE_COLOR); - if (spu.read_depth_stencil) { - wait_on_mask(1 << TAG_WRITE_TILE_Z); - } -} - - -/** - * Render primitives - * \param pos_incr returns value indicating how may words to skip after - * this command in the batch buffer - */ -void -cmd_render(const struct cell_command_render *render, uint *pos_incr) -{ - /* we'll DMA into these buffers */ - PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE]; - const uint vertex_size = render->vertex_size; /* in bytes */ - /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; - uint index_bytes; - const ubyte *vertices; - const ushort *indexes; - uint i, j; - uint num_tiles; - - D_PRINTF(CELL_DEBUG_CMD, - "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n", - render->prim_type, - render->num_verts, - render->num_indexes, - render->inline_verts); - - ASSERT(sizeof(*render) % 4 == 0); - ASSERT(total_vertex_bytes % 16 == 0); - ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); - ASSERT(render->num_indexes % 3 == 0); - - - /* indexes are right after the render command in the batch buffer */ - indexes = (const ushort *) (render + 1); - index_bytes = ROUNDUP8(render->num_indexes * 2); - *pos_incr = index_bytes / 8 + sizeof(*render) / 8; - - - if (render->inline_verts) { - /* Vertices are after indexes in batch buffer at next 16-byte addr */ - vertices = (const ubyte *) render + (*pos_incr * 8); - vertices = (const ubyte *) align_pointer((void *) vertices, 16); - ASSERT_ALIGN16(vertices); - *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8; - } - else { - /* Begin DMA fetch of vertex buffer */ - ubyte *src = spu.init.buffers[render->vertex_buf]; - ubyte *dest = vertex_data; - - /* skip vertex data we won't use */ -#if 01 - src += render->min_index * vertex_size; - dest += 
render->min_index * vertex_size; - total_vertex_bytes -= render->min_index * vertex_size; -#endif - ASSERT(total_vertex_bytes % 16 == 0); - ASSERT_ALIGN16(dest); - ASSERT_ALIGN16(src); - - mfc_get(dest, /* in vertex_data[] array */ - (unsigned int) src, /* src in main memory */ - total_vertex_bytes, /* size */ - TAG_VERTEX_BUFFER, - 0, /* tid */ - 0 /* rid */); - - vertices = vertex_data; - - wait_on_mask(1 << TAG_VERTEX_BUFFER); - } - - - /** - ** find tiles which intersect the prim bounding box - **/ - uint txmin, tymin, box_width_tiles, box_num_tiles; - tile_bounding_box(render, &txmin, &tymin, - &box_num_tiles, &box_width_tiles); - - - /* make sure any pending clears have completed */ - wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ - - - num_tiles = 0; - - /** - ** loop over tiles, rendering tris - **/ - for (i = 0; i < box_num_tiles; i++) { - const uint tx = txmin + i % box_width_tiles; - const uint ty = tymin + i / box_width_tiles; - - ASSERT(tx < spu.fb.width_tiles); - ASSERT(ty < spu.fb.height_tiles); - - if (!my_tile(tx, ty)) - continue; - - num_tiles++; - - spu.cur_ctile_status = spu.ctile_status[ty][tx]; - spu.cur_ztile_status = spu.ztile_status[ty][tx]; - - get_cz_tiles(tx, ty); - - uint drawn = 0; - - const qword vertex_sizes = (qword)spu_splats(vertex_size); - const qword verticess = (qword)spu_splats((uint)vertices); - - ASSERT_ALIGN16(&indexes[0]); - - const uint num_indexes = render->num_indexes; - - /* loop over tris - * &indexes[0] will be 16 byte aligned. This loop is heavily unrolled - * avoiding variable rotates when extracting vertex indices. 
- */ - for (j = 0; j < num_indexes; j += 24) { - /* Load three vectors, containing 24 ushort indices */ - const qword* lower_qword = (qword*)&indexes[j]; - const qword indices0 = lower_qword[0]; - const qword indices1 = lower_qword[1]; - const qword indices2 = lower_qword[2]; - - /* stores three indices for each tri n in slots 0, 1 and 2 of vsn */ - /* Straightforward rotates for these */ - qword vs0 = indices0; - qword vs1 = si_shlqbyi(indices0, 6); - qword vs3 = si_shlqbyi(indices1, 2); - qword vs4 = si_shlqbyi(indices1, 8); - qword vs6 = si_shlqbyi(indices2, 4); - qword vs7 = si_shlqbyi(indices2, 10); - - /* For tri 2 and 5, the three indices are split across two machine - * words - rotate and combine */ - const qword tmp2a = si_shlqbyi(indices0, 12); - const qword tmp2b = si_rotqmbyi(indices1, 12|16); - qword vs2 = si_selb(tmp2a, tmp2b, si_fsmh(si_from_uint(0x20))); - - const qword tmp5a = si_shlqbyi(indices1, 14); - const qword tmp5b = si_rotqmbyi(indices2, 14|16); - qword vs5 = si_selb(tmp5a, tmp5b, si_fsmh(si_from_uint(0x60))); - - /* unpack indices from halfword slots to word slots */ - vs0 = si_shufb(vs0, vs0, SHUFB8(0,A,0,B,0,C,0,0)); - vs1 = si_shufb(vs1, vs1, SHUFB8(0,A,0,B,0,C,0,0)); - vs2 = si_shufb(vs2, vs2, SHUFB8(0,A,0,B,0,C,0,0)); - vs3 = si_shufb(vs3, vs3, SHUFB8(0,A,0,B,0,C,0,0)); - vs4 = si_shufb(vs4, vs4, SHUFB8(0,A,0,B,0,C,0,0)); - vs5 = si_shufb(vs5, vs5, SHUFB8(0,A,0,B,0,C,0,0)); - vs6 = si_shufb(vs6, vs6, SHUFB8(0,A,0,B,0,C,0,0)); - vs7 = si_shufb(vs7, vs7, SHUFB8(0,A,0,B,0,C,0,0)); - - /* Calculate address of vertex in vertices[] */ - vs0 = si_mpya(vs0, vertex_sizes, verticess); - vs1 = si_mpya(vs1, vertex_sizes, verticess); - vs2 = si_mpya(vs2, vertex_sizes, verticess); - vs3 = si_mpya(vs3, vertex_sizes, verticess); - vs4 = si_mpya(vs4, vertex_sizes, verticess); - vs5 = si_mpya(vs5, vertex_sizes, verticess); - vs6 = si_mpya(vs6, vertex_sizes, verticess); - vs7 = si_mpya(vs7, vertex_sizes, verticess); - - /* Select the appropriate call 
based on the number of vertices - * remaining */ - switch(num_indexes - j) { - default: drawn += tri_draw(vs7, tx, ty); - case 21: drawn += tri_draw(vs6, tx, ty); - case 18: drawn += tri_draw(vs5, tx, ty); - case 15: drawn += tri_draw(vs4, tx, ty); - case 12: drawn += tri_draw(vs3, tx, ty); - case 9: drawn += tri_draw(vs2, tx, ty); - case 6: drawn += tri_draw(vs1, tx, ty); - case 3: drawn += tri_draw(vs0, tx, ty); - } - } - - //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); - - /* write color/z tiles back to main framebuffer, if dirtied */ - put_cz_tiles(tx, ty); - - wait_put_cz_tiles(); /* XXX seems unnecessary... */ - - spu.ctile_status[ty][tx] = spu.cur_ctile_status; - spu.ztile_status[ty][tx] = spu.cur_ztile_status; - } - - D_PRINTF(CELL_DEBUG_CMD, - "RENDER done (%u tiles hit)\n", - num_tiles); -} diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h deleted file mode 100644 index 493434f0878..00000000000 --- a/src/gallium/drivers/cell/spu/spu_render.h +++ /dev/null @@ -1,38 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#ifndef SPU_RENDER_H -#define SPU_RENDER_H - -#include "cell/common.h" - -extern void -cmd_render(const struct cell_command_render *render, uint *pos_incr); - -#endif /* SPU_RENDER_H */ - diff --git a/src/gallium/drivers/cell/spu/spu_shuffle.h b/src/gallium/drivers/cell/spu/spu_shuffle.h deleted file mode 100644 index 74f2a0b6d2e..00000000000 --- a/src/gallium/drivers/cell/spu/spu_shuffle.h +++ /dev/null @@ -1,186 +0,0 @@ -#ifndef SPU_SHUFFLE_H -#define SPU_SHUFFLE_H - -/* - * Generate shuffle patterns with minimal fuss. - * - * Based on ideas from - * http://www.insomniacgames.com/tech/articles/0408/files/shuffles.pdf - * - * A-P indicates 0-15th position in first vector - * a-p indicates 0-15th position in second vector - * - * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ - * |00|01|02|03|04|05|06|07|08|09|0a|0b|0c|0d|0e|0f| - * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ - * | A| B| C| D| - * +-----+-----+-----+-----+-----+-----+-----+-----+ - * | A| B| C| D| E| F| G| H| - * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ - * | A| B| C| D| E| F| G| H| I| J| K| L| M| N| O| P| - * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ - * - * x or X indicates 0xff - * 8 indicates 0x80 - * 0 indicates 0x00 - * - * The macros SHUFFLE4() SHUFFLE8() and SHUFFLE16() provide a const vector - * unsigned char literal suitable for use with spu_shuffle(). 
- * - * The macros SHUFB4() SHUFB8() and SHUFB16() provide a const qword vector - * literal suitable for use with si_shufb(). - * - * - * For example : - * SHUFB4(A,A,A,A) - * expands to : - * ((const qword){0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3}) - * - * SHUFFLE8(A,B,a,b,C,c,8,8) - * expands to : - * ((const vector unsigned char){0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13, - * 0x04,0x05,0x14,0x15,0xe0,0xe0,0xe0,0xe0}) - * - */ - -#include - -#define SHUFFLE_PATTERN_4_A__ 0x00, 0x01, 0x02, 0x03 -#define SHUFFLE_PATTERN_4_B__ 0x04, 0x05, 0x06, 0x07 -#define SHUFFLE_PATTERN_4_C__ 0x08, 0x09, 0x0a, 0x0b -#define SHUFFLE_PATTERN_4_D__ 0x0c, 0x0d, 0x0e, 0x0f -#define SHUFFLE_PATTERN_4_a__ 0x10, 0x11, 0x12, 0x13 -#define SHUFFLE_PATTERN_4_b__ 0x14, 0x15, 0x16, 0x17 -#define SHUFFLE_PATTERN_4_c__ 0x18, 0x19, 0x1a, 0x1b -#define SHUFFLE_PATTERN_4_d__ 0x1c, 0x1d, 0x1e, 0x1f -#define SHUFFLE_PATTERN_4_X__ 0xc0, 0xc0, 0xc0, 0xc0 -#define SHUFFLE_PATTERN_4_x__ 0xc0, 0xc0, 0xc0, 0xc0 -#define SHUFFLE_PATTERN_4_0__ 0x80, 0x80, 0x80, 0x80 -#define SHUFFLE_PATTERN_4_8__ 0xe0, 0xe0, 0xe0, 0xe0 - -#define SHUFFLE_VECTOR_4__(A, B, C, D) \ - SHUFFLE_PATTERN_4_##A##__, \ - SHUFFLE_PATTERN_4_##B##__, \ - SHUFFLE_PATTERN_4_##C##__, \ - SHUFFLE_PATTERN_4_##D##__ - -#define SHUFFLE4(A, B, C, D) \ - ((const vector unsigned char){ \ - SHUFFLE_VECTOR_4__(A, B, C, D) \ - }) - -#define SHUFB4(A, B, C, D) \ - ((const qword){ \ - SHUFFLE_VECTOR_4__(A, B, C, D) \ - }) - - -#define SHUFFLE_PATTERN_8_A__ 0x00, 0x01 -#define SHUFFLE_PATTERN_8_B__ 0x02, 0x03 -#define SHUFFLE_PATTERN_8_C__ 0x04, 0x05 -#define SHUFFLE_PATTERN_8_D__ 0x06, 0x07 -#define SHUFFLE_PATTERN_8_E__ 0x08, 0x09 -#define SHUFFLE_PATTERN_8_F__ 0x0a, 0x0b -#define SHUFFLE_PATTERN_8_G__ 0x0c, 0x0d -#define SHUFFLE_PATTERN_8_H__ 0x0e, 0x0f -#define SHUFFLE_PATTERN_8_a__ 0x10, 0x11 -#define SHUFFLE_PATTERN_8_b__ 0x12, 0x13 -#define SHUFFLE_PATTERN_8_c__ 0x14, 0x15 -#define SHUFFLE_PATTERN_8_d__ 0x16, 0x17 -#define SHUFFLE_PATTERN_8_e__ 0x18, 
0x19 -#define SHUFFLE_PATTERN_8_f__ 0x1a, 0x1b -#define SHUFFLE_PATTERN_8_g__ 0x1c, 0x1d -#define SHUFFLE_PATTERN_8_h__ 0x1e, 0x1f -#define SHUFFLE_PATTERN_8_X__ 0xc0, 0xc0 -#define SHUFFLE_PATTERN_8_x__ 0xc0, 0xc0 -#define SHUFFLE_PATTERN_8_0__ 0x80, 0x80 -#define SHUFFLE_PATTERN_8_8__ 0xe0, 0xe0 - - -#define SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ - SHUFFLE_PATTERN_8_##A##__, \ - SHUFFLE_PATTERN_8_##B##__, \ - SHUFFLE_PATTERN_8_##C##__, \ - SHUFFLE_PATTERN_8_##D##__, \ - SHUFFLE_PATTERN_8_##E##__, \ - SHUFFLE_PATTERN_8_##F##__, \ - SHUFFLE_PATTERN_8_##G##__, \ - SHUFFLE_PATTERN_8_##H##__ - -#define SHUFFLE8(A, B, C, D, E, F, G, H) \ - ((const vector unsigned char){ \ - SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ - }) - -#define SHUFB8(A, B, C, D, E, F, G, H) \ - ((const qword){ \ - SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ - }) - - -#define SHUFFLE_PATTERN_16_A__ 0x00 -#define SHUFFLE_PATTERN_16_B__ 0x01 -#define SHUFFLE_PATTERN_16_C__ 0x02 -#define SHUFFLE_PATTERN_16_D__ 0x03 -#define SHUFFLE_PATTERN_16_E__ 0x04 -#define SHUFFLE_PATTERN_16_F__ 0x05 -#define SHUFFLE_PATTERN_16_G__ 0x06 -#define SHUFFLE_PATTERN_16_H__ 0x07 -#define SHUFFLE_PATTERN_16_I__ 0x08 -#define SHUFFLE_PATTERN_16_J__ 0x09 -#define SHUFFLE_PATTERN_16_K__ 0x0a -#define SHUFFLE_PATTERN_16_L__ 0x0b -#define SHUFFLE_PATTERN_16_M__ 0x0c -#define SHUFFLE_PATTERN_16_N__ 0x0d -#define SHUFFLE_PATTERN_16_O__ 0x0e -#define SHUFFLE_PATTERN_16_P__ 0x0f -#define SHUFFLE_PATTERN_16_a__ 0x10 -#define SHUFFLE_PATTERN_16_b__ 0x11 -#define SHUFFLE_PATTERN_16_c__ 0x12 -#define SHUFFLE_PATTERN_16_d__ 0x13 -#define SHUFFLE_PATTERN_16_e__ 0x14 -#define SHUFFLE_PATTERN_16_f__ 0x15 -#define SHUFFLE_PATTERN_16_g__ 0x16 -#define SHUFFLE_PATTERN_16_h__ 0x17 -#define SHUFFLE_PATTERN_16_i__ 0x18 -#define SHUFFLE_PATTERN_16_j__ 0x19 -#define SHUFFLE_PATTERN_16_k__ 0x1a -#define SHUFFLE_PATTERN_16_l__ 0x1b -#define SHUFFLE_PATTERN_16_m__ 0x1c -#define SHUFFLE_PATTERN_16_n__ 0x1d -#define SHUFFLE_PATTERN_16_o__ 
0x1e -#define SHUFFLE_PATTERN_16_p__ 0x1f -#define SHUFFLE_PATTERN_16_X__ 0xc0 -#define SHUFFLE_PATTERN_16_x__ 0xc0 -#define SHUFFLE_PATTERN_16_0__ 0x80 -#define SHUFFLE_PATTERN_16_8__ 0xe0 - -#define SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ - SHUFFLE_PATTERN_16_##A##__, \ - SHUFFLE_PATTERN_16_##B##__, \ - SHUFFLE_PATTERN_16_##C##__, \ - SHUFFLE_PATTERN_16_##D##__, \ - SHUFFLE_PATTERN_16_##E##__, \ - SHUFFLE_PATTERN_16_##F##__, \ - SHUFFLE_PATTERN_16_##G##__, \ - SHUFFLE_PATTERN_16_##H##__, \ - SHUFFLE_PATTERN_16_##I##__, \ - SHUFFLE_PATTERN_16_##J##__, \ - SHUFFLE_PATTERN_16_##K##__, \ - SHUFFLE_PATTERN_16_##L##__, \ - SHUFFLE_PATTERN_16_##M##__, \ - SHUFFLE_PATTERN_16_##N##__, \ - SHUFFLE_PATTERN_16_##O##__, \ - SHUFFLE_PATTERN_16_##P##__ - -#define SHUFFLE16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ - ((const vector unsigned char){ \ - SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ - }) - -#define SHUFB16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ - ((const qword){ \ - SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ - }) - -#endif diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c deleted file mode 100644 index 69784c89788..00000000000 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ /dev/null @@ -1,641 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include - -#include "pipe/p_compiler.h" -#include "spu_main.h" -#include "spu_texture.h" -#include "spu_tile.h" -#include "spu_colorpack.h" -#include "spu_dcache.h" - - -/** - * Mark all tex cache entries as invalid. 
- */ -void -invalidate_tex_cache(void) -{ - uint lvl; - for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) { - uint unit = 0; - uint bytes = 4 * spu.texture[unit].level[lvl].width - * spu.texture[unit].level[lvl].height; - - if (spu.texture[unit].target == PIPE_TEXTURE_CUBE) - bytes *= 6; - else if (spu.texture[unit].target == PIPE_TEXTURE_3D) - bytes *= spu.texture[unit].level[lvl].depth; - - spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); - } -} - - -/** - * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ... - * - * NOTE: in the typical case of bilinear filtering, the four texels - * are in a 2x2 group so we could get by with just two dcache fetches - * (two side-by-side texels per fetch). But when bilinear filtering - * wraps around a texture edge, we'll probably need code like we have - * now. - * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time, - * it's quite likely that the four pixels in a quad will need some of the - * same texels. So look into doing texture fetches for four pixels at - * a time. 
- */ -static void -get_four_texels(const struct spu_texture_level *tlevel, uint face, - vec_int4 x, vec_int4 y, - vec_uint4 *texels) -{ - unsigned texture_ea = (uintptr_t) tlevel->start; - const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ - const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ - const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ - const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ - - const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row); - const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); - - qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); - tile_offset = si_mpy((qword) tile_offset, tile_size); - - qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x); - texel_offset = si_mpyui(texel_offset, 4); - - vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset); - - texture_ea = texture_ea + face * tlevel->bytes_per_image; - - spu_dcache_fetch_unaligned((qword *) & texels[0], - texture_ea + spu_extract(offset, 0), 4); - spu_dcache_fetch_unaligned((qword *) & texels[1], - texture_ea + spu_extract(offset, 1), 4); - spu_dcache_fetch_unaligned((qword *) & texels[2], - texture_ea + spu_extract(offset, 2), 4); - spu_dcache_fetch_unaligned((qword *) & texels[3], - texture_ea + spu_extract(offset, 3), 4); -} - - -/** clamp vec to [0, max] */ -static INLINE vector signed int -spu_clamp(vector signed int vec, vector signed int max) -{ - static const vector signed int zero = {0,0,0,0}; - vector unsigned int c; - c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */ - vec = spu_sel(zero, vec, c); - c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */ - vec = spu_sel(vec, max, c); - return vec; -} - - - -/** - * Do nearest texture sampling for four pixels. - * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). 
- */ -void -sample_texture_2d_nearest(vector float s, vector float t, - uint unit, uint level, uint face, - vector float colors[4]) -{ - const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; - vector float ss = spu_mul(s, tlevel->scale_s); - vector float tt = spu_mul(t, tlevel->scale_t); - vector signed int is = spu_convts(ss, 0); - vector signed int it = spu_convts(tt, 0); - vec_uint4 texels[4]; - - /* PIPE_TEX_WRAP_REPEAT */ - is = spu_and(is, tlevel->mask_s); - it = spu_and(it, tlevel->mask_t); - - /* PIPE_TEX_WRAP_CLAMP */ - is = spu_clamp(is, tlevel->max_s); - it = spu_clamp(it, tlevel->max_t); - - get_four_texels(tlevel, face, is, it, texels); - - /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */ - spu_unpack_A8R8G8B8_transpose4(texels, colors); -} - - -/** - * Do bilinear texture sampling for four pixels. - * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). - */ -void -sample_texture_2d_bilinear(vector float s, vector float t, - uint unit, uint level, uint face, - vector float colors[4]) -{ - const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; - static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; - - vector float ss = spu_madd(s, tlevel->scale_s, half); - vector float tt = spu_madd(t, tlevel->scale_t, half); - - vector signed int is0 = spu_convts(ss, 0); - vector signed int it0 = spu_convts(tt, 0); - - /* is + 1, it + 1 */ - vector signed int is1 = spu_add(is0, 1); - vector signed int it1 = spu_add(it0, 1); - - /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, tlevel->mask_s); - it0 = spu_and(it0, tlevel->mask_t); - is1 = spu_and(is1, tlevel->mask_s); - it1 = spu_and(it1, tlevel->mask_t); - - /* PIPE_TEX_WRAP_CLAMP */ - is0 = spu_clamp(is0, tlevel->max_s); - it0 = spu_clamp(it0, tlevel->max_t); - is1 = spu_clamp(is1, tlevel->max_s); - it1 = spu_clamp(it1, tlevel->max_t); - - /* get packed int texels */ - vector unsigned int texels[16]; - get_four_texels(tlevel, face, 
is0, it0, texels + 0); /* upper-left */ - get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */ - get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */ - get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */ - - /* convert packed int texels to float colors */ - vector float ftexels[16]; - spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0); - spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4); - spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8); - spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12); - - /* Compute weighting factors in [0,1] - * Multiply texcoord by 1024, AND with 1023, convert back to float. - */ - vector float ss1024 = spu_mul(ss, spu_splats(1024.0f)); - vector signed int iss1024 = spu_convts(ss1024, 0); - iss1024 = spu_and(iss1024, 1023); - vector float sWeights0 = spu_convtf(iss1024, 10); - - vector float tt1024 = spu_mul(tt, spu_splats(1024.0f)); - vector signed int itt1024 = spu_convts(tt1024, 0); - itt1024 = spu_and(itt1024, 1023); - vector float tWeights0 = spu_convtf(itt1024, 10); - - /* 1 - sWeight and 1 - tWeight */ - vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0); - vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0); - - /* reds, for four pixels */ - ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/ - ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/ - ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/ - ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/ - colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]), - spu_add(ftexels[8], ftexels[12])); - - /* greens, for four pixels */ - ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/ - ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/ - ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/ - ftexels[13] = spu_mul(ftexels[13], 
spu_mul(sWeights0, tWeights0)); /*lr*/ - colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]), - spu_add(ftexels[9], ftexels[13])); - - /* blues, for four pixels */ - ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/ - ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/ - ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/ - ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/ - colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]), - spu_add(ftexels[10], ftexels[14])); - - /* alphas, for four pixels */ - ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/ - ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/ - ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/ - ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/ - colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]), - spu_add(ftexels[11], ftexels[15])); -} - - - -/** - * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h - */ -static INLINE void -transpose(vector unsigned int *mOut0, - vector unsigned int *mOut1, - vector unsigned int *mOut2, - vector unsigned int *mOut3, - vector unsigned int *mIn) -{ - vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */ - vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */ - vector unsigned int aibj, ckdl, emfn, gohp; /* intermediate vectors */ - - vector unsigned char shufflehi = ((vector unsigned char) { - 0x00, 0x01, 0x02, 0x03, - 0x10, 0x11, 0x12, 0x13, - 0x04, 0x05, 0x06, 0x07, - 0x14, 0x15, 0x16, 0x17}); - vector unsigned char shufflelo = ((vector unsigned char) { - 0x08, 0x09, 0x0A, 0x0B, - 0x18, 0x19, 0x1A, 0x1B, - 0x0C, 0x0D, 0x0E, 0x0F, - 0x1C, 0x1D, 0x1E, 0x1F}); - abcd = *(mIn+0); - efgh = *(mIn+1); - ijkl = *(mIn+2); - mnop = *(mIn+3); - - aibj = spu_shuffle(abcd, ijkl, shufflehi); - ckdl = spu_shuffle(abcd, ijkl, shufflelo); - emfn = spu_shuffle(efgh, 
mnop, shufflehi); - gohp = spu_shuffle(efgh, mnop, shufflelo); - - aeim = spu_shuffle(aibj, emfn, shufflehi); - bfjn = spu_shuffle(aibj, emfn, shufflelo); - cgko = spu_shuffle(ckdl, gohp, shufflehi); - dhlp = spu_shuffle(ckdl, gohp, shufflelo); - - *mOut0 = aeim; - *mOut1 = bfjn; - *mOut2 = cgko; - *mOut3 = dhlp; -} - - -/** - * Bilinear filtering, using int instead of float arithmetic for computing - * sample weights. - */ -void -sample_texture_2d_bilinear_int(vector float s, vector float t, - uint unit, uint level, uint face, - vector float colors[4]) -{ - const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; - static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; - - /* Scale texcoords by size of texture, and add half pixel bias */ - vector float ss = spu_madd(s, tlevel->scale_s, half); - vector float tt = spu_madd(t, tlevel->scale_t, half); - - /* convert float coords to fixed-pt coords with 7 fraction bits */ - vector signed int is = spu_convts(ss, 7); /* XXX really need floor() here */ - vector signed int it = spu_convts(tt, 7); /* XXX really need floor() here */ - - /* compute integer texel weights in [0, 127] */ - vector signed int sWeights0 = spu_and(is, 127); - vector signed int tWeights0 = spu_and(it, 127); - vector signed int sWeights1 = spu_sub(127, sWeights0); - vector signed int tWeights1 = spu_sub(127, tWeights0); - - /* texel coords: is0 = is / 128, it0 = is / 128 */ - vector signed int is0 = spu_rlmask(is, -7); - vector signed int it0 = spu_rlmask(it, -7); - - /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */ - vector signed int is1 = spu_add(is0, 1); - vector signed int it1 = spu_add(it0, 1); - - /* PIPE_TEX_WRAP_REPEAT */ - is0 = spu_and(is0, tlevel->mask_s); - it0 = spu_and(it0, tlevel->mask_t); - is1 = spu_and(is1, tlevel->mask_s); - it1 = spu_and(it1, tlevel->mask_t); - - /* PIPE_TEX_WRAP_CLAMP */ - is0 = spu_clamp(is0, tlevel->max_s); - it0 = spu_clamp(it0, tlevel->max_t); - is1 = spu_clamp(is1, tlevel->max_s); - 
it1 = spu_clamp(it1, tlevel->max_t); - - /* get packed int texels */ - vector unsigned int texels[16]; - get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */ - get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */ - get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */ - get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */ - - /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */ - { - static const unsigned char ZERO = 0x80; - int i; - for (i = 0; i < 16; i++) { - texels[i] = spu_shuffle(texels[i], texels[i], - ((vector unsigned char) { - ZERO, ZERO, ZERO, 1, - ZERO, ZERO, ZERO, 2, - ZERO, ZERO, ZERO, 3, - ZERO, ZERO, ZERO, 0})); - } - } - - /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */ - vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7, - texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15; - transpose(&texel0, &texel1, &texel2, &texel3, texels + 0); - transpose(&texel4, &texel5, &texel6, &texel7, texels + 4); - transpose(&texel8, &texel9, &texel10, &texel11, texels + 8); - transpose(&texel12, &texel13, &texel14, &texel15, texels + 12); - - /* computed weighted colors */ - vector unsigned int c0, c1, c2, c3, cSum; - - /* red */ - c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ - cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); - colors[0] = spu_convtf(cSum, 22); - - /* green */ - c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) 
sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ - cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); - colors[1] = spu_convtf(cSum, 22); - - /* blue */ - c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ - cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); - colors[2] = spu_convtf(cSum, 22); - - /* alpha */ - c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ - c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ - c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ - c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ - cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); - colors[3] = spu_convtf(cSum, 22); -} - - - -/** - * Compute level of detail factor from texcoords. 
- */ -static INLINE float -compute_lambda_2d(uint unit, vector float s, vector float t) -{ - uint baseLevel = 0; - float width = spu.texture[unit].level[baseLevel].width; - float height = spu.texture[unit].level[baseLevel].width; - float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0)); - float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0)); - float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0)); - float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0)); -#if 0 - /* ideal value */ - float x = dsdx * dsdx + dtdx * dtdx; - float y = dsdy * dsdy + dtdy * dtdy; - float rho = x > y ? x : y; - rho = sqrtf(rho); -#else - /* approximation */ - dsdx = fabsf(dsdx); - dsdy = fabsf(dsdy); - dtdx = fabsf(dtdx); - dtdy = fabsf(dtdy); - float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5; -#endif - float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */ - return lambda; -} - - -/** - * Blend two sets of colors according to weight. - */ -static void -blend_colors(vector float c0[4], const vector float c1[4], float weight) -{ - vector float t = spu_splats(weight); - vector float dc0 = spu_sub(c1[0], c0[0]); - vector float dc1 = spu_sub(c1[1], c0[1]); - vector float dc2 = spu_sub(c1[2], c0[2]); - vector float dc3 = spu_sub(c1[3], c0[3]); - c0[0] = spu_madd(dc0, t, c0[0]); - c0[1] = spu_madd(dc1, t, c0[1]); - c0[2] = spu_madd(dc2, t, c0[2]); - c0[3] = spu_madd(dc3, t, c0[3]); -} - - -/** - * Texture sampling with level of detail selection and possibly mipmap - * interpolation. - */ -void -sample_texture_2d_lod(vector float s, vector float t, - uint unit, uint level_ignored, uint face, - vector float colors[4]) -{ - /* - * Note that we're computing a lambda/lod here that's used for all - * four pixels in the quad. 
- */ - float lambda = compute_lambda_2d(unit, s, t); - - (void) face; - (void) level_ignored; - - /* apply lod bias */ - lambda += spu.sampler[unit].lod_bias; - - /* clamp */ - if (lambda < spu.sampler[unit].min_lod) - lambda = spu.sampler[unit].min_lod; - else if (lambda > spu.sampler[unit].max_lod) - lambda = spu.sampler[unit].max_lod; - - if (lambda <= 0.0f) { - /* magnify */ - spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors); - } - else { - /* minify */ - if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { - /* sample two mipmap levels and interpolate */ - int level = (int) lambda; - if (level > (int) spu.texture[unit].max_level) - level = spu.texture[unit].max_level; - spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors); - if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { - /* sample second mipmap level */ - float weight = lambda - (float) level; - level++; - if (level <= (int) spu.texture[unit].max_level) { - vector float colors2[4]; - spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2); - blend_colors(colors, colors2, weight); - } - } - } - else { - /* sample one mipmap level */ - int level = (int) (lambda + 0.5f); - if (level > (int) spu.texture[unit].max_level) - level = spu.texture[unit].max_level; - spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors); - } - } -} - - -/** XXX need a SIMD version of this */ -static unsigned -choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) -{ - /* - major axis - direction target sc tc ma - ---------- ------------------------------- --- --- --- - +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx - -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx - +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry - -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry - +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz - -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz - */ - const float arx = fabsf(rx); - const float ary = fabsf(ry); - const float arz = 
fabsf(rz); - unsigned face; - float sc, tc, ma; - - if (arx > ary && arx > arz) { - if (rx >= 0.0F) { - face = PIPE_TEX_FACE_POS_X; - sc = -rz; - tc = -ry; - ma = arx; - } - else { - face = PIPE_TEX_FACE_NEG_X; - sc = rz; - tc = -ry; - ma = arx; - } - } - else if (ary > arx && ary > arz) { - if (ry >= 0.0F) { - face = PIPE_TEX_FACE_POS_Y; - sc = rx; - tc = rz; - ma = ary; - } - else { - face = PIPE_TEX_FACE_NEG_Y; - sc = rx; - tc = -rz; - ma = ary; - } - } - else { - if (rz > 0.0F) { - face = PIPE_TEX_FACE_POS_Z; - sc = rx; - tc = -ry; - ma = arz; - } - else { - face = PIPE_TEX_FACE_NEG_Z; - sc = -rx; - tc = -ry; - ma = arz; - } - } - - *newS = (sc / ma + 1.0F) * 0.5F; - *newT = (tc / ma + 1.0F) * 0.5F; - - return face; -} - - - -void -sample_texture_cube(vector float s, vector float t, vector float r, - uint unit, vector float colors[4]) -{ - uint p, faces[4], level = 0; - float newS[4], newT[4]; - - /* Compute cube faces referenced by the four sets of texcoords. - * XXX we should SIMD-ize this. - */ - for (p = 0; p < 4; p++) { - float rx = spu_extract(s, p); - float ry = spu_extract(t, p); - float rz = spu_extract(r, p); - faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]); - } - - if (faces[0] == faces[1] && - faces[0] == faces[2] && - faces[0] == faces[3]) { - /* GOOD! All four texcoords refer to the same cube face */ - s = (vector float) {newS[0], newS[1], newS[2], newS[3]}; - t = (vector float) {newT[0], newT[1], newT[2], newT[3]}; - spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors); - } - else { - /* BAD! 
The four texcoords refer to different faces */ - for (p = 0; p < 4; p++) { - vector float c[4]; - - spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]), - unit, level, faces[p], c); - - float red = spu_extract(c[0], p); - float green = spu_extract(c[1], p); - float blue = spu_extract(c[2], p); - float alpha = spu_extract(c[3], p); - - colors[0] = spu_insert(red, colors[0], p); - colors[1] = spu_insert(green, colors[1], p); - colors[2] = spu_insert(blue, colors[2], p); - colors[3] = spu_insert(alpha, colors[3], p); - } - } -} diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h deleted file mode 100644 index 7b75b007b5a..00000000000 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ /dev/null @@ -1,67 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef SPU_TEXTURE_H -#define SPU_TEXTURE_H - - -#include "pipe/p_compiler.h" - - -extern void -invalidate_tex_cache(void); - - -extern void -sample_texture_2d_nearest(vector float s, vector float t, - uint unit, uint level, uint face, - vector float colors[4]); - - -extern void -sample_texture_2d_bilinear(vector float s, vector float t, - uint unit, uint level, uint face, - vector float colors[4]); - -extern void -sample_texture_2d_bilinear_int(vector float s, vector float t, - uint unit, uint level, uint face, - vector float colors[4]); - - -extern void -sample_texture_2d_lod(vector float s, vector float t, - uint unit, uint level, uint face, - vector float colors[4]); - - -extern void -sample_texture_cube(vector float s, vector float t, vector float r, - uint unit, vector float colors[4]); - - -#endif /* SPU_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tgsi_exec.h b/src/gallium/drivers/cell/spu/spu_tgsi_exec.h deleted file mode 100644 index 6f2a3d30b91..00000000000 --- a/src/gallium/drivers/cell/spu/spu_tgsi_exec.h +++ /dev/null @@ -1,158 +0,0 @@ -/************************************************************************** - * - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * Copyright 2009-2010 VMware, Inc. All rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef SPU_TGSI_EXEC_H -#define SPU_TGSI_EXEC_H - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" - -#if defined __cplusplus -extern "C" { -#endif - - -#define NUM_CHANNELS 4 /* R,G,B,A */ -#define QUAD_SIZE 4 /* 4 pixel/quad */ - - - -#define TGSI_EXEC_NUM_TEMPS 128 -#define TGSI_EXEC_NUM_IMMEDIATES 256 - -/* - * Locations of various utility registers (_I = Index, _C = Channel) - */ -#define TGSI_EXEC_TEMP_00000000_IDX (TGSI_EXEC_NUM_TEMPS + 0) -#define TGSI_EXEC_TEMP_00000000_CHAN 0 - -#define TGSI_EXEC_TEMP_7FFFFFFF_IDX (TGSI_EXEC_NUM_TEMPS + 0) -#define TGSI_EXEC_TEMP_7FFFFFFF_CHAN 1 - -#define TGSI_EXEC_TEMP_80000000_IDX (TGSI_EXEC_NUM_TEMPS + 0) -#define TGSI_EXEC_TEMP_80000000_CHAN 2 - -#define TGSI_EXEC_TEMP_FFFFFFFF_IDX (TGSI_EXEC_NUM_TEMPS + 0) -#define TGSI_EXEC_TEMP_FFFFFFFF_CHAN 3 - -#define TGSI_EXEC_TEMP_ONE_IDX (TGSI_EXEC_NUM_TEMPS + 1) -#define TGSI_EXEC_TEMP_ONE_CHAN 0 - -#define TGSI_EXEC_TEMP_TWO_IDX (TGSI_EXEC_NUM_TEMPS + 1) -#define TGSI_EXEC_TEMP_TWO_CHAN 1 - -#define TGSI_EXEC_TEMP_128_IDX (TGSI_EXEC_NUM_TEMPS + 1) -#define TGSI_EXEC_TEMP_128_CHAN 2 - -#define TGSI_EXEC_TEMP_MINUS_128_IDX (TGSI_EXEC_NUM_TEMPS + 1) -#define TGSI_EXEC_TEMP_MINUS_128_CHAN 3 - -#define TGSI_EXEC_TEMP_KILMASK_IDX (TGSI_EXEC_NUM_TEMPS + 2) -#define TGSI_EXEC_TEMP_KILMASK_CHAN 0 - -#define TGSI_EXEC_TEMP_OUTPUT_IDX (TGSI_EXEC_NUM_TEMPS + 2) -#define TGSI_EXEC_TEMP_OUTPUT_CHAN 1 - -#define TGSI_EXEC_TEMP_PRIMITIVE_IDX (TGSI_EXEC_NUM_TEMPS + 2) -#define TGSI_EXEC_TEMP_PRIMITIVE_CHAN 2 - -/* NVIDIA condition code (CC) vector - */ -#define TGSI_EXEC_CC_GT 0x01 -#define TGSI_EXEC_CC_EQ 0x02 -#define TGSI_EXEC_CC_LT 0x04 -#define TGSI_EXEC_CC_UN 0x08 - -#define TGSI_EXEC_CC_X_MASK 0x000000ff -#define TGSI_EXEC_CC_X_SHIFT 0 -#define TGSI_EXEC_CC_Y_MASK 0x0000ff00 -#define TGSI_EXEC_CC_Y_SHIFT 8 -#define TGSI_EXEC_CC_Z_MASK 0x00ff0000 -#define TGSI_EXEC_CC_Z_SHIFT 16 -#define 
TGSI_EXEC_CC_W_MASK 0xff000000 -#define TGSI_EXEC_CC_W_SHIFT 24 - -#define TGSI_EXEC_TEMP_CC_IDX (TGSI_EXEC_NUM_TEMPS + 2) -#define TGSI_EXEC_TEMP_CC_CHAN 3 - -#define TGSI_EXEC_TEMP_THREE_IDX (TGSI_EXEC_NUM_TEMPS + 3) -#define TGSI_EXEC_TEMP_THREE_CHAN 0 - -#define TGSI_EXEC_TEMP_HALF_IDX (TGSI_EXEC_NUM_TEMPS + 3) -#define TGSI_EXEC_TEMP_HALF_CHAN 1 - -/* execution mask, each value is either 0 or ~0 */ -#define TGSI_EXEC_MASK_IDX (TGSI_EXEC_NUM_TEMPS + 3) -#define TGSI_EXEC_MASK_CHAN 2 - -/* 4 register buffer for various purposes */ -#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4) -#define TGSI_EXEC_NUM_TEMP_R 4 - -#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8) -#define TGSI_EXEC_NUM_ADDRS 1 - -/* predicate register */ -#define TGSI_EXEC_TEMP_P0 (TGSI_EXEC_NUM_TEMPS + 9) -#define TGSI_EXEC_NUM_PREDS 1 - -#define TGSI_EXEC_NUM_TEMP_EXTRAS 10 - - - -#define TGSI_EXEC_MAX_NESTING 32 -#define TGSI_EXEC_MAX_COND_NESTING TGSI_EXEC_MAX_NESTING -#define TGSI_EXEC_MAX_LOOP_NESTING TGSI_EXEC_MAX_NESTING -#define TGSI_EXEC_MAX_SWITCH_NESTING TGSI_EXEC_MAX_NESTING -#define TGSI_EXEC_MAX_CALL_NESTING TGSI_EXEC_MAX_NESTING - -/* The maximum number of input attributes per vertex. For 2D - * input register files, this is the stride between two 1D - * arrays. - */ -#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17 - -/* The maximum number of constant vectors per constant buffer. 
- */ -#define TGSI_EXEC_MAX_CONST_BUFFER 4096 - -/* The maximum number of vertices per primitive */ -#define TGSI_MAX_PRIM_VERTICES 6 - -/* The maximum number of primitives to be generated */ -#define TGSI_MAX_PRIMITIVES 64 - -/* The maximum total number of vertices */ -#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS) - - -#if defined __cplusplus -} /* extern "C" */ -#endif - -#endif /* TGSI_EXEC_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c deleted file mode 100644 index 6905015a483..00000000000 --- a/src/gallium/drivers/cell/spu/spu_tile.c +++ /dev/null @@ -1,126 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - - -#include "spu_tile.h" -#include "spu_main.h" - - -/** - * Get tile of color or Z values from main memory, put into SPU memory. - */ -void -get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf) -{ - const uint offset = ty * spu.fb.width_tiles + tx; - const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); - const ubyte *src = zBuf ? spu.fb.depth_start : spu.fb.color_start; - - src += offset * bytesPerTile; - - ASSERT(tx < spu.fb.width_tiles); - ASSERT(ty < spu.fb.height_tiles); - ASSERT_ALIGN16(tile); - /* - printf("get_tile: dest: %p src: 0x%x size: %d\n", - tile, (unsigned int) src, bytesPerTile); - */ - mfc_get(tile->ui, /* dest in local memory */ - (unsigned int) src, /* src in main memory */ - bytesPerTile, - tag, - 0, /* tid */ - 0 /* rid */); -} - - -/** - * Move tile of color or Z values from SPU memory to main memory. - */ -void -put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf) -{ - const uint offset = ty * spu.fb.width_tiles + tx; - const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4); - ubyte *dst = zBuf ? spu.fb.depth_start : spu.fb.color_start; - - dst += offset * bytesPerTile; - - ASSERT(tx < spu.fb.width_tiles); - ASSERT(ty < spu.fb.height_tiles); - ASSERT_ALIGN16(tile); - /* - printf("SPU %u: put_tile: src: %p dst: 0x%x size: %d\n", - spu.init.id, - tile, (unsigned int) dst, bytesPerTile); - */ - mfc_put((void *) tile->ui, /* src in local memory */ - (unsigned int) dst, /* dst in main memory */ - bytesPerTile, - tag, - 0, /* tid */ - 0 /* rid */); -} - - -/** - * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled - * tiles back to the main framebuffer. 
- */ -void -really_clear_tiles(uint surfaceIndex) -{ - const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - uint i; - - if (surfaceIndex == 0) { - clear_c_tile(&spu.ctile); - - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) { - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); - } - } - } - else { - clear_z_tile(&spu.ztile); - - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1); - } - } - -#if 0 - wait_on_mask(1 << TAG_SURFACE_CLEAR); -#endif -} diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h deleted file mode 100644 index 7bfb52be8f3..00000000000 --- a/src/gallium/drivers/cell/spu/spu_tile.h +++ /dev/null @@ -1,75 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef SPU_TILE_H -#define SPU_TILE_H - - -#include -#include -#include "spu_main.h" -#include "cell/common.h" - - - -extern void -get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf); - -extern void -put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf); - -extern void -really_clear_tiles(uint surfaceIndex); - - -static INLINE void -clear_c_tile(tile_t *ctile) -{ - memset32((uint*) ctile->ui, - spu.fb.color_clear_value, - TILE_SIZE * TILE_SIZE); -} - - -static INLINE void -clear_z_tile(tile_t *ztile) -{ - if (spu.fb.zsize == 2) { - memset16((ushort*) ztile->us, - spu.fb.depth_clear_value, - TILE_SIZE * TILE_SIZE); - } - else { - ASSERT(spu.fb.zsize != 0); - memset32((uint*) ztile->ui, - spu.fb.depth_clear_value, - TILE_SIZE * TILE_SIZE); - } -} - - -#endif /* SPU_TILE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c deleted file mode 100644 index efeebca27bb..00000000000 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ /dev/null @@ -1,843 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Triangle rendering within a tile. 
- */ - -#include "pipe/p_compiler.h" -#include "pipe/p_format.h" -#include "util/u_math.h" -#include "spu_colorpack.h" -#include "spu_main.h" -#include "spu_shuffle.h" -#include "spu_texture.h" -#include "spu_tile.h" -#include "spu_tri.h" - - -/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ -typedef vector unsigned int mask_t; - - - -/** - * Simplified types taken from other parts of Gallium - */ -struct vertex_header { - vector float data[1]; -}; - - - -/* XXX fix this */ -#undef CEILF -#define CEILF(X) ((float) (int) ((X) + 0.99999f)) - - -#define QUAD_TOP_LEFT 0 -#define QUAD_TOP_RIGHT 1 -#define QUAD_BOTTOM_LEFT 2 -#define QUAD_BOTTOM_RIGHT 3 -#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) -#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) -#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) -#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) -#define MASK_ALL 0xf - - -#define CHAN0 0 -#define CHAN1 1 -#define CHAN2 2 -#define CHAN3 3 - - -#define DEBUG_VERTS 0 - -/** - * Triangle edge info - */ -struct edge { - union { - struct { - float dx; /**< X(v1) - X(v0), used only during setup */ - float dy; /**< Y(v1) - Y(v0), used only during setup */ - }; - vec_float4 ds; /**< vector accessor for dx and dy */ - }; - float dxdy; /**< dx/dy */ - float sx, sy; /**< first sample point coord */ - int lines; /**< number of lines on this edge */ -}; - - -struct interp_coef -{ - vector float a0; - vector float dadx; - vector float dady; -}; - - -/** - * Triangle setup info (derived from draw_stage). - * Also used for line drawing (taking some liberties). - */ -struct setup_stage { - - /* Vertices are just an array of floats making up each attribute in - * turn. Currently fixed at 4 floats, but should change in time. - * Codegen will help cope with this. 
- */ - union { - struct { - const struct vertex_header *vmin; - const struct vertex_header *vmid; - const struct vertex_header *vmax; - const struct vertex_header *vprovoke; - }; - qword vertex_headers; - }; - - struct edge ebot; - struct edge etop; - struct edge emaj; - - float oneOverArea; /* XXX maybe make into vector? */ - - uint facing; - - uint tx, ty; /**< position of current tile (x, y) */ - - union { - struct { - int cliprect_minx; - int cliprect_miny; - int cliprect_maxx; - int cliprect_maxy; - }; - qword cliprect; - }; - - struct interp_coef coef[PIPE_MAX_SHADER_INPUTS]; - - struct { - vec_int4 quad; /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */ - int y; - unsigned y_flags; - unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ - } span; -}; - - -static struct setup_stage setup; - - -static INLINE vector float -splatx(vector float v) -{ - return spu_splats(spu_extract(v, CHAN0)); -} - -static INLINE vector float -splaty(vector float v) -{ - return spu_splats(spu_extract(v, CHAN1)); -} - -static INLINE vector float -splatz(vector float v) -{ - return spu_splats(spu_extract(v, CHAN2)); -} - -static INLINE vector float -splatw(vector float v) -{ - return spu_splats(spu_extract(v, CHAN3)); -} - - -/** - * Setup fragment shader inputs by evaluating triangle's vertex - * attribute coefficient info. - * \param x quad x pos - * \param y quad y pos - * \param fragZ returns quad Z values - * \param fragInputs returns fragment program inputs - * Note: this code could be incorporated into the fragment program - * itself to avoid the loop and switch. 
- */ -static void -eval_inputs(float x, float y, vector float *fragZ, vector float fragInputs[]) -{ - static const vector float deltaX = (const vector float) {0, 1, 0, 1}; - static const vector float deltaY = (const vector float) {0, 0, 1, 1}; - - const uint posSlot = 0; - const vector float pos = setup.coef[posSlot].a0; - const vector float dposdx = setup.coef[posSlot].dadx; - const vector float dposdy = setup.coef[posSlot].dady; - const vector float fragX = spu_splats(x) + deltaX; - const vector float fragY = spu_splats(y) + deltaY; - vector float fragW, wInv; - uint i; - - *fragZ = splatz(pos) + fragX * splatz(dposdx) + fragY * splatz(dposdy); - fragW = splatw(pos) + fragX * splatw(dposdx) + fragY * splatw(dposdy); - wInv = spu_re(fragW); /* 1 / w */ - - /* loop over fragment program inputs */ - for (i = 0; i < spu.vertex_info.num_attribs; i++) { - uint attr = i + 1; - enum interp_mode interp = spu.vertex_info.attrib[attr].interp_mode; - - /* constant term */ - vector float a0 = setup.coef[attr].a0; - vector float r0 = splatx(a0); - vector float r1 = splaty(a0); - vector float r2 = splatz(a0); - vector float r3 = splatw(a0); - - if (interp == INTERP_LINEAR || interp == INTERP_PERSPECTIVE) { - /* linear term */ - vector float dadx = setup.coef[attr].dadx; - vector float dady = setup.coef[attr].dady; - /* Use SPU intrinsics here to get slightly better code. 
- * originally: r0 += fragX * splatx(dadx) + fragY * splatx(dady); - */ - r0 = spu_madd(fragX, splatx(dadx), spu_madd(fragY, splatx(dady), r0)); - r1 = spu_madd(fragX, splaty(dadx), spu_madd(fragY, splaty(dady), r1)); - r2 = spu_madd(fragX, splatz(dadx), spu_madd(fragY, splatz(dady), r2)); - r3 = spu_madd(fragX, splatw(dadx), spu_madd(fragY, splatw(dady), r3)); - if (interp == INTERP_PERSPECTIVE) { - /* perspective term */ - r0 *= wInv; - r1 *= wInv; - r2 *= wInv; - r3 *= wInv; - } - } - fragInputs[CHAN0] = r0; - fragInputs[CHAN1] = r1; - fragInputs[CHAN2] = r2; - fragInputs[CHAN3] = r3; - fragInputs += 4; - } -} - - -/** - * Emit a quad (pass to next stage). No clipping is done. - * Note: about 1/5 to 1/7 of the time, mask is zero and this function - * should be skipped. But adding the test for that slows things down - * overall. - */ -static INLINE void -emit_quad( int x, int y, mask_t mask) -{ - /* If any bits in mask are set... */ - if (spu_extract(spu_orx(mask), 0)) { - const int ix = x - setup.cliprect_minx; - const int iy = y - setup.cliprect_miny; - - spu.cur_ctile_status = TILE_STATUS_DIRTY; - spu.cur_ztile_status = TILE_STATUS_DIRTY; - - { - /* - * Run fragment shader, execute per-fragment ops, update fb/tile. - */ - vector float inputs[4*4], outputs[2*4]; - vector unsigned int kill_mask; - vector float fragZ; - - eval_inputs((float) x, (float) y, &fragZ, inputs); - - ASSERT(spu.fragment_program); - ASSERT(spu.fragment_ops); - - /* Execute the current fragment program */ - kill_mask = spu.fragment_program(inputs, outputs, spu.constants); - - mask = spu_andc(mask, kill_mask); - - /* Execute per-fragment/quad operations, including: - * alpha test, z test, stencil test, blend and framebuffer writing. - * Note that there are two different fragment operations functions - * that can be called, one for front-facing fragments, and one - * for back-facing fragments. 
(Often the two are the same; - * but in some cases, like two-sided stenciling, they can be - * very different.) So choose the correct function depending - * on the calculated facing. - */ - spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile, - fragZ, - outputs[0*4+0], - outputs[0*4+1], - outputs[0*4+2], - outputs[0*4+3], - mask); - } - } -} - - -/** - * Given an X or Y coordinate, return the block/quad coordinate that it - * belongs to. - */ -static INLINE int -block(int x) -{ - return x & ~1; -} - - -/** - * Render a horizontal span of quads - */ -static void -flush_spans(void) -{ - int minleft, maxright; - - const int l0 = spu_extract(setup.span.quad, 0); - const int l1 = spu_extract(setup.span.quad, 1); - const int r0 = spu_extract(setup.span.quad, 2); - const int r1 = spu_extract(setup.span.quad, 3); - - switch (setup.span.y_flags) { - case 0x3: - /* both odd and even lines written (both quad rows) */ - minleft = MIN2(l0, l1); - maxright = MAX2(r0, r1); - break; - - case 0x1: - /* only even line written (quad top row) */ - minleft = l0; - maxright = r0; - break; - - case 0x2: - /* only odd line written (quad bottom row) */ - minleft = l1; - maxright = r1; - break; - - default: - return; - } - - /* OK, we're very likely to need the tile data now. - * clear or finish waiting if needed. 
- */ - if (spu.cur_ctile_status == TILE_STATUS_GETTING) { - /* wait for mfc_get() to complete */ - //printf("SPU: %u: waiting for ctile\n", spu.init.id); - wait_on_mask(1 << TAG_READ_TILE_COLOR); - spu.cur_ctile_status = TILE_STATUS_CLEAN; - } - else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) { - //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty); - clear_c_tile(&spu.ctile); - spu.cur_ctile_status = TILE_STATUS_DIRTY; - } - ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); - - if (spu.read_depth_stencil) { - if (spu.cur_ztile_status == TILE_STATUS_GETTING) { - /* wait for mfc_get() to complete */ - //printf("SPU: %u: waiting for ztile\n", spu.init.id); - wait_on_mask(1 << TAG_READ_TILE_Z); - spu.cur_ztile_status = TILE_STATUS_CLEAN; - } - else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) { - //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty); - clear_z_tile(&spu.ztile); - spu.cur_ztile_status = TILE_STATUS_DIRTY; - } - ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED); - } - - /* XXX this loop could be moved into the above switch cases... */ - - /* Setup for mask calculation */ - const vec_int4 quad_LlRr = setup.span.quad; - const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8); - const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B)); - const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B)); - - const vec_int4 twos = spu_splats(2); - - const int x = block(minleft); - vec_int4 xs = {x, x+1, x, x+1}; - - for (; spu_extract(xs, 0) <= block(maxright); xs += twos) { - /** - * Computes mask to indicate which pixels in the 2x2 quad are actually - * inside the triangle's bounds. 
- */ - - /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */ - const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs); - const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs); - - /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */ - const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs); - - /* Combine results to create mask */ - const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs); - - emit_quad(spu_extract(xs, 0), setup.span.y, mask); - } - - setup.span.y = 0; - setup.span.y_flags = 0; - /* Zero right elements */ - setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0)); -} - - -#if DEBUG_VERTS -static void -print_vertex(const struct vertex_header *v) -{ - uint i; - fprintf(stderr, " Vertex: (%p)\n", v); - for (i = 0; i < spu.vertex_info.num_attribs; i++) { - fprintf(stderr, " %d: %f %f %f %f\n", i, - spu_extract(v->data[i], 0), - spu_extract(v->data[i], 1), - spu_extract(v->data[i], 2), - spu_extract(v->data[i], 3)); - } -} -#endif - -/* Returns the minimum of each slot of two vec_float4s as qwords. - * i.e. return[n] = min(q0[n],q1[n]); - */ -static qword -minfq(qword q0, qword q1) -{ - const qword q0q1m = si_fcgt(q0, q1); - return si_selb(q0, q1, q0q1m); -} - -/* Returns the minimum of each slot of three vec_float4s as qwords. - * i.e. return[n] = min(q0[n],q1[n],q2[n]); - */ -static qword -min3fq(qword q0, qword q1, qword q2) -{ - return minfq(minfq(q0, q1), q2); -} - -/* Returns the maximum of each slot of two vec_float4s as qwords. - * i.e. return[n] = min(q0[n],q1[n],q2[n]); - */ -static qword -maxfq(qword q0, qword q1) { - const qword q0q1m = si_fcgt(q0, q1); - return si_selb(q1, q0, q0q1m); -} - -/* Returns the maximum of each slot of three vec_float4s as qwords. - * i.e. return[n] = min(q0[n],q1[n],q2[n]); - */ -static qword -max3fq(qword q0, qword q1, qword q2) { - return maxfq(maxfq(q0, q1), q2); -} - -/** - * Sort vertices from top to bottom. - * Compute area and determine front vs. back facing. 
- * Do coarse clip test against tile bounds - * \return FALSE if tri is totally outside tile, TRUE otherwise - */ -static boolean -setup_sort_vertices(const qword vs) -{ - float area, sign; - -#if DEBUG_VERTS - if (spu.init.id==0) { - fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id); - print_vertex(v0); - print_vertex(v1); - print_vertex(v2); - } -#endif - - { - /* Load the float values for various processing... */ - const qword f0 = (qword)(((const struct vertex_header*)si_to_ptr(vs))->data[0]); - const qword f1 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 4)))->data[0]); - const qword f2 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 8)))->data[0]); - - /* Check if triangle is completely outside the tile bounds - * Find the min and max x and y positions of the three poits */ - const qword minf = min3fq(f0, f1, f2); - const qword maxf = max3fq(f0, f1, f2); - - /* Compare min and max against cliprect vals */ - const qword maxsmins = si_shufb(maxf, minf, SHUFB4(A,B,a,b)); - const qword outside = si_fcgt(maxsmins, si_csflt(setup.cliprect, 0)); - - /* Use a little magic to work out of the tri is visible or not */ - if(si_to_uint(si_xori(si_gb(outside), 0xc))) return FALSE; - - /* determine bottom to top order of vertices */ - /* A table of shuffle patterns for putting vertex_header pointers into - correct order. Quite magical. */ - const qword sort_order_patterns[] = { - SHUFB4(A,B,C,C), - SHUFB4(C,A,B,C), - SHUFB4(A,C,B,C), - SHUFB4(B,C,A,C), - SHUFB4(B,A,C,C), - SHUFB4(C,B,A,C) }; - - /* Collate y values into two vectors for comparison. - Using only one shuffle constant! 
;) */ - const qword y_02_ = si_shufb(f0, f2, SHUFB4(0,B,b,C)); - const qword y_10_ = si_shufb(f1, f0, SHUFB4(0,B,b,C)); - const qword y_012 = si_shufb(y_02_, f1, SHUFB4(0,B,b,C)); - const qword y_120 = si_shufb(y_10_, f2, SHUFB4(0,B,b,C)); - - /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */ - const qword compare = si_fcgt(y_012, y_120); - /* Compress the result of the comparison into 4 bits */ - const qword gather = si_gb(compare); - /* Subtract one to attain the index into the LUT. Magical. */ - const unsigned int index = si_to_uint(gather) - 1; - - /* Load the appropriate pattern and construct the desired vector. */ - setup.vertex_headers = si_shufb(vs, vs, sort_order_patterns[index]); - - /* Using the result of the comparison, set sign. - Very magical. */ - sign = ((si_to_uint(si_cntb(gather)) == 2) ? 1.0f : -1.0f); - } - - setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]); - setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]); - setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]); - - /* - * Compute triangle's area. Use 1/area to compute partial - * derivatives of attributes later. - */ - area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy; - - setup.oneOverArea = 1.0f / area; - - /* The product of area * sign indicates front/back orientation (0/1). - * Just in case someone gets the bright idea of switching the front - * and back constants without noticing that we're assuming their - * values in this operation, also assert that the values are - * what we think they are. - */ - ASSERT(CELL_FACING_FRONT == 0); - ASSERT(CELL_FACING_BACK == 1); - setup.facing = (area * sign > 0.0f) - ^ (!spu.rasterizer.front_ccw); - - return TRUE; -} - - -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - * The value value comes from vertex->data[slot]. - * The result will be put into setup.coef[slot].a0. 
- * \param slot which attribute slot - */ -static INLINE void -const_coeff4(uint slot) -{ - setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0}; - setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0}; - setup.coef[slot].a0 = setup.vprovoke->data[slot]; -} - - -/** - * As above, but interp setup all four vector components. - */ -static INLINE void -tri_linear_coeff4(uint slot) -{ - const vector float vmin_d = setup.vmin->data[slot]; - const vector float vmid_d = setup.vmid->data[slot]; - const vector float vmax_d = setup.vmax->data[slot]; - const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); - const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); - - vector float botda = vmid_d - vmin_d; - vector float majda = vmax_d - vmin_d; - - vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), - spu_mul(botda, spu_splats(setup.emaj.dy))); - vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), - spu_mul(majda, spu_splats(setup.ebot.dx))); - - setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); - setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); - - vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); - vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); - - setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. 
- */ -static void -tri_persp_coeff4(uint slot) -{ - const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); - const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); - - const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3)); - const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3)); - const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3)); - - vector float vmin_d = setup.vmin->data[slot]; - vector float vmid_d = setup.vmid->data[slot]; - vector float vmax_d = setup.vmax->data[slot]; - - vmin_d = spu_mul(vmin_d, vmin_w); - vmid_d = spu_mul(vmid_d, vmid_w); - vmax_d = spu_mul(vmax_d, vmax_w); - - vector float botda = vmid_d - vmin_d; - vector float majda = vmax_d - vmin_d; - - vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), - spu_mul(botda, spu_splats(setup.emaj.dy))); - vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), - spu_mul(majda, spu_splats(setup.ebot.dx))); - - setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); - setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); - - vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); - vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); - - setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); -} - - - -/** - * Compute the setup.coef[] array dadx, dady, a0 values. - * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized. 
- */ -static void -setup_tri_coefficients(void) -{ - uint i; - - for (i = 0; i < spu.vertex_info.num_attribs; i++) { - switch (spu.vertex_info.attrib[i].interp_mode) { - case INTERP_NONE: - break; - case INTERP_CONSTANT: - const_coeff4(i); - break; - case INTERP_POS: - /* fall-through */ - case INTERP_LINEAR: - tri_linear_coeff4(i); - break; - case INTERP_PERSPECTIVE: - tri_persp_coeff4(i); - break; - default: - ASSERT(0); - } - } -} - - -static void -setup_tri_edges(void) -{ - float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f; - float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f; - - float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f; - float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f; - float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f; - - setup.emaj.sy = CEILF(vmin_y); - setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy); - setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy; - setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy; - - setup.etop.sy = CEILF(vmid_y); - setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy); - setup.etop.dxdy = setup.etop.dx / setup.etop.dy; - setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy; - - setup.ebot.sy = CEILF(vmin_y); - setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy); - setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy; - setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy; -} - - -/** - * Render the upper or lower half of a triangle. - * Scissoring/cliprect is applied here too. 
- */ -static void -subtriangle(struct edge *eleft, struct edge *eright, unsigned lines) -{ - const int minx = setup.cliprect_minx; - const int maxx = setup.cliprect_maxx; - const int miny = setup.cliprect_miny; - const int maxy = setup.cliprect_maxy; - int y, start_y, finish_y; - int sy = (int)eleft->sy; - - ASSERT((int)eleft->sy == (int) eright->sy); - - /* clip top/bottom */ - start_y = sy; - finish_y = sy + lines; - - if (start_y < miny) - start_y = miny; - - if (finish_y > maxy) - finish_y = maxy; - - start_y -= sy; - finish_y -= sy; - - /* - printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); - */ - - for (y = start_y; y < finish_y; y++) { - - /* avoid accumulating adds as floats don't have the precision to - * accurately iterate large triangle edges that way. luckily we - * can just multiply these days. - * - * this is all drowned out by the attribute interpolation anyway. - */ - int left = (int)(eleft->sx + y * eleft->dxdy); - int right = (int)(eright->sx + y * eright->dxdy); - - /* clip left/right */ - if (left < minx) - left = minx; - if (right > maxx) - right = maxx; - - if (left < right) { - int _y = sy + y; - if (block(_y) != setup.span.y) { - flush_spans(); - setup.span.y = block(_y); - } - - int offset = _y&1; - vec_int4 quad_LlRr = {left, left, right, right}; - /* Store left and right in 0 or 1 row of quad based on offset */ - setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<sx += lines * eleft->dxdy; - eright->sx += lines * eright->dxdy; - eleft->sy += lines; - eright->sy += lines; -} - - -/** - * Draw triangle into tile at (tx, ty) (tile coords) - * The tile data should have already been fetched. 
- */ -boolean -tri_draw(const qword vs, - uint tx, uint ty) -{ - setup.tx = tx; - setup.ty = ty; - - /* set clipping bounds to tile bounds */ - const qword clipbase = (qword)((vec_uint4){tx, ty}); - const qword clipmin = si_mpyui(clipbase, TILE_SIZE); - const qword clipmax = si_ai(clipmin, TILE_SIZE); - setup.cliprect = si_shufb(clipmin, clipmax, SHUFB4(A,B,a,b)); - - if(!setup_sort_vertices(vs)) { - return FALSE; /* totally clipped */ - } - - setup_tri_coefficients(); - setup_tri_edges(); - - setup.span.y = 0; - setup.span.y_flags = 0; - /* Zero right elements */ - setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0)); - - if (setup.oneOverArea < 0.0) { - /* emaj on left */ - subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines ); - subtriangle( &setup.emaj, &setup.etop, setup.etop.lines ); - } - else { - /* emaj on right */ - subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines ); - subtriangle( &setup.etop, &setup.emaj, setup.etop.lines ); - } - - flush_spans(); - - return TRUE; -} diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h deleted file mode 100644 index 82e3b19ad7e..00000000000 --- a/src/gallium/drivers/cell/spu/spu_tri.h +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#ifndef SPU_TRI_H -#define SPU_TRI_H - - -extern boolean -tri_draw(const qword vs, uint tx, uint ty); - - -#endif /* SPU_TRI_H */ diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c deleted file mode 100644 index 24057e29e36..00000000000 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ /dev/null @@ -1,77 +0,0 @@ - -#include "cell/common.h" -#include "pipe/p_shader_tokens.h" -#include "util/u_debug.h" -#include "tgsi/tgsi_parse.h" -//#include "tgsi_build.h" -#include "tgsi/tgsi_util.h" - -unsigned -tgsi_util_get_src_register_swizzle( - const struct tgsi_src_register *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->SwizzleX; - case 1: - return reg->SwizzleY; - case 2: - return reg->SwizzleZ; - case 3: - return reg->SwizzleW; - default: - ASSERT( 0 ); - } - return 0; -} - - -unsigned -tgsi_util_get_full_src_register_swizzle( - const struct tgsi_full_src_register *reg, - unsigned component ) -{ - return tgsi_util_get_src_register_swizzle( - reg->Register, - component ); -} - - -unsigned -tgsi_util_get_full_src_register_sign_mode( - const struct tgsi_full_src_register *reg, - unsigned component ) -{ - unsigned sign_mode; - - if( reg->RegisterExtMod.Absolute ) { - /* Consider only the post-abs negation. */ - - if( reg->RegisterExtMod.Negate ) { - sign_mode = TGSI_UTIL_SIGN_SET; - } - else { - sign_mode = TGSI_UTIL_SIGN_CLEAR; - } - } - else { - /* Accumulate the three negations. 
*/ - - unsigned negate; - - negate = reg->Register.Negate; - if( reg->RegisterExtMod.Negate ) { - negate = !negate; - } - - if( negate ) { - sign_mode = TGSI_UTIL_SIGN_TOGGLE; - } - else { - sign_mode = TGSI_UTIL_SIGN_KEEP; - } - } - - return sign_mode; -} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c deleted file mode 100644 index 087963960df..00000000000 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ /dev/null @@ -1,146 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - * Ian Romanick - */ - -#include "pipe/p_state.h" -#include "pipe/p_shader_tokens.h" -#include "spu_exec.h" -#include "spu_vertex_shader.h" -#include "spu_main.h" -#include "spu_dcache.h" - -typedef void (*spu_fetch_func)(qword *out, const qword *in, - const qword *shuffle_data); - - -PIPE_ALIGN_VAR(16) static const qword -fetch_shuffle_data[5] = { - /* Shuffle used by CVT_64_FLOAT - */ - { - 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - }, - - /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED - */ - { - 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80, - 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80, - }, - - /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED - */ - { - 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80, - 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80, - }, - - /* High value shuffle used by trans4x4. - */ - { - 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, - 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17 - }, - - /* Low value shuffle used by trans4x4. - */ - { - 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, - 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F - } -}; - - -/** - * Fetch vertex attributes for 'count' vertices. 
- */ -static void generic_vertex_fetch(struct spu_vs_context *draw, - struct spu_exec_machine *machine, - const unsigned *elts, - unsigned count) -{ - unsigned nr_attrs = draw->vertex_fetch.nr_attrs; - unsigned attr; - - ASSERT(count <= 4); - -#if DRAW_DBG - printf("SPU: %s count = %u, nr_attrs = %u\n", - __FUNCTION__, count, nr_attrs); -#endif - - /* loop over vertex attributes (vertex shader inputs) - */ - for (attr = 0; attr < nr_attrs; attr++) { - const unsigned pitch = draw->vertex_fetch.pitch[attr]; - const uint64_t src = draw->vertex_fetch.src_ptr[attr]; - const spu_fetch_func fetch = (spu_fetch_func) - (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]); - unsigned i; - unsigned idx; - const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; - const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; - PIPE_ALIGN_VAR(16) qword in[2 * 4]; - - - /* Fetch four attributes for four vertices. - */ - idx = 0; - for (i = 0; i < count; i++) { - const uint64_t addr = src + (elts[i] * pitch); - -#if DRAW_DBG - printf("SPU: fetching = 0x%llx\n", addr); -#endif - - spu_dcache_fetch_unaligned(& in[idx], addr, bytes_per_entry); - idx += quads_per_entry; - } - - /* Be nice and zero out any missing vertices. - */ - (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword)); - - - /* Convert all 4 vertices to vectors of float. - */ - (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data); - } -} - - -void spu_update_vertex_fetch( struct spu_vs_context *draw ) -{ - draw->vertex_fetch.fetch_func = generic_vertex_fetch; -} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c deleted file mode 100644 index d6febd36f41..00000000000 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ /dev/null @@ -1,245 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - * Brian Paul - * Ian Romanick - */ - -#include - -#include "pipe/p_state.h" -#include "pipe/p_shader_tokens.h" -#include "util/u_math.h" -#include "draw/draw_private.h" -#include "draw/draw_context.h" -#include "cell/common.h" -#include "spu_vertex_shader.h" -#include "spu_exec.h" -#include "spu_main.h" - - -#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) - - -#define CLIP_RIGHT_BIT 0x01 -#define CLIP_LEFT_BIT 0x02 -#define CLIP_TOP_BIT 0x04 -#define CLIP_BOTTOM_BIT 0x08 -#define CLIP_FAR_BIT 0x10 -#define CLIP_NEAR_BIT 0x20 - - -static INLINE float -dot4(const float *a, const float *b) -{ - return (a[0]*b[0] + - a[1]*b[1] + - a[2]*b[2] + - a[3]*b[3]); -} - -static INLINE unsigned -compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) -{ - unsigned mask = 0; - unsigned i; - - /* Do the hardwired planes first: - */ - if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; - if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; - if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; - if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; - if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; - if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; - - /* Followed by any remaining ones: - */ - for (i = 6; i < nr; i++) { - if (dot4(clip, plane[i]) < 0) - mask |= (1<machine; - unsigned int j; - - PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS]; - PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS]; - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - ASSERT(count <= 4); - - machine->Processor = TGSI_PROCESSOR_VERTEX; - - ASSERT_ALIGN16(draw->constants); - machine->Consts = (float (*)[4]) draw->constants; - - machine->Inputs = inputs; - machine->Outputs = outputs; - - spu_vertex_fetch( draw, machine, elts, count ); - - /* run shader */ - 
spu_exec_machine_run( machine ); - - - /* store machine results */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - PIPE_ALIGN_VAR(16) - unsigned char buffer[sizeof(struct vertex_header) - + MAX_VERTEX_SIZE]; - struct vertex_header *const tmpOut = - (struct vertex_header *) buffer; - const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) - + (sizeof(float) * 4 - * draw->num_vs_outputs)); - - mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); - wait_on_mask(1 << TAG_VERTEX_BUFFER); - - - /* Handle attr[0] (position) specially: - * - * XXX: Computing the clipmask should be done in the vertex - * program as a set of DP4 instructions appended to the - * user-provided code. - */ - x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane, - draw->nr_planes); - tmpOut->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - tmpOut->data[0][0] = x * scale[0] + trans[0]; - tmpOut->data[0][1] = y * scale[1] + trans[1]; - tmpOut->data[0][2] = z * scale[2] + trans[2]; - tmpOut->data[0][3] = w; - - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. 
- */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } - - mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); - } /* loop over vertices */ -} - - -PIPE_ALIGN_VAR(16) unsigned char -immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]; - - -void -spu_bind_vertex_shader(struct spu_vs_context *draw, - struct cell_shader_info *vs) -{ - const unsigned immediate_addr = vs->immediates; - const unsigned immediate_size = - ROUNDUP16((sizeof(float) * 4 * vs->num_immediates) - + (immediate_addr & 0x0f)); - - - mfc_get(immediates, immediate_addr & ~0x0f, immediate_size, - TAG_VERTEX_BUFFER, 0, 0); - - draw->machine.Instructions = (struct tgsi_full_instruction *) - vs->instructions; - draw->machine.NumInstructions = vs->num_instructions; - - draw->machine.Declarations = (struct tgsi_full_declaration *) - vs->declarations; - draw->machine.NumDeclarations = vs->num_declarations; - - draw->num_vs_outputs = vs->num_outputs; - - /* specify the shader to interpret/execute */ - spu_exec_machine_init(&draw->machine, - PIPE_MAX_SAMPLERS, - NULL /*samplers*/, - PIPE_SHADER_VERTEX); - - wait_on_mask(1 << TAG_VERTEX_BUFFER); - - (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f], - sizeof(float) * 4 * vs->num_immediates); -} - - -void -spu_execute_vertex_shader(struct spu_vs_context *draw, - const struct cell_command_vs *vs) -{ - unsigned i; - - (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes); - draw->nr_planes = vs->nr_planes; - draw->vertex_fetch.nr_attrs = vs->nr_attrs; - - for (i = 0; i < vs->num_elts; i += 4) { - const unsigned batch_size = MIN2(vs->num_elts - i, 4); - - run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]); - } -} diff --git 
a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h deleted file mode 100644 index 4c74f5e74d5..00000000000 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef SPU_VERTEX_SHADER_H -#define SPU_VERTEX_SHADER_H - -#include "cell/common.h" -#include "pipe/p_format.h" -#include "spu_exec.h" - -struct spu_vs_context; - -typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, - struct spu_exec_machine *machine, - const unsigned *elts, - unsigned count ); - -struct spu_vs_context { - struct pipe_viewport_state viewport; - - struct { - uint64_t src_ptr[PIPE_MAX_ATTRIBS]; - unsigned pitch[PIPE_MAX_ATTRIBS]; - unsigned size[PIPE_MAX_ATTRIBS]; - unsigned code_offset[PIPE_MAX_ATTRIBS]; - unsigned nr_attrs; - boolean dirty; - - spu_full_fetch_func fetch_func; - void *code; - } vertex_fetch; - - /* Clip derived state: - */ - float plane[12][4]; - unsigned nr_planes; - - struct spu_exec_machine machine; - const float (*constants)[4]; - - unsigned num_vs_outputs; -}; - -extern void spu_update_vertex_fetch(struct spu_vs_context *draw); - -static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, - struct spu_exec_machine *machine, - const unsigned *elts, - unsigned count) -{ - if (draw->vertex_fetch.dirty) { - spu_update_vertex_fetch(draw); - draw->vertex_fetch.dirty = 0; - } - - (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count); -} - -struct cell_command_vs; - -extern void -spu_bind_vertex_shader(struct spu_vs_context *draw, - struct cell_shader_info *vs); - -extern void -spu_execute_vertex_shader(struct spu_vs_context *draw, - const struct cell_command_vs *vs); - -#endif /* SPU_VERTEX_SHADER_H */ diff --git a/src/gallium/targets/libgl-xlib/Makefile b/src/gallium/targets/libgl-xlib/Makefile index f8f6c81b3f2..867b2da323b 100644 --- a/src/gallium/targets/libgl-xlib/Makefile +++ b/src/gallium/targets/libgl-xlib/Makefile @@ -29,7 +29,6 @@ DEFINES += \ 
-DGALLIUM_RBUG \ -DGALLIUM_TRACE \ -DGALLIUM_GALAHAD -#-DGALLIUM_CELL will be defined by the config */ XLIB_TARGET_SOURCES = \ xlib.c @@ -38,7 +37,6 @@ XLIB_TARGET_SOURCES = \ XLIB_TARGET_OBJECTS = $(XLIB_TARGET_SOURCES:.c=.o) -# Note: CELL_SPU_LIB is only defined for cell configs LIBS = \ $(GALLIUM_DRIVERS) \ @@ -50,7 +48,6 @@ LIBS = \ $(TOP)/src/mapi/glapi/libglapi.a \ $(TOP)/src/mesa/libmesagallium.a \ $(GALLIUM_AUXILIARIES) \ - $(CELL_SPU_LIB) \ # LLVM diff --git a/src/gallium/targets/libgl-xlib/SConscript b/src/gallium/targets/libgl-xlib/SConscript index ad8b0992e46..25a4582d7a3 100644 --- a/src/gallium/targets/libgl-xlib/SConscript +++ b/src/gallium/targets/libgl-xlib/SConscript @@ -42,11 +42,6 @@ if True: if env['llvm']: env.Append(CPPDEFINES = ['GALLIUM_LLVMPIPE']) env.Prepend(LIBS = [llvmpipe]) - -if False: - # TODO: Detect Cell SDK - env.Append(CPPDEFINES = 'GALLIUM_CELL') - env.Prepend(LIBS = [cell]) # libGL.so.1.5 libgl_1_5 = env.SharedLibrary( diff --git a/src/gallium/targets/libgl-xlib/xlib.c b/src/gallium/targets/libgl-xlib/xlib.c index 1a5892b94a0..0ede7e6096b 100644 --- a/src/gallium/targets/libgl-xlib/xlib.c +++ b/src/gallium/targets/libgl-xlib/xlib.c @@ -42,7 +42,7 @@ /* Helper function to build a subset of a driver stack consisting of - * one of the software rasterizers (cell, llvmpipe, softpipe) and the + * one of the software rasterizers (llvmpipe, softpipe) and the * xlib winsys. */ static struct pipe_screen *