The Cell driver complicates Gallium3D development and doesn't seem to have any active users.
Signed-off-by: Kai Wasserbäch <kai@dev.carbon-project.org>
Signed-off-by: José Fonseca <jfonseca@vmware.com>
linux-i965 \
linux-alpha \
linux-alpha-static \
-linux-cell \
-linux-cell-debug \
linux-debug \
linux-dri \
linux-dri-debug \
opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine,
allowed_values=('generic', 'ppc', 'x86', 'x86_64')))
opts.Add(EnumOption('platform', 'target platform', host_platform,
- allowed_values=('linux', 'cell', 'windows', 'darwin', 'cygwin', 'sunos', 'freebsd8')))
+ allowed_values=('linux', 'windows', 'darwin', 'cygwin', 'sunos', 'freebsd8')))
opts.Add(BoolOption('embedded', 'embedded build', 'no'))
opts.Add('toolchain', 'compiler toolchain', default_toolchain)
opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support', 'no'))
+++ /dev/null
-# linux-cell (non-debug build)
-
-include $(TOP)/configs/linux
-
-CONFIG_NAME = linux-cell
-
-
-# Omitting other gallium drivers:
-GALLIUM_DRIVERS_DIRS = cell softpipe trace rbug identity
-
-
-# Compiler and flags
-CC = ppu32-gcc
-CXX = ppu32-g++
-HOST_CC = gcc
-APP_CC = gcc
-APP_CXX = g++
-
-OPT_FLAGS = -O3
-
-# Cell SDK location
-## For SDK 2.1: (plus, remove -DSPU_MAIN_PARAM_LONG_LONG below)
-#SDK = /opt/ibm/cell-sdk/prototype/sysroot/usr
-## For SDK 3.0:
-SDK = /opt/cell/sdk/usr
-
-
-
-COMMON_C_CPP_FLAGS = $(OPT_FLAGS) -Wall -Winline \
- -fPIC -m32 -mabi=altivec -maltivec \
- -I. -I$(SDK)/include \
- -DGALLIUM_CELL $(DEFINES)
-
-CFLAGS = $(COMMON_C_CPP_FLAGS) -Wmissing-prototypes -std=c99
-
-CXXFLAGS = $(COMMON_C_CPP_FLAGS)
-
-
-SRC_DIRS = glsl mapi/glapi mapi/vgapi mesa \
- gallium gallium/winsys gallium/targets glu
-
-# Build no traditional Mesa drivers:
-DRIVER_DIRS =
-
-
-MKDEP_OPTIONS = -fdepend -Y
-
-
-GL_LIB_DEPS = $(EXTRA_LIB_PATH) -lX11 -lXext -lm -lpthread \
- -L$(SDK)/lib -m32 -Wl,-m,elf32ppc -R$(SDK)/lib -lspe2
-
-
-CELL_SPU_LIB = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a
-
-
-### SPU stuff
-
-SPU_CC = spu-gcc
-
-SPU_CFLAGS = $(OPT_FLAGS) -W -Wall -Winline -Wmissing-prototypes -Wno-main \
- -I. -I$(SDK)/spu/include -I$(TOP)/src/mesa/ $(INCLUDE_DIRS) \
- -DSPU_MAIN_PARAM_LONG_LONG \
- -include spu_intrinsics.h
-
-SPU_LFLAGS = -L$(SDK)/spu/lib -Wl,-N -lmisc -lm
-
-SPU_AR = ppu-ar
-SPU_AR_FLAGS = -qcs
-
-SPU_EMBED = ppu32-embedspu
-SPU_EMBED_FLAGS = -m32
+++ /dev/null
-# linux-cell-debug
-
-include $(TOP)/configs/linux-cell
-
-# just override name and OPT_FLAGS here:
-
-CONFIG_NAME = linux-cell-debug
-
-OPT_FLAGS = -g -DDEBUG
-
+++ /dev/null
-<HTML>
-
-<TITLE>Cell Driver</TITLE>
-
-<link rel="stylesheet" type="text/css" href="mesa.css"></head>
-
-<BODY>
-
-<H1>Mesa/Gallium Cell Driver</H1>
-
-<p>
-The Mesa
-<a href="http://en.wikipedia.org/wiki/Cell_%28microprocessor%29" target="_parent">Cell</a>
-driver is part of the
-<a href="http://wiki.freedesktop.org/wiki/Software/gallium" target="_parent">Gallium3D</a>
-architecture.
-Tungsten Graphics did the original implementation of the Cell driver.
-</p>
-
-
-<H2>Source Code</H2>
-
-<p>
-The latest Cell driver source code is on the master branch of the Mesa
-git repository.
-</p>
-<p>
-To build the driver you'll need the IBM Cell SDK (version 2.1 or 3.0).
-To use the driver you'll need a Cell system, such as a PS3 running Linux,
-or the Cell Simulator (untested, though).
-</p>
-
-<p>
-If using Cell SDK 2.1, see the configs/linux-cell file for some
-special changes.
-</p>
-
-<p>
-To compile the code, run <code>make linux-cell</code>.
-Or to build in debug mode, run <code>make linux-cell-debug</code>.
-</p>
-
-<p>
-To use the library, make sure your current directory is the top of the
-Mesa tree, then set <code>LD_LIBRARY_PATH</code> like this:
-<pre>
- export LD_LIBRARY_PATH=$PWD/lib/gallium:$PWD/lib/
-</pre>
-
-<p>
-Verify that the Cell driver is being used by running
-<code>progs/xdemos/glxinfo</code> and looking for:
-<pre>
- OpenGL renderer string: Gallium 0.3, Cell on Xlib
-</pre>
-
-
-<H2>Driver Implementation Summary</H2>
-
-<p>
-Rasterization is parallelized across the SPUs in a tile-based manner.
-Batches of transformed triangles are sent to the SPUs (actually, pulled from
-main memory by the SPUs).
-Each SPU loops over a set of 32x32-pixel screen tiles, rendering the triangles
-into each tile.
-Because of the limited SPU memory, framebuffer tiles are paged in/out of
-SPU local store as needed.
-Similarly, textures are tiled and brought into local store as needed.
-</p>
-
-
-<H2>Status</H2>
-
-<p>
-As of October 2008, the driver runs quite a few OpenGL demos.
-Features that work include:
-</p>
-<ul>
-<li>Point/line/triangle rendering, glDrawPixels
-<li>2D, NPOT and cube texture maps with nearest/linear/mipmap filtering
-<li>Dynamic SPU code generation for fragment shaders, but not complete
-<li>Dynamic SPU code generation for fragment ops (blend, Z-test, etc), but not complete
-<li>Dynamic PPU/PPC code generation for vertex shaders, but not complete
-</ul>
-<p>
-Performance has recently improved with the addition of PPC code generation
-for vertex shaders, but the code quality isn't too great yet.
-</p>
-<p>
-Another bottleneck is SwapBuffers. It may be the limiting factor for
-many simple GL tests.
-</p>
-
-
-
-<H2>Debug Options</H2>
-
-<p>
-The CELL_DEBUG env var can be set to a comma-separated list of one or
-more of the following debug options:
-</p>
-<ul>
-<li><b>checker</b> - use a different background clear color for each SPU.
- This lets you see which SPU is rendering which screen tiles.
-<li><b>sync</b> - wait/synchronize after each DMA transfer
-<li><b>asm</b> - print generated SPU assembly code to stdout
-<li><b>fragops</b> - emit fragment ops debug messages
-<li><b>fragopfallback</b> - don't use codegen for fragment ops
-<li><b>cmd</b> - print SPU commands as they're received
-<li><b>cache</b> - print texture cache statistics when program exits
-</ul>
-<p>
-Note that some of these options may only work for linux-cell-debug builds.
-</p>
-
-<p>
-If the GALLIUM_NOPPC env var is set, PPC code generation will not be used
-and vertex shaders will be run with the TGSI interpreter.
-</p>
-<p>
-If the GALLIUM_NOCELL env var is set, the softpipe driver will be used
-instead of the Cell driver.
-This is useful for comparison/validation.
-</p>
-
-
-
-<H2>Contributing</H2>
-
-<p>
-If you're interested in contributing to the effort, familiarize yourself
-with the code, join the <a href="lists.html">mesa3d-dev mailing list</a>,
-and describe what you'd like to do.
-</p>
-
-
-</BODY>
-</HTML>
<li><a href="devinfo.html" target="MainFrame">Development Notes</a>
<li><a href="sourcedocs.html" target="MainFrame">Source Documentation</a>
<li><a href="subset.html" target="MainFrame">Mesa Subset Driver</a>
-<LI><A HREF="dispatch.html" target="MainFrame">GL Dispatch</A>
-<li><a href="cell.html" target="MainFrame">Cell Driver</A>
+<li><a href="dispatch.html" target="MainFrame">GL Dispatch</a>
</ul>
<b>Links</b>
<h2>January 24, 2008</h2>
<p>
-Added a new page describing the <a href="cell.html">Mesa Cell driver</a>.
+Added a new page describing the Mesa Cell driver.
</p>
by the gallium drivers for this hardware.</li>
<li>Removed the i965g driver, which was broken and with nobody in sight to fix
the situation</li>
+<li>Removed the Gallium Cell driver; it was just a burden on Gallium
+ development and nobody seems to use it.</li>
</ul>
<ul>
<li>softpipe - a software/reference driver
<li>i915 - Intel 915/945 driver
- <li><a href="cell.html">Cell</a> - IBM/Sony/Toshiba Cell processor driver
+ <li>Cell - IBM/Sony/Toshiba Cell processor driver
<li>nouveau (for NVIDIA GPUs) and R300 (for AMD/ATI R300).
<b>PLEASE NOTE: these drivers are incomplete and still under development.
It's probably NOT worthwhile to report any bugs unless you have patches.
interfaces
<li><b>drivers</b> - Gallium3D device drivers
<ul>
- <li><b>cell</b> - Driver for Cell processor.
<li><b>i915</b> - Driver for Intel i915/i945.
<li><b>llvmpipe</b> - Software driver using LLVM for runtime code generation.
<li><b>nv*</b> - Drivers for NVIDIA GPUs.
- Pipe drivers:
- \ref softpipe
- \ref i915g
- - Cell driver (cell_context.h, cell_winsys.h)
- \ref failover
- Winsys drivers:
rtasm/rtasm_cpu.c \
rtasm/rtasm_execmem.c \
rtasm/rtasm_ppc.c \
- rtasm/rtasm_ppc_spe.c \
rtasm/rtasm_x86sse.c \
tgsi/tgsi_build.c \
tgsi/tgsi_dump.c \
+++ /dev/null
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file
- * Real-time assembly generation interface for Cell B.E. SPEs.
- *
- * \author Ian Romanick <idr@us.ibm.com>
- * \author Brian Paul
- */
-
-
-#include <stdio.h>
-#include "pipe/p_compiler.h"
-#include "util/u_memory.h"
-#include "rtasm_ppc_spe.h"
-
-
-#ifdef GALLIUM_CELL
-/**
- * SPE instruction types
- *
- * There are 6 primary instruction encodings used on the Cell's SPEs. Each of
- * the following unions encodes one type.
- *
- * \bug
- * If, at some point, we start generating SPE code from a little-endian host
- * these unions will not work.
- */
-/*@{*/
-/**
- * Encode one output register with two input registers
- */
-union spe_inst_RR {
- uint32_t bits;
- struct {
- unsigned op:11;
- unsigned rB:7;
- unsigned rA:7;
- unsigned rT:7;
- } inst;
-};
-
-
-/**
- * Encode one output register with three input registers
- */
-union spe_inst_RRR {
- uint32_t bits;
- struct {
- unsigned op:4;
- unsigned rT:7;
- unsigned rB:7;
- unsigned rA:7;
- unsigned rC:7;
- } inst;
-};
-
-
-/**
- * Encode one output register with one input reg. and a 7-bit signed immed
- */
-union spe_inst_RI7 {
- uint32_t bits;
- struct {
- unsigned op:11;
- unsigned i7:7;
- unsigned rA:7;
- unsigned rT:7;
- } inst;
-};
-
-
-/**
- * Encode one output register with one input reg. and an 8-bit signed immed
- */
-union spe_inst_RI8 {
- uint32_t bits;
- struct {
- unsigned op:10;
- unsigned i8:8;
- unsigned rA:7;
- unsigned rT:7;
- } inst;
-};
-
-
-/**
- * Encode one output register with one input reg. and a 10-bit signed immed
- */
-union spe_inst_RI10 {
- uint32_t bits;
- struct {
- unsigned op:8;
- unsigned i10:10;
- unsigned rA:7;
- unsigned rT:7;
- } inst;
-};
-
-
-/**
- * Encode one output register with a 16-bit signed immediate
- */
-union spe_inst_RI16 {
- uint32_t bits;
- struct {
- unsigned op:9;
- unsigned i16:16;
- unsigned rT:7;
- } inst;
-};
-
-
-/**
- * Encode one output register with an 18-bit signed immediate
- */
-union spe_inst_RI18 {
- uint32_t bits;
- struct {
- unsigned op:7;
- unsigned i18:18;
- unsigned rT:7;
- } inst;
-};
-/*@}*/
-
-
-static void
-indent(const struct spe_function *p)
-{
- int i;
- for (i = 0; i < p->indent; i++) {
- putchar(' ');
- }
-}
-
-
-static const char *
-rem_prefix(const char *longname)
-{
- return longname + 4;
-}
-
-
-static const char *
-reg_name(int reg)
-{
- switch (reg) {
- case SPE_REG_SP:
- return "$sp";
- case SPE_REG_RA:
- return "$lr";
- default:
- {
- /* cycle through four buffers to handle multiple calls per printf */
- static char buf[4][10];
- static int b = 0;
- b = (b + 1) % 4;
- sprintf(buf[b], "$%d", reg);
- return buf[b];
- }
- }
-}
-
-
-static void
-emit_instruction(struct spe_function *p, uint32_t inst_bits)
-{
- if (!p->store)
- return; /* out of memory, drop the instruction */
-
- if (p->num_inst == p->max_inst) {
- /* allocate larger buffer */
- uint32_t *newbuf;
- p->max_inst *= 2; /* 2x larger */
- newbuf = align_malloc(p->max_inst * SPE_INST_SIZE, 16);
- if (newbuf) {
- memcpy(newbuf, p->store, p->num_inst * SPE_INST_SIZE);
- }
- align_free(p->store);
- p->store = newbuf;
- if (!p->store) {
- /* out of memory */
- p->num_inst = 0;
- return;
- }
- }
-
- p->store[p->num_inst++] = inst_bits;
-}
-
-
-
-static void emit_RR(struct spe_function *p, unsigned op, int rT,
- int rA, int rB, const char *name)
-{
- union spe_inst_RR inst;
- inst.inst.op = op;
- inst.inst.rB = rB;
- inst.inst.rA = rA;
- inst.inst.rT = rT;
- emit_instruction(p, inst.bits);
- if (p->print) {
- indent(p);
- printf("%s\t%s, %s, %s\n",
- rem_prefix(name), reg_name(rT), reg_name(rA), reg_name(rB));
- }
-}
-
-
-static void emit_RRR(struct spe_function *p, unsigned op, int rT,
- int rA, int rB, int rC, const char *name)
-{
- union spe_inst_RRR inst;
- inst.inst.op = op;
- inst.inst.rT = rT;
- inst.inst.rB = rB;
- inst.inst.rA = rA;
- inst.inst.rC = rC;
- emit_instruction(p, inst.bits);
- if (p->print) {
- indent(p);
- printf("%s\t%s, %s, %s, %s\n", rem_prefix(name), reg_name(rT),
- reg_name(rA), reg_name(rB), reg_name(rC));
- }
-}
-
-
-static void emit_RI7(struct spe_function *p, unsigned op, int rT,
- int rA, int imm, const char *name)
-{
- union spe_inst_RI7 inst;
- inst.inst.op = op;
- inst.inst.i7 = imm;
- inst.inst.rA = rA;
- inst.inst.rT = rT;
- emit_instruction(p, inst.bits);
- if (p->print) {
- indent(p);
- printf("%s\t%s, %s, 0x%x\n",
- rem_prefix(name), reg_name(rT), reg_name(rA), imm);
- }
-}
-
-
-
-static void emit_RI8(struct spe_function *p, unsigned op, int rT,
- int rA, int imm, const char *name)
-{
- union spe_inst_RI8 inst;
- inst.inst.op = op;
- inst.inst.i8 = imm;
- inst.inst.rA = rA;
- inst.inst.rT = rT;
- emit_instruction(p, inst.bits);
- if (p->print) {
- indent(p);
- printf("%s\t%s, %s, 0x%x\n",
- rem_prefix(name), reg_name(rT), reg_name(rA), imm);
- }
-}
-
-
-
-static void emit_RI10(struct spe_function *p, unsigned op, int rT,
- int rA, int imm, const char *name)
-{
- union spe_inst_RI10 inst;
- inst.inst.op = op;
- inst.inst.i10 = imm;
- inst.inst.rA = rA;
- inst.inst.rT = rT;
- emit_instruction(p, inst.bits);
- if (p->print) {
- indent(p);
- printf("%s\t%s, %s, 0x%x\n",
- rem_prefix(name), reg_name(rT), reg_name(rA), imm);
- }
-}
-
-
-/** As above, but do range checking on signed immediate value */
-static void emit_RI10s(struct spe_function *p, unsigned op, int rT,
- int rA, int imm, const char *name)
-{
- assert(imm <= 511);
- assert(imm >= -512);
- emit_RI10(p, op, rT, rA, imm, name);
-}
-
-
-static void emit_RI16(struct spe_function *p, unsigned op, int rT,
- int imm, const char *name)
-{
- union spe_inst_RI16 inst;
- inst.inst.op = op;
- inst.inst.i16 = imm;
- inst.inst.rT = rT;
- emit_instruction(p, inst.bits);
- if (p->print) {
- indent(p);
- printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm);
- }
-}
-
-
-static void emit_RI18(struct spe_function *p, unsigned op, int rT,
- int imm, const char *name)
-{
- union spe_inst_RI18 inst;
- inst.inst.op = op;
- inst.inst.i18 = imm;
- inst.inst.rT = rT;
- emit_instruction(p, inst.bits);
- if (p->print) {
- indent(p);
- printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm);
- }
-}
-
-
-#define EMIT(_name, _op) \
-void _name (struct spe_function *p) \
-{ \
- emit_RR(p, _op, 0, 0, 0, __FUNCTION__); \
-}
-
-#define EMIT_(_name, _op) \
-void _name (struct spe_function *p, int rT) \
-{ \
- emit_RR(p, _op, rT, 0, 0, __FUNCTION__); \
-}
-
-#define EMIT_R(_name, _op) \
-void _name (struct spe_function *p, int rT, int rA) \
-{ \
- emit_RR(p, _op, rT, rA, 0, __FUNCTION__); \
-}
-
-#define EMIT_RR(_name, _op) \
-void _name (struct spe_function *p, int rT, int rA, int rB) \
-{ \
- emit_RR(p, _op, rT, rA, rB, __FUNCTION__); \
-}
-
-#define EMIT_RRR(_name, _op) \
-void _name (struct spe_function *p, int rT, int rA, int rB, int rC) \
-{ \
- emit_RRR(p, _op, rT, rA, rB, rC, __FUNCTION__); \
-}
-
-#define EMIT_RI7(_name, _op) \
-void _name (struct spe_function *p, int rT, int rA, int imm) \
-{ \
- emit_RI7(p, _op, rT, rA, imm, __FUNCTION__); \
-}
-
-#define EMIT_RI8(_name, _op, bias) \
-void _name (struct spe_function *p, int rT, int rA, int imm) \
-{ \
- emit_RI8(p, _op, rT, rA, bias - imm, __FUNCTION__); \
-}
-
-#define EMIT_RI10(_name, _op) \
-void _name (struct spe_function *p, int rT, int rA, int imm) \
-{ \
- emit_RI10(p, _op, rT, rA, imm, __FUNCTION__); \
-}
-
-#define EMIT_RI10s(_name, _op) \
-void _name (struct spe_function *p, int rT, int rA, int imm) \
-{ \
- emit_RI10s(p, _op, rT, rA, imm, __FUNCTION__); \
-}
-
-#define EMIT_RI16(_name, _op) \
-void _name (struct spe_function *p, int rT, int imm) \
-{ \
- emit_RI16(p, _op, rT, imm, __FUNCTION__); \
-}
-
-#define EMIT_RI18(_name, _op) \
-void _name (struct spe_function *p, int rT, int imm) \
-{ \
- emit_RI18(p, _op, rT, imm, __FUNCTION__); \
-}
-
-#define EMIT_I16(_name, _op) \
-void _name (struct spe_function *p, int imm) \
-{ \
- emit_RI16(p, _op, 0, imm, __FUNCTION__); \
-}
-
-#include "rtasm_ppc_spe.h"
-
-
-
-/**
- * Initialize an spe_function.
- * \param code_size initial size of instruction buffer to allocate, in bytes.
- * If zero, use a default.
- */
-void spe_init_func(struct spe_function *p, unsigned code_size)
-{
- uint i;
-
- if (!code_size)
- code_size = 64;
-
- p->num_inst = 0;
- p->max_inst = code_size / SPE_INST_SIZE;
- p->store = align_malloc(code_size, 16);
-
- p->set_count = 0;
- memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0]));
-
- /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile.
- */
- p->regs[0] = p->regs[1] = p->regs[2] = 1;
- for (i = 80; i <= 127; i++) {
- p->regs[i] = 1;
- }
-
- p->print = FALSE;
- p->indent = 0;
-}
-
-
-void spe_release_func(struct spe_function *p)
-{
- assert(p->num_inst <= p->max_inst);
- if (p->store != NULL) {
- align_free(p->store);
- }
- p->store = NULL;
-}
-
-
-/** Return current code size in bytes. */
-unsigned spe_code_size(const struct spe_function *p)
-{
- return p->num_inst * SPE_INST_SIZE;
-}
-
-
-/**
- * Allocate a SPE register.
- * \return register index or -1 if none left.
- */
-int spe_allocate_available_register(struct spe_function *p)
-{
- unsigned i;
- for (i = 0; i < SPE_NUM_REGS; i++) {
- if (p->regs[i] == 0) {
- p->regs[i] = 1;
- return i;
- }
- }
-
- return -1;
-}
-
-
-/**
- * Mark the given SPE register as "allocated".
- */
-int spe_allocate_register(struct spe_function *p, int reg)
-{
- assert(reg < SPE_NUM_REGS);
- assert(p->regs[reg] == 0);
- p->regs[reg] = 1;
- return reg;
-}
-
-
-/**
- * Mark the given SPE register as "unallocated". Note that this should
- * only be used on registers allocated in the current register set; an
- * assertion will fail if an attempt is made to deallocate a register
- * allocated in an earlier register set.
- */
-void spe_release_register(struct spe_function *p, int reg)
-{
- assert(reg >= 0);
- assert(reg < SPE_NUM_REGS);
- assert(p->regs[reg] == 1);
-
- p->regs[reg] = 0;
-}
-
-/**
- * Start a new set of registers. This can be called if
- * it will be difficult later to determine exactly what
- * registers were actually allocated during a code generation
- * sequence, and you really just want to deallocate all of them.
- */
-void spe_allocate_register_set(struct spe_function *p)
-{
- uint i;
-
- /* Keep track of the set count. If it ever wraps around to 0,
- * we're in trouble.
- */
- p->set_count++;
- assert(p->set_count > 0);
-
- /* Increment the allocation count of all registers currently
- * allocated. Then any registers that are allocated in this set
- * will be the only ones with a count of 1; they'll all be released
- * when the register set is released.
- */
- for (i = 0; i < SPE_NUM_REGS; i++) {
- if (p->regs[i] > 0)
- p->regs[i]++;
- }
-}
-
-void spe_release_register_set(struct spe_function *p)
-{
- uint i;
-
- /* If the set count drops below zero, we're in trouble. */
- assert(p->set_count > 0);
- p->set_count--;
-
- /* Drop the allocation level of all registers. Any allocated
- * during this register set will drop to 0 and then become
- * available.
- */
- for (i = 0; i < SPE_NUM_REGS; i++) {
- if (p->regs[i] > 0)
- p->regs[i]--;
- }
-}
-
-
-unsigned
-spe_get_registers_used(const struct spe_function *p, ubyte used[])
-{
- unsigned i, num = 0;
- /* only count registers in the range available to callers */
- for (i = 2; i < 80; i++) {
- if (p->regs[i]) {
- used[num++] = i;
- }
- }
- return num;
-}
-
-
-void
-spe_print_code(struct spe_function *p, boolean enable)
-{
- p->print = enable;
-}
-
-
-void
-spe_indent(struct spe_function *p, int spaces)
-{
- p->indent += spaces;
-}
-
-
-void
-spe_comment(struct spe_function *p, int rel_indent, const char *s)
-{
- if (p->print) {
- p->indent += rel_indent;
- indent(p);
- p->indent -= rel_indent;
- printf("# %s\n", s);
- }
-}
-
-
-/**
- * Load quad word.
- * NOTE: offset is in bytes and the least significant 4 bits must be zero!
- */
-void spe_lqd(struct spe_function *p, int rT, int rA, int offset)
-{
- const boolean pSave = p->print;
-
- /* offset must be a multiple of 16 */
- assert(offset % 16 == 0);
- /* offset must fit in 10-bit signed int field, after shifting */
- assert((offset >> 4) <= 511);
- assert((offset >> 4) >= -512);
-
- p->print = FALSE;
- emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd");
- p->print = pSave;
-
- if (p->print) {
- indent(p);
- printf("lqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA));
- }
-}
-
-
-/**
- * Store quad word.
- * NOTE: offset is in bytes and the least significant 4 bits must be zero!
- */
-void spe_stqd(struct spe_function *p, int rT, int rA, int offset)
-{
- const boolean pSave = p->print;
-
- /* offset must be a multiple of 16 */
- assert(offset % 16 == 0);
- /* offset must fit in 10-bit signed int field, after shifting */
- assert((offset >> 4) <= 511);
- assert((offset >> 4) >= -512);
-
- p->print = FALSE;
- emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd");
- p->print = pSave;
-
- if (p->print) {
- indent(p);
- printf("stqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA));
- }
-}
-
-
-/**
- * For branch instructions:
- * \param d if 1, disable interrupts if branch is taken
- * \param e if 1, enable interrupts if branch is taken
- * If d and e are both zero, don't change interrupt status (right?)
- */
-
-/** Branch Indirect to address in rA */
-void spe_bi(struct spe_function *p, int rA, int d, int e)
-{
- emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-/** Interrupt Return */
-void spe_iret(struct spe_function *p, int rA, int d, int e)
-{
- emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-/** Branch indirect and set link on external data */
-void spe_bisled(struct spe_function *p, int rT, int rA, int d,
- int e)
-{
- emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-/** Branch indirect and set link. Save PC in rT, jump to rA. */
-void spe_bisl(struct spe_function *p, int rT, int rA, int d,
- int e)
-{
- emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-/** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */
-void spe_biz(struct spe_function *p, int rT, int rA, int d, int e)
-{
- emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-/** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */
-void spe_binz(struct spe_function *p, int rT, int rA, int d, int e)
-{
- emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-/** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. */
-void spe_bihz(struct spe_function *p, int rT, int rA, int d, int e)
-{
- emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-/** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */
-void spe_bihnz(struct spe_function *p, int rT, int rA, int d, int e)
-{
- emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4), __FUNCTION__);
-}
-
-
-/* Hint-for-branch instructions
- */
-#if 0
-hbr;
-hbra;
-hbrr;
-#endif
-
-
-/* Control instructions
- */
-#if 0
-stop;
-EMIT_RR (spe_stopd, 0x140);
-EMIT_ (spe_nop, 0x201);
-sync;
-EMIT_ (spe_dsync, 0x003);
-EMIT_R (spe_mfspr, 0x00c);
-EMIT_R (spe_mtspr, 0x10c);
-#endif
-
-
-/**
- ** Helper / "macro" instructions.
- ** Use somewhat verbose names as a reminder that these aren't native
- ** SPE instructions.
- **/
-
-
-void
-spe_load_float(struct spe_function *p, int rT, float x)
-{
- if (x == 0.0f) {
- spe_il(p, rT, 0x0);
- }
- else if (x == 0.5f) {
- spe_ilhu(p, rT, 0x3f00);
- }
- else if (x == 1.0f) {
- spe_ilhu(p, rT, 0x3f80);
- }
- else if (x == -1.0f) {
- spe_ilhu(p, rT, 0xbf80);
- }
- else {
- union {
- float f;
- unsigned u;
- } bits;
- bits.f = x;
- spe_ilhu(p, rT, bits.u >> 16);
- spe_iohl(p, rT, bits.u & 0xffff);
- }
-}
-
-
-void
-spe_load_int(struct spe_function *p, int rT, int i)
-{
- if (-32768 <= i && i <= 32767) {
- spe_il(p, rT, i);
- }
- else {
- spe_ilhu(p, rT, i >> 16);
- if (i & 0xffff)
- spe_iohl(p, rT, i & 0xffff);
- }
-}
-
-void spe_load_uint(struct spe_function *p, int rT, uint ui)
-{
- /* If the whole value is in the lower 18 bits, use ila, which
- * doesn't sign-extend. Otherwise, if the two halfwords of
- * the constant are identical, use ilh. Otherwise, if every byte of
- * the desired value is 0x00 or 0xff, we can use Form Select Mask for
- * Bytes Immediate (fsmbi) to load the value in a single instruction.
- * Otherwise, in the general case, we have to use ilhu followed by iohl.
- */
- if ((ui & 0x0003ffff) == ui) {
- spe_ila(p, rT, ui);
- }
- else if ((ui >> 16) == (ui & 0xffff)) {
- spe_ilh(p, rT, ui & 0xffff);
- }
- else if (
- ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) &&
- ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) &&
- ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) &&
- ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000)
- ) {
- uint mask = 0;
- /* fsmbi duplicates each bit in the given mask eight times,
- * using a 16-bit value to initialize a 16-byte quadword.
- * Each 4-bit nybble of the mask corresponds to a full word
- * of the result; look at the value and figure out the mask
- * (replicated for each word in the quadword), and then
- * form the "select mask" to get the value.
- */
- if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111;
- if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222;
- if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444;
- if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888;
- spe_fsmbi(p, rT, mask);
- }
- else {
- /* The general case: this usually uses two instructions, but
- * may use only one if the low-order 16 bits of each word are 0.
- */
- spe_ilhu(p, rT, ui >> 16);
- if (ui & 0xffff)
- spe_iohl(p, rT, ui & 0xffff);
- }
-}
-
-/**
- * This function is constructed identically to spe_xor_uint() below.
- * Changes to one should be made in the other.
- */
-void
-spe_and_uint(struct spe_function *p, int rT, int rA, uint ui)
-{
- /* If we can, emit a single instruction, either And Byte Immediate
- * (which uses the same constant across each byte), And Halfword Immediate
- * (which sign-extends a 10-bit immediate to 16 bits and uses that
- * across each halfword), or And Word Immediate (which sign-extends
- * a 10-bit immediate to 32 bits).
- *
- * Otherwise, we'll need to use a temporary register.
- */
- uint tmp;
-
- /* If the upper 23 bits are all 0s or all 1s, sign extension
- * will work and we can use And Word Immediate
- */
- tmp = ui & 0xfffffe00;
- if (tmp == 0xfffffe00 || tmp == 0) {
- spe_andi(p, rT, rA, ui & 0x000003ff);
- return;
- }
-
- /* If the ui field is symmetric along halfword boundaries and
- * the upper 7 bits of each halfword are all 0s or 1s, we
- * can use And Halfword Immediate
- */
- tmp = ui & 0xfe00fe00;
- if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) {
- spe_andhi(p, rT, rA, ui & 0x000003ff);
- return;
- }
-
- /* If the ui field is symmetric in each byte, then we can use
- * the And Byte Immediate instruction.
- */
- tmp = ui & 0x000000ff;
- if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) {
- spe_andbi(p, rT, rA, tmp);
- return;
- }
-
- /* Otherwise, we'll have to use a temporary register. */
- int tmp_reg = spe_allocate_available_register(p);
- spe_load_uint(p, tmp_reg, ui);
- spe_and(p, rT, rA, tmp_reg);
- spe_release_register(p, tmp_reg);
-}
-
-
-/**
- * This function is constructed identically to spe_and_uint() above.
- * Changes to one should be made in the other.
- */
-void
-spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui)
-{
- /* If we can, emit a single instruction, either Exclusive Or Byte
- * Immediate (which uses the same constant across each byte), Exclusive
- * Or Halfword Immediate (which sign-extends a 10-bit immediate to
- * 16 bits and uses that across each halfword), or Exclusive Or Word
- * Immediate (which sign-extends a 10-bit immediate to 32 bits).
- *
- * Otherwise, we'll need to use a temporary register.
- */
- uint tmp;
-
- /* If the upper 23 bits are all 0s or all 1s, sign extension
- * will work and we can use Exclusive Or Word Immediate
- */
- tmp = ui & 0xfffffe00;
- if (tmp == 0xfffffe00 || tmp == 0) {
- spe_xori(p, rT, rA, ui & 0x000003ff);
- return;
- }
-
- /* If the ui field is symmetric along halfword boundaries and
- * the upper 7 bits of each halfword are all 0s or 1s, we
- * can use Exclusive Or Halfword Immediate
- */
- tmp = ui & 0xfe00fe00;
- if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) {
- spe_xorhi(p, rT, rA, ui & 0x000003ff);
- return;
- }
-
- /* If the ui field is symmetric in each byte, then we can use
- * the Exclusive Or Byte Immediate instruction.
- */
- tmp = ui & 0x000000ff;
- if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) {
- spe_xorbi(p, rT, rA, tmp);
- return;
- }
-
- /* Otherwise, we'll have to use a temporary register. */
- int tmp_reg = spe_allocate_available_register(p);
- spe_load_uint(p, tmp_reg, ui);
- spe_xor(p, rT, rA, tmp_reg);
- spe_release_register(p, tmp_reg);
-}
-
-void
-spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui)
-{
- /* If the comparison value is 9 bits or less, it fits inside a
- * Compare Equal Word Immediate instruction.
- */
- if ((ui & 0x000001ff) == ui) {
- spe_ceqi(p, rT, rA, ui);
- }
- /* Otherwise, we're going to have to load a word first. */
- else {
- int tmp_reg = spe_allocate_available_register(p);
- spe_load_uint(p, tmp_reg, ui);
- spe_ceq(p, rT, rA, tmp_reg);
- spe_release_register(p, tmp_reg);
- }
-}
-
-void
-spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui)
-{
- /* If the comparison value is 10 bits or less, it fits inside a
- * Compare Logical Greater Than Word Immediate instruction.
- */
- if ((ui & 0x000003ff) == ui) {
- spe_clgti(p, rT, rA, ui);
- }
- /* Otherwise, we're going to have to load a word first. */
- else {
- int tmp_reg = spe_allocate_available_register(p);
- spe_load_uint(p, tmp_reg, ui);
- spe_clgt(p, rT, rA, tmp_reg);
- spe_release_register(p, tmp_reg);
- }
-}
-
-void
-spe_splat(struct spe_function *p, int rT, int rA)
-{
- /* Use a temporary, just in case rT == rA */
- int tmp_reg = spe_allocate_available_register(p);
- /* Duplicate bytes 0, 1, 2, and 3 across the whole register */
- spe_ila(p, tmp_reg, 0x00010203);
- spe_shufb(p, rT, rA, rA, tmp_reg);
- spe_release_register(p, tmp_reg);
-}
-
-
-void
-spe_complement(struct spe_function *p, int rT, int rA)
-{
- spe_nor(p, rT, rA, rA);
-}
-
-
-void
-spe_move(struct spe_function *p, int rT, int rA)
-{
- /* Use different instructions depending on the instruction address
- * to take advantage of the dual pipelines.
- */
- if (p->num_inst & 1)
- spe_shlqbyi(p, rT, rA, 0); /* odd pipe */
- else
- spe_ori(p, rT, rA, 0); /* even pipe */
-}
-
-
-void
-spe_zero(struct spe_function *p, int rT)
-{
- spe_xor(p, rT, rT, rT);
-}
-
-
-void
-spe_splat_word(struct spe_function *p, int rT, int rA, int word)
-{
- assert(word >= 0);
- assert(word <= 3);
-
- if (word == 0) {
- int tmp1 = rT;
- spe_ila(p, tmp1, 66051);
- spe_shufb(p, rT, rA, rA, tmp1);
- }
- else {
- /* XXX review this, we may not need the rotqbyi instruction */
- int tmp1 = rT;
- int tmp2 = spe_allocate_available_register(p);
-
- spe_ila(p, tmp1, 66051);
- spe_rotqbyi(p, tmp2, rA, 4 * word);
- spe_shufb(p, rT, tmp2, tmp2, tmp1);
-
- spe_release_register(p, tmp2);
- }
-}
-
-/**
- * For each 32-bit float element of rA and rB, choose the smaller of the
- * two, compositing them into the rT register.
- *
- * The Float Compare Greater Than (fcgt) instruction will put 1s into
- * compare_reg where rA > rB, and 0s where rA <= rB.
- *
- * Then the Select Bits (selb) instruction will take bits from rA where
- * compare_reg is 0, and from rB where compare_reg is 1; i.e., from rA
- * where rA <= rB and from rB where rB > rA, which is exactly the
- * "min" operation.
- *
- * The compare_reg could in many cases be the same as rT, unless
- * rT == rA || rT == rB. But since this is common in constructions
- * like "x = min(x, a)", we always allocate a new register to be safe.
- */
-void
-spe_float_min(struct spe_function *p, int rT, int rA, int rB)
-{
- int compare_reg = spe_allocate_available_register(p);
- spe_fcgt(p, compare_reg, rA, rB);
- spe_selb(p, rT, rA, rB, compare_reg);
- spe_release_register(p, compare_reg);
-}
-
-/**
- * For each 32-bit float element of rA and rB, choose the greater of the
- * two, compositing them into the rT register.
- *
- * The logic is similar to that of spe_float_min() above; the only
- * difference is that the registers on spe_selb() have been reversed,
- * so that the larger of the two is selected instead of the smaller.
- */
-void
-spe_float_max(struct spe_function *p, int rT, int rA, int rB)
-{
- int compare_reg = spe_allocate_available_register(p);
- spe_fcgt(p, compare_reg, rA, rB);
- spe_selb(p, rT, rB, rA, compare_reg);
- spe_release_register(p, compare_reg);
-}
-
-#endif /* GALLIUM_CELL */
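As a standalone illustration of the fsmbi trick that spe_load_uint() describes above, the mask derivation can be checked in isolation; this sketch assumes nothing beyond standard C and is not code from the tree:

#include <assert.h>
#include <stdint.h>

/* For a 32-bit constant whose bytes are each 0x00 or 0xff, compute the
 * 16-bit fsmbi mask (one bit per byte of the quadword, one nybble per
 * 32-bit word) that reproduces the value in a single instruction.
 */
static uint16_t
fsmbi_mask_for(uint32_t ui)
{
   uint16_t mask = 0;
   if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111;
   if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222;
   if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444;
   if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888;
   return mask;
}

int
main(void)
{
   assert(fsmbi_mask_for(0x00ff00ff) == 0x5555);  /* bytes 0 and 2 are 0xff */
   assert(fsmbi_mask_for(0xff000000) == 0x8888);  /* only byte 3 is 0xff */
   return 0;
}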
+++ /dev/null
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file
- * Real-time assembly generation interface for Cell B.E. SPEs.
- * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf
- *
- * \author Ian Romanick <idr@us.ibm.com>
- * \author Brian Paul
- */
-
-#ifndef RTASM_PPC_SPE_H
-#define RTASM_PPC_SPE_H
-
-/** 4 bytes per instruction */
-#define SPE_INST_SIZE 4
-
-/** number of general-purpose SIMD registers */
-#define SPE_NUM_REGS 128
-
-/** Return Address register (aka $lr / Link Register) */
-#define SPE_REG_RA 0
-
-/** Stack Pointer register (aka $sp) */
-#define SPE_REG_SP 1
-
-
-struct spe_function
-{
- uint32_t *store; /**< instruction buffer */
- uint num_inst;
- uint max_inst;
-
- /**
- * The "set count" reflects the number of nested register sets
- * that are allowed. In the unlikely case that we exceed the set count,
- * register allocation will start to be confused, which is critical
- * enough that we check for it.
- */
- unsigned char set_count;
-
- /**
- * Flags for used and unused registers. Each byte corresponds to a
- * register; a 0 in that byte means that the register is available.
- * A value of 1 means that the register was allocated in the current
- * register set. Any other value N means that the register was allocated
- * N register sets ago.
- *
- * \sa
- * spe_allocate_register, spe_allocate_available_register,
- * spe_allocate_register_set, spe_release_register_set, spe_release_register,
- */
- unsigned char regs[SPE_NUM_REGS];
-
- boolean print; /**< print/dump instructions as they're emitted? */
- int indent; /**< number of spaces to indent */
-};
-
-
-extern void spe_init_func(struct spe_function *p, uint code_size);
-extern void spe_release_func(struct spe_function *p);
-extern uint spe_code_size(const struct spe_function *p);
-
-extern int spe_allocate_available_register(struct spe_function *p);
-extern int spe_allocate_register(struct spe_function *p, int reg);
-extern void spe_release_register(struct spe_function *p, int reg);
-extern void spe_allocate_register_set(struct spe_function *p);
-extern void spe_release_register_set(struct spe_function *p);
-
-extern uint spe_get_registers_used(const struct spe_function *p, ubyte used[]);
-
-extern void spe_print_code(struct spe_function *p, boolean enable);
-extern void spe_indent(struct spe_function *p, int spaces);
-extern void spe_comment(struct spe_function *p, int rel_indent, const char *s);
-
-
-#endif /* RTASM_PPC_SPE_H */
-
-#ifndef EMIT
-#define EMIT(_name, _op) \
- extern void _name (struct spe_function *p);
-#define EMIT_(_name, _op) \
- extern void _name (struct spe_function *p, int rT);
-#define EMIT_R(_name, _op) \
- extern void _name (struct spe_function *p, int rT, int rA);
-#define EMIT_RR(_name, _op) \
- extern void _name (struct spe_function *p, int rT, int rA, int rB);
-#define EMIT_RRR(_name, _op) \
- extern void _name (struct spe_function *p, int rT, int rA, int rB, int rC);
-#define EMIT_RI7(_name, _op) \
- extern void _name (struct spe_function *p, int rT, int rA, int imm);
-#define EMIT_RI8(_name, _op, bias) \
- extern void _name (struct spe_function *p, int rT, int rA, int imm);
-#define EMIT_RI10(_name, _op) \
- extern void _name (struct spe_function *p, int rT, int rA, int imm);
-#define EMIT_RI10s(_name, _op) \
- extern void _name (struct spe_function *p, int rT, int rA, int imm);
-#define EMIT_RI16(_name, _op) \
- extern void _name (struct spe_function *p, int rT, int imm);
-#define EMIT_RI18(_name, _op) \
- extern void _name (struct spe_function *p, int rT, int imm);
-#define EMIT_I16(_name, _op) \
- extern void _name (struct spe_function *p, int imm);
-#define UNDEF_EMIT_MACROS
-#endif /* EMIT */
-
-
-/* Memory load / store instructions
- */
-EMIT_RR (spe_lqx, 0x1c4)
-EMIT_RI16(spe_lqa, 0x061)
-EMIT_RI16(spe_lqr, 0x067)
-EMIT_RR (spe_stqx, 0x144)
-EMIT_RI16(spe_stqa, 0x041)
-EMIT_RI16(spe_stqr, 0x047)
-EMIT_RI7 (spe_cbd, 0x1f4)
-EMIT_RR (spe_cbx, 0x1d4)
-EMIT_RI7 (spe_chd, 0x1f5)
-EMIT_RI7 (spe_chx, 0x1d5)
-EMIT_RI7 (spe_cwd, 0x1f6)
-EMIT_RI7 (spe_cwx, 0x1d6)
-EMIT_RI7 (spe_cdd, 0x1f7)
-EMIT_RI7 (spe_cdx, 0x1d7)
-
-
-/* Constant formation instructions
- */
-EMIT_RI16(spe_ilh, 0x083)
-EMIT_RI16(spe_ilhu, 0x082)
-EMIT_RI16(spe_il, 0x081)
-EMIT_RI18(spe_ila, 0x021)
-EMIT_RI16(spe_iohl, 0x0c1)
-EMIT_RI16(spe_fsmbi, 0x065)
-
-
-
-/* Integer and logical instructions
- */
-EMIT_RR (spe_ah, 0x0c8)
-EMIT_RI10(spe_ahi, 0x01d)
-EMIT_RR (spe_a, 0x0c0)
-EMIT_RI10s(spe_ai, 0x01c)
-EMIT_RR (spe_sfh, 0x048)
-EMIT_RI10(spe_sfhi, 0x00d)
-EMIT_RR (spe_sf, 0x040)
-EMIT_RI10(spe_sfi, 0x00c)
-EMIT_RR (spe_addx, 0x340)
-EMIT_RR (spe_cg, 0x0c2)
-EMIT_RR (spe_cgx, 0x342)
-EMIT_RR (spe_sfx, 0x341)
-EMIT_RR (spe_bg, 0x042)
-EMIT_RR (spe_bgx, 0x343)
-EMIT_RR (spe_mpy, 0x3c4)
-EMIT_RR (spe_mpyu, 0x3cc)
-EMIT_RI10(spe_mpyi, 0x074)
-EMIT_RI10(spe_mpyui, 0x075)
-EMIT_RRR (spe_mpya, 0x00c)
-EMIT_RR (spe_mpyh, 0x3c5)
-EMIT_RR (spe_mpys, 0x3c7)
-EMIT_RR (spe_mpyhh, 0x3c6)
-EMIT_RR (spe_mpyhha, 0x346)
-EMIT_RR (spe_mpyhhu, 0x3ce)
-EMIT_RR (spe_mpyhhau, 0x34e)
-EMIT_R (spe_clz, 0x2a5)
-EMIT_R (spe_cntb, 0x2b4)
-EMIT_R (spe_fsmb, 0x1b6)
-EMIT_R (spe_fsmh, 0x1b5)
-EMIT_R (spe_fsm, 0x1b4)
-EMIT_R (spe_gbb, 0x1b2)
-EMIT_R (spe_gbh, 0x1b1)
-EMIT_R (spe_gb, 0x1b0)
-EMIT_RR (spe_avgb, 0x0d3)
-EMIT_RR (spe_absdb, 0x053)
-EMIT_RR (spe_sumb, 0x253)
-EMIT_R (spe_xsbh, 0x2b6)
-EMIT_R (spe_xshw, 0x2ae)
-EMIT_R (spe_xswd, 0x2a6)
-EMIT_RR (spe_and, 0x0c1)
-EMIT_RR (spe_andc, 0x2c1)
-EMIT_RI10s(spe_andbi, 0x016)
-EMIT_RI10s(spe_andhi, 0x015)
-EMIT_RI10s(spe_andi, 0x014)
-EMIT_RR (spe_or, 0x041)
-EMIT_RR (spe_orc, 0x2c9)
-EMIT_RI10s(spe_orbi, 0x006)
-EMIT_RI10s(spe_orhi, 0x005)
-EMIT_RI10s(spe_ori, 0x004)
-EMIT_R (spe_orx, 0x1f0)
-EMIT_RR (spe_xor, 0x241)
-EMIT_RI10s(spe_xorbi, 0x046)
-EMIT_RI10s(spe_xorhi, 0x045)
-EMIT_RI10s(spe_xori, 0x044)
-EMIT_RR (spe_nand, 0x0c9)
-EMIT_RR (spe_nor, 0x049)
-EMIT_RR (spe_eqv, 0x249)
-EMIT_RRR (spe_selb, 0x008)
-EMIT_RRR (spe_shufb, 0x00b)
-
-
-/* Shift and rotate instructions
- */
-EMIT_RR (spe_shlh, 0x05f)
-EMIT_RI7 (spe_shlhi, 0x07f)
-EMIT_RR (spe_shl, 0x05b)
-EMIT_RI7 (spe_shli, 0x07b)
-EMIT_RR (spe_shlqbi, 0x1db)
-EMIT_RI7 (spe_shlqbii, 0x1fb)
-EMIT_RR (spe_shlqby, 0x1df)
-EMIT_RI7 (spe_shlqbyi, 0x1ff)
-EMIT_RR (spe_shlqbybi, 0x1cf)
-EMIT_RR (spe_roth, 0x05c)
-EMIT_RI7 (spe_rothi, 0x07c)
-EMIT_RR (spe_rot, 0x058)
-EMIT_RI7 (spe_roti, 0x078)
-EMIT_RR (spe_rotqby, 0x1dc)
-EMIT_RI7 (spe_rotqbyi, 0x1fc)
-EMIT_RR (spe_rotqbybi, 0x1cc)
-EMIT_RR (spe_rotqbi, 0x1d8)
-EMIT_RI7 (spe_rotqbii, 0x1f8)
-EMIT_RR (spe_rothm, 0x05d)
-EMIT_RI7 (spe_rothmi, 0x07d)
-EMIT_RR (spe_rotm, 0x059)
-EMIT_RI7 (spe_rotmi, 0x079)
-EMIT_RR (spe_rotqmby, 0x1dd)
-EMIT_RI7 (spe_rotqmbyi, 0x1fd)
-EMIT_RR (spe_rotqmbybi, 0x1cd)
-EMIT_RR (spe_rotqmbi, 0x1c9)
-EMIT_RI7 (spe_rotqmbii, 0x1f9)
-EMIT_RR (spe_rotmah, 0x05e)
-EMIT_RI7 (spe_rotmahi, 0x07e)
-EMIT_RR (spe_rotma, 0x05a)
-EMIT_RI7 (spe_rotmai, 0x07a)
-
-
-/* Compare, branch, and halt instructions
- */
-EMIT_RR (spe_heq, 0x3d8)
-EMIT_RI10(spe_heqi, 0x07f)
-EMIT_RR (spe_hgt, 0x258)
-EMIT_RI10(spe_hgti, 0x04f)
-EMIT_RR (spe_hlgt, 0x2d8)
-EMIT_RI10(spe_hlgti, 0x05f)
-EMIT_RR (spe_ceqb, 0x3d0)
-EMIT_RI10(spe_ceqbi, 0x07e)
-EMIT_RR (spe_ceqh, 0x3c8)
-EMIT_RI10(spe_ceqhi, 0x07d)
-EMIT_RR (spe_ceq, 0x3c0)
-EMIT_RI10(spe_ceqi, 0x07c)
-EMIT_RR (spe_cgtb, 0x250)
-EMIT_RI10(spe_cgtbi, 0x04e)
-EMIT_RR (spe_cgth, 0x248)
-EMIT_RI10(spe_cgthi, 0x04d)
-EMIT_RR (spe_cgt, 0x240)
-EMIT_RI10(spe_cgti, 0x04c)
-EMIT_RR (spe_clgtb, 0x2d0)
-EMIT_RI10(spe_clgtbi, 0x05e)
-EMIT_RR (spe_clgth, 0x2c8)
-EMIT_RI10(spe_clgthi, 0x05d)
-EMIT_RR (spe_clgt, 0x2c0)
-EMIT_RI10(spe_clgti, 0x05c)
-EMIT_I16 (spe_br, 0x064)
-EMIT_I16 (spe_bra, 0x060)
-EMIT_RI16(spe_brsl, 0x066)
-EMIT_RI16(spe_brasl, 0x062)
-EMIT_RI16(spe_brnz, 0x042)
-EMIT_RI16(spe_brz, 0x040)
-EMIT_RI16(spe_brhnz, 0x046)
-EMIT_RI16(spe_brhz, 0x044)
-
-/* Control instructions
- */
-EMIT (spe_lnop, 0x001)
-
-extern void
-spe_lqd(struct spe_function *p, int rT, int rA, int offset);
-
-extern void
-spe_stqd(struct spe_function *p, int rT, int rA, int offset);
-
-extern void spe_bi(struct spe_function *p, int rA, int d, int e);
-extern void spe_iret(struct spe_function *p, int rA, int d, int e);
-extern void spe_bisled(struct spe_function *p, int rT, int rA,
- int d, int e);
-extern void spe_bisl(struct spe_function *p, int rT, int rA,
- int d, int e);
-extern void spe_biz(struct spe_function *p, int rT, int rA,
- int d, int e);
-extern void spe_binz(struct spe_function *p, int rT, int rA,
- int d, int e);
-extern void spe_bihz(struct spe_function *p, int rT, int rA,
- int d, int e);
-extern void spe_bihnz(struct spe_function *p, int rT, int rA,
- int d, int e);
-
-
-/** Load/splat immediate float into rT. */
-extern void
-spe_load_float(struct spe_function *p, int rT, float x);
-
-/** Load/splat immediate int into rT. */
-extern void
-spe_load_int(struct spe_function *p, int rT, int i);
-
-/** Load/splat immediate unsigned int into rT. */
-extern void
-spe_load_uint(struct spe_function *p, int rT, uint ui);
-
-/** And immediate value into rT. */
-extern void
-spe_and_uint(struct spe_function *p, int rT, int rA, uint ui);
-
-/** Xor immediate value into rT. */
-extern void
-spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui);
-
-/** Compare equal with immediate value. */
-extern void
-spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui);
-
-/** Compare greater with immediate value. */
-extern void
-spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui);
-
-/** Replicate word 0 of rA across rT. */
-extern void
-spe_splat(struct spe_function *p, int rT, int rA);
-
-/** rT = complement_all_bits(rA). */
-extern void
-spe_complement(struct spe_function *p, int rT, int rA);
-
-/** rT = rA. */
-extern void
-spe_move(struct spe_function *p, int rT, int rA);
-
-/** rT = {0,0,0,0}. */
-extern void
-spe_zero(struct spe_function *p, int rT);
-
-/** rT = splat(rA, word) */
-extern void
-spe_splat_word(struct spe_function *p, int rT, int rA, int word);
-
-/** rT = float min(rA, rB) */
-extern void
-spe_float_min(struct spe_function *p, int rT, int rA, int rB);
-
-/** rT = float max(rA, rB) */
-extern void
-spe_float_max(struct spe_function *p, int rT, int rA, int rB);
-
-
-/* Floating-point instructions
- */
-EMIT_RR (spe_fa, 0x2c4)
-EMIT_RR (spe_dfa, 0x2cc)
-EMIT_RR (spe_fs, 0x2c5)
-EMIT_RR (spe_dfs, 0x2cd)
-EMIT_RR (spe_fm, 0x2c6)
-EMIT_RR (spe_dfm, 0x2ce)
-EMIT_RRR (spe_fma, 0x00e)
-EMIT_RR (spe_dfma, 0x35c)
-EMIT_RRR (spe_fnms, 0x00d)
-EMIT_RR (spe_dfnms, 0x35e)
-EMIT_RRR (spe_fms, 0x00f)
-EMIT_RR (spe_dfms, 0x35d)
-EMIT_RR (spe_dfnma, 0x35f)
-EMIT_R (spe_frest, 0x1b8)
-EMIT_R (spe_frsqest, 0x1b9)
-EMIT_RR (spe_fi, 0x3d4)
-EMIT_RI8 (spe_csflt, 0x1da, 155)
-EMIT_RI8 (spe_cflts, 0x1d8, 173)
-EMIT_RI8 (spe_cuflt, 0x1db, 155)
-EMIT_RI8 (spe_cfltu, 0x1d9, 173)
-EMIT_R (spe_frds, 0x3b9)
-EMIT_R (spe_fesd, 0x3b8)
-EMIT_RR (spe_dfceq, 0x3c3)
-EMIT_RR (spe_dfcmeq, 0x3cb)
-EMIT_RR (spe_dfcgt, 0x2c3)
-EMIT_RR (spe_dfcmgt, 0x2cb)
-EMIT_RI7 (spe_dftsv, 0x3bf)
-EMIT_RR (spe_fceq, 0x3c2)
-EMIT_RR (spe_fcmeq, 0x3ca)
-EMIT_RR (spe_fcgt, 0x2c2)
-EMIT_RR (spe_fcmgt, 0x2ca)
-EMIT_R (spe_fscrwr, 0x3ba)
-EMIT_ (spe_fscrrd, 0x398)
-
-
-/* Channel instructions
- */
-EMIT_R (spe_rdch, 0x00d)
-EMIT_R (spe_rdchcnt, 0x00f)
-EMIT_R (spe_wrch, 0x10d)
-
-
-#ifdef UNDEF_EMIT_MACROS
-#undef EMIT
-#undef EMIT_
-#undef EMIT_R
-#undef EMIT_RR
-#undef EMIT_RRR
-#undef EMIT_RI7
-#undef EMIT_RI8
-#undef EMIT_RI10
-#undef EMIT_RI10s
-#undef EMIT_RI16
-#undef EMIT_RI18
-#undef EMIT_I16
-#undef UNDEF_EMIT_MACROS
-#endif /* EMIT_ */
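A minimal usage sketch of the nested register-set API declared above; emit_something() is a hypothetical caller, not a function from the tree:

/* Hypothetical caller: grab scratch registers inside a register set so that
 * everything allocated during this codegen sequence is released in one go.
 */
static void
emit_something(struct spe_function *p)
{
   spe_allocate_register_set(p);            /* open a nested set */

   int tmp = spe_allocate_available_register(p);
   spe_zero(p, tmp);                        /* ... emit code using tmp ... */

   spe_release_register_set(p);             /* releases tmp and anything else
                                             * allocated since the matching
                                             * spe_allocate_register_set() */
}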
/* Helper function to choose and instantiate one of the software rasterizers:
- * cell, llvmpipe, softpipe.
+ * llvmpipe, softpipe.
*/
#ifdef GALLIUM_SOFTPIPE
#include "llvmpipe/lp_public.h"
#endif
-#ifdef GALLIUM_CELL
-#include "cell/ppu/cell_public.h"
-#endif
-
static INLINE struct pipe_screen *
sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
{
struct pipe_screen *screen = NULL;
-#if defined(GALLIUM_CELL)
- if (screen == NULL && strcmp(driver, "cell") == 0)
- screen = cell_create_screen(winsys);
-#endif
-
#if defined(GALLIUM_LLVMPIPE)
if (screen == NULL && strcmp(driver, "llvmpipe") == 0)
screen = llvmpipe_create_screen(winsys);
const char *default_driver;
const char *driver;
-#if defined(GALLIUM_CELL)
- default_driver = "cell";
-#elif defined(GALLIUM_LLVMPIPE)
+#if defined(GALLIUM_LLVMPIPE)
default_driver = "llvmpipe";
#elif defined(GALLIUM_SOFTPIPE)
default_driver = "softpipe";
+++ /dev/null
-# Cell Gallium driver Makefile
-
-
-default:
- ( cd spu ; make )
- ( cd ppu ; make )
-
-
-
-clean:
- ( cd spu ; make clean )
- ( cd ppu ; make clean )
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Types and tokens which are common to the SPU and PPU code.
- */
-
-
-#ifndef CELL_COMMON_H
-#define CELL_COMMON_H
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_format.h"
-#include "pipe/p_state.h"
-#include <stdio.h>
-
-/** The standard assert macro doesn't seem to work reliably */
-#define ASSERT(x) \
- if (!(x)) { \
- ubyte *p = NULL; \
- fprintf(stderr, "%s:%d: %s(): assertion %s failed.\n", \
- __FILE__, __LINE__, __FUNCTION__, #x); \
- *p = 0; \
- exit(1); \
- }
-
-
-#define JOIN(x, y) JOIN_AGAIN(x, y)
-#define JOIN_AGAIN(x, y) x ## y
-
-#define STATIC_ASSERT(e) \
-{typedef char JOIN(assertion_failed_at_line_, __LINE__) [(e) ? 1 : -1];}
-
-
-
-/** for sanity checking */
-#define ASSERT_ALIGN16(ptr) \
- ASSERT((((unsigned long) (ptr)) & 0xf) == 0);
-
-
-/** round up value to next multiple of 4 */
-#define ROUNDUP4(k) (((k) + 0x3) & ~0x3)
-
-/** round up value to next multiple of 8 */
-#define ROUNDUP8(k) (((k) + 0x7) & ~0x7)
-
-/** round up value to next multiple of 16 */
-#define ROUNDUP16(k) (((k) + 0xf) & ~0xf)
-
-
-#define CELL_MAX_SPUS 8
-
-#define CELL_MAX_SAMPLERS 4
-#define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */
-#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */
-#define CELL_MAX_WIDTH 1024 /**< max framebuffer width */
-#define CELL_MAX_HEIGHT 1024 /**< max framebuffer height */
-
-#define TILE_SIZE 32
-
-
-/**
- * The low byte of a mailbox word contains the command opcode.
- * Remaining higher bytes are command specific.
- */
-#define CELL_CMD_OPCODE_MASK 0xff
-
-#define CELL_CMD_EXIT 1
-#define CELL_CMD_CLEAR_SURFACE 2
-#define CELL_CMD_FINISH 3
-#define CELL_CMD_RENDER 4
-#define CELL_CMD_BATCH 5
-#define CELL_CMD_RELEASE_VERTS 6
-#define CELL_CMD_STATE_FRAMEBUFFER 10
-#define CELL_CMD_STATE_FRAGMENT_OPS 11
-#define CELL_CMD_STATE_SAMPLER 12
-#define CELL_CMD_STATE_TEXTURE 13
-#define CELL_CMD_STATE_VERTEX_INFO 14
-#define CELL_CMD_STATE_VIEWPORT 15
-#define CELL_CMD_STATE_UNIFORMS 16
-#define CELL_CMD_STATE_VS_ARRAY_INFO 17
-#define CELL_CMD_STATE_BIND_VS 18
-#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19
-#define CELL_CMD_STATE_ATTRIB_FETCH 20
-#define CELL_CMD_STATE_FS_CONSTANTS 21
-#define CELL_CMD_STATE_RASTERIZER 22
-#define CELL_CMD_VS_EXECUTE 23
-#define CELL_CMD_FLUSH_BUFFER_RANGE 24
-#define CELL_CMD_FENCE 25
-
-
-/** Command/batch buffers */
-#define CELL_NUM_BUFFERS 4
-#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */
-
-#define CELL_BUFFER_STATUS_FREE 10
-#define CELL_BUFFER_STATUS_USED 20
-
-/** Debug flags */
-#define CELL_DEBUG_CHECKER (1 << 0)
-#define CELL_DEBUG_ASM (1 << 1)
-#define CELL_DEBUG_SYNC (1 << 2)
-#define CELL_DEBUG_FRAGMENT_OPS (1 << 3)
-#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4)
-#define CELL_DEBUG_CMD (1 << 5)
-#define CELL_DEBUG_CACHE (1 << 6)
-
-#define CELL_FENCE_IDLE 0
-#define CELL_FENCE_EMITTED 1
-#define CELL_FENCE_SIGNALLED 2
-
-#define CELL_FACING_FRONT 0
-#define CELL_FACING_BACK 1
-
-struct cell_fence
-{
- /** There's a 16-byte status qword per SPU */
- volatile uint status[CELL_MAX_SPUS][4];
-};
-
-#ifdef __SPU__
-typedef vector unsigned int opcode_t;
-#else
-typedef unsigned int opcode_t[4];
-#endif
-
-/**
- * Fence command sent to SPUs. In response, the SPUs will write
- * CELL_FENCE_SIGNALLED back to the fence status word in main memory.
- */
-struct cell_command_fence
-{
- opcode_t opcode; /**< CELL_CMD_FENCE */
- struct cell_fence *fence;
- uint32_t pad_[3];
-};
-
-
-/**
- * Command to specify per-fragment operations state and generated code.
- * Note that this is a variant-length structure, allocated with as
- * much memory as needed to hold the generated code; the "code"
- * field *must* be the last field in the structure. Also, the entire
- * length of the structure (including the variant code field) must be
- * a multiple of 8 bytes; we require that this structure itself be
- * a multiple of 8 bytes, and that the generated code also be a multiple
- * of 8 bytes.
- *
- * Also note that the dsa, blend, blend_color fields are really only needed
- * for the fallback/C per-pixel code. They're not used when we generate
- * dynamic SPU fragment code (which is the normal case), and will eventually
- * be removed from this structure.
- */
-struct cell_command_fragment_ops
-{
- opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */
-
- /* Fields for the fallback case */
- struct pipe_depth_stencil_alpha_state dsa;
- struct pipe_blend_state blend;
- struct pipe_blend_color blend_color;
-
- /* Fields for the generated SPU code */
- unsigned total_code_size;
- unsigned front_code_index;
- unsigned back_code_index;
- /* this field has variant length, and must be the last field in
- * the structure
- */
- unsigned code[0];
-};
-
-
-/** Max instructions for fragment programs */
-#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 512
-
-/**
- * Command to send a fragment program to SPUs.
- */
-struct cell_command_fragment_program
-{
- opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */
- uint num_inst; /**< Number of instructions */
- uint32_t pad[3];
- unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
-};
-
-
-/**
- * Tell SPUs about the framebuffer size, location
- */
-struct cell_command_framebuffer
-{
- opcode_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */
- int width, height;
- void *color_start, *depth_start;
- enum pipe_format color_format, depth_format;
- uint32_t pad_[2];
-};
-
-
-/**
- * Tell SPUs about rasterizer state.
- */
-struct cell_command_rasterizer
-{
- opcode_t opcode; /**< CELL_CMD_STATE_RASTERIZER */
- struct pipe_rasterizer_state rasterizer;
- /*uint32_t pad[1];*/
-};
-
-
-/**
- * Clear framebuffer to the given value/color.
- */
-struct cell_command_clear_surface
-{
- opcode_t opcode; /**< CELL_CMD_CLEAR_SURFACE */
- uint surface; /**< Temporary: 0=color, 1=Z */
- uint value;
- uint32_t pad[2];
-};
-
-
-/**
- * Array info used by the vertex shader's vertex puller.
- */
-struct cell_array_info
-{
- uint64_t base; /**< Base address of the 0th element. */
- uint attr; /**< Attribute that this state is for. */
- uint pitch; /**< Byte pitch from one entry to the next. */
- uint size;
- uint function_offset;
-};
-
-
-struct cell_attribute_fetch_code
-{
- uint64_t base;
- uint size;
-};
-
-
-struct cell_buffer_range
-{
- uint64_t base;
- unsigned size;
-};
-
-
-struct cell_shader_info
-{
- uint64_t declarations;
- uint64_t instructions;
- uint64_t immediates;
-
- unsigned num_outputs;
- unsigned num_declarations;
- unsigned num_instructions;
- unsigned num_immediates;
-};
-
-
-#define SPU_VERTS_PER_BATCH 64
-struct cell_command_vs
-{
- opcode_t opcode; /**< CELL_CMD_VS_EXECUTE */
- uint64_t vOut[SPU_VERTS_PER_BATCH];
- unsigned num_elts;
- unsigned elts[SPU_VERTS_PER_BATCH];
- float plane[12][4];
- unsigned nr_planes;
- unsigned nr_attrs;
-};
-
-
-struct cell_command_render
-{
- opcode_t opcode; /**< CELL_CMD_RENDER */
- uint prim_type; /**< PIPE_PRIM_x */
- uint num_verts;
- uint vertex_size; /**< bytes per vertex */
- uint num_indexes;
- uint vertex_buf; /**< which cell->buffer[] contains the vertex data */
- float xmin, ymin, xmax, ymax; /* XXX another dummy field */
- uint min_index;
- boolean inline_verts;
- uint32_t pad_[1];
-};
-
-
-struct cell_command_release_verts
-{
- opcode_t opcode; /**< CELL_CMD_RELEASE_VERTS */
- uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */
- uint32_t pad_[3];
-};
-
-
-struct cell_command_sampler
-{
- opcode_t opcode; /**< CELL_CMD_STATE_SAMPLER */
- uint unit;
- struct pipe_sampler_state state;
- uint32_t pad_[3];
-};
-
-
-struct cell_command_texture
-{
- opcode_t opcode; /**< CELL_CMD_STATE_TEXTURE */
- uint target; /**< PIPE_TEXTURE_x */
- uint unit;
- void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */
- ushort width[CELL_MAX_TEXTURE_LEVELS];
- ushort height[CELL_MAX_TEXTURE_LEVELS];
- ushort depth[CELL_MAX_TEXTURE_LEVELS];
-};
-
-
-#define MAX_SPU_FUNCTIONS 12
-/**
- * Used to tell the PPU about the address of particular functions in the
- * SPU's address space.
- */
-struct cell_spu_function_info
-{
- uint num;
- char names[MAX_SPU_FUNCTIONS][16];
- uint addrs[MAX_SPU_FUNCTIONS];
- char pad[12]; /**< Pad struct to multiple of 16 bytes (256 currently) */
-};
-
-
-/** This is the object passed to spe_create_thread() */
-PIPE_ALIGN_TYPE(16,
-struct cell_init_info
-{
- unsigned id;
- unsigned num_spus;
- unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */
- float inv_timebase; /**< 1.0/timebase, for perf measurement */
-
- /** Buffers for command batches, vertex/index data */
- ubyte *buffers[CELL_NUM_BUFFERS];
- uint *buffer_status; /**< points at cell_context->buffer_status */
-
- struct cell_spu_function_info *spu_functions;
-});
-
-
-#endif /* CELL_COMMON_H */
+++ /dev/null
-# Gallium3D Cell driver: PPU code
-
-# This makefile builds the libcell.a library which gets pulled into
-# the main libGL.so library
-
-
-TOP = ../../../../..
-include $(TOP)/configs/current
-
-
-# This is the "top-level" cell PPU driver code, will get pulled into libGL.so
-# by the winsys Makefile.
-CELL_LIB = ../libcell.a
-
-
-# This is the SPU code. We'd like to be able to put this into the libcell.a
-# archive with the PPU code, but nesting .a libs doesn't seem to work.
-# So, it's pulled into libGL.so in gallium/winsys/xlib/Makefile
-SPU_CODE_MODULE = ../spu/g3d_spu.a
-
-
-SOURCES = \
- cell_batch.c \
- cell_clear.c \
- cell_context.c \
- cell_draw_arrays.c \
- cell_fence.c \
- cell_flush.c \
- cell_gen_fragment.c \
- cell_gen_fp.c \
- cell_state_derived.c \
- cell_state_emit.c \
- cell_state_shader.c \
- cell_pipe_state.c \
- cell_screen.c \
- cell_state_vertex.c \
- cell_spu.c \
- cell_surface.c \
- cell_texture.c \
- cell_vbuf.c \
- cell_vertex_fetch.c \
- cell_vertex_shader.c
-
-
-OBJECTS = $(SOURCES:.c=.o)
-
-INCLUDE_DIRS = \
- -I$(TOP)/src/mesa \
- -I$(TOP)/src/gallium/include \
- -I$(TOP)/src/gallium/auxiliary \
- -I$(TOP)/src/gallium/drivers
-
-.c.o:
- $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
-
-
-.c.s:
- $(CC) -S $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
-
-
-default: $(CELL_LIB)
-
-
-$(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE)
-# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) # doesn't work
- ar -ru $(CELL_LIB) $(OBJECTS)
-
-#$(PROG): $(PPU_OBJECTS)
-# $(CC) -o $(PROG) $(PPU_OBJECTS) $(SPU_CODE_MODULE) $(PPU_LFLAGS)
-
-
-
-clean:
- rm -f *.o *~ $(CELL_LIB)
-
-
-
-depend: $(SOURCES)
- rm -f depend
- touch depend
- $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null
-
-include depend
-
-
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "cell_context.h"
-#include "cell_batch.h"
-#include "cell_fence.h"
-#include "cell_spu.h"
-
-
-
-/**
- * Search the buffer pool for an empty/free buffer and return its index.
- * Buffers are used for storing vertex data, state and commands which
- * will be sent to the SPUs.
- * If no empty buffers are available, wait for one.
- * \return buffer index in [0, CELL_NUM_BUFFERS-1]
- */
-uint
-cell_get_empty_buffer(struct cell_context *cell)
-{
- static uint prev_buffer = 0;
- uint buf = (prev_buffer + 1) % CELL_NUM_BUFFERS;
- uint tries = 0;
-
- /* Find a buffer that's marked as free by all SPUs */
- while (1) {
- uint spu, num_free = 0;
-
- for (spu = 0; spu < cell->num_spus; spu++) {
- if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) {
- num_free++;
-
- if (num_free == cell->num_spus) {
- /* found a free buffer, now mark status as used */
- for (spu = 0; spu < cell->num_spus; spu++) {
- cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
- }
- /*
- printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries);
- */
- prev_buffer = buf;
-
- /* release tex buffer associated w/ prev use of this batch buf */
- cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]);
-
- return buf;
- }
- }
- else {
- break;
- }
- }
-
- /* try next buf */
- buf = (buf + 1) % CELL_NUM_BUFFERS;
-
- tries++;
- if (tries == 100) {
- /*
- printf("PPU WAITING for buffer...\n");
- */
- }
- }
-}
-
-
-/**
- * Append a fence command to the current batch buffer.
- * Note that we're sure there's always room for this because of the
- * adjusted size check in cell_batch_free_space().
- */
-static void
-emit_fence(struct cell_context *cell)
-{
- const uint batch = cell->cur_batch;
- const uint size = cell->buffer_size[batch];
- struct cell_command_fence *fence_cmd;
- struct cell_fence *fence = &cell->fenced_buffers[batch].fence;
- uint i;
-
- /* set fence status to emitted, not yet signalled */
- for (i = 0; i < cell->num_spus; i++) {
- fence->status[i][0] = CELL_FENCE_EMITTED;
- }
-
- STATIC_ASSERT(sizeof(struct cell_command_fence) % 16 == 0);
- ASSERT(size % 16 == 0);
- ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE);
-
- fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size);
- fence_cmd->opcode[0] = CELL_CMD_FENCE;
- fence_cmd->fence = fence;
-
- /* update batch buffer size */
- cell->buffer_size[batch] = size + sizeof(struct cell_command_fence);
-}
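For orientation, each per-SPU fence status word moves through three states: CELL_FENCE_IDLE after cell_fence_init(), CELL_FENCE_EMITTED once emit_fence() above has appended the fence command, and CELL_FENCE_SIGNALLED once the SPU has processed it (the SPU side is not shown in this hunk); cell_fence_signalled() and cell_fence_finish() in cell_fence.c poll for that final state.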
-
-
-/**
- * Flush the current batch buffer to the SPUs.
- * An empty buffer will be found and set as the new current batch buffer
- * for subsequent commands/data.
- */
-void
-cell_batch_flush(struct cell_context *cell)
-{
- static boolean flushing = FALSE;
- uint batch = cell->cur_batch;
- uint size = cell->buffer_size[batch];
- uint spu, cmd_word;
-
- assert(!flushing);
-
- if (size == 0)
- return;
-
- /* Before we use this batch buffer, make sure any fenced texture buffers
- * are released.
- */
- if (cell->fenced_buffers[batch].head) {
- emit_fence(cell);
- size = cell->buffer_size[batch];
- }
-
- flushing = TRUE;
-
- assert(batch < CELL_NUM_BUFFERS);
-
- /*
- printf("cell_batch_dispatch: buf %u at %p, size %u\n",
- batch, &cell->buffer[batch][0], size);
- */
-
- /*
- * Build "BATCH" command and send to all SPUs.
- */
- cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16);
-
- for (spu = 0; spu < cell->num_spus; spu++) {
- assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED);
- send_mbox_message(cell_global.spe_contexts[spu], cmd_word);
- }
-
- /* When the SPUs are done copying the buffer into their local stores
- * they'll write a BUFFER_STATUS_FREE message into the buffer_status[]
- * array indicating that the PPU can re-use the buffer.
- */
-
- batch = cell_get_empty_buffer(cell);
-
- cell->buffer_size[batch] = 0; /* empty */
- cell->cur_batch = batch;
-
- flushing = FALSE;
-}
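The mailbox word built above packs three fields into 32 bits: the CELL_CMD_BATCH opcode in the low byte, the buffer index in bits 8-15 and the buffer size in the upper 16 bits. A minimal sketch of how a receiver would unpack it (illustrative only; the actual SPU-side code lives under spu/ and is not shown here):

   static void
   decode_batch_word(unsigned cmd_word,
                     unsigned *opcode, unsigned *buf, unsigned *size)
   {
      *opcode = cmd_word & 0xff;         /* CELL_CMD_BATCH */
      *buf    = (cmd_word >> 8) & 0xff;  /* which cell->buffer[] */
      *size   = cmd_word >> 16;          /* bytes used in that buffer */
   }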
-
-
-/**
- * Return the number of bytes free in the current batch buffer.
- */
-uint
-cell_batch_free_space(const struct cell_context *cell)
-{
- uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch];
- free -= sizeof(struct cell_command_fence);
- return free;
-}
-
-
-/**
- * Allocate 'bytes' bytes of space in the current batch buffer.
- * 'bytes' must be a multiple of 16; the returned allocation is 16-byte aligned.
- * \return address in batch buffer to put data
- */
-void *
-cell_batch_alloc16(struct cell_context *cell, uint bytes)
-{
- void *pos;
- uint size;
-
- ASSERT(bytes % 16 == 0);
- ASSERT(bytes <= CELL_BUFFER_SIZE);
- ASSERT(cell->cur_batch >= 0);
-
-#ifdef DEBUG
- {
- uint spu;
- for (spu = 0; spu < cell->num_spus; spu++) {
- ASSERT(cell->buffer_status[spu][cell->cur_batch][0]
- == CELL_BUFFER_STATUS_USED);
- }
- }
-#endif
-
- size = cell->buffer_size[cell->cur_batch];
-
- if (bytes > cell_batch_free_space(cell)) {
- cell_batch_flush(cell);
- size = 0;
- }
-
- ASSERT(size % 16 == 0);
- ASSERT(size + bytes <= CELL_BUFFER_SIZE);
-
- pos = (void *) (cell->buffer[cell->cur_batch] + size);
-
- cell->buffer_size[cell->cur_batch] = size + bytes;
-
- return pos;
-}
-
-
-/**
- * One-time init of batch buffers.
- */
-void
-cell_init_batch_buffers(struct cell_context *cell)
-{
- uint spu, buf;
-
- /* init command, vertex/index buffer info */
- for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) {
- cell->buffer_size[buf] = 0;
-
- /* init batch buffer status values,
- * mark 0th buffer as used, rest as free.
- */
- for (spu = 0; spu < cell->num_spus; spu++) {
- if (buf == 0)
- cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
- else
- cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_FREE;
- }
- }
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef CELL_BATCH_H
-#define CELL_BATCH_H
-
-#include "pipe/p_compiler.h"
-
-
-struct cell_context;
-
-
-extern uint
-cell_get_empty_buffer(struct cell_context *cell);
-
-extern void
-cell_batch_flush(struct cell_context *cell);
-
-extern uint
-cell_batch_free_space(const struct cell_context *cell);
-
-extern void *
-cell_batch_alloc16(struct cell_context *cell, uint bytes);
-
-extern void
-cell_init_batch_buffers(struct cell_context *cell);
-
-
-#endif /* CELL_BATCH_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Authors
- * Brian Paul
- */
-
-#include <stdio.h>
-#include <assert.h>
-#include <stdint.h>
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
-#include "util/u_pack_color.h"
-#include "cell/common.h"
-#include "cell_clear.h"
-#include "cell_context.h"
-#include "cell_batch.h"
-#include "cell_flush.h"
-#include "cell_spu.h"
-#include "cell_state.h"
-
-
-/**
- * Called via pipe->clear()
- */
-void
-cell_clear(struct pipe_context *pipe, unsigned buffers,
- const union pipe_color_union *color,
- double depth, unsigned stencil)
-{
- struct cell_context *cell = cell_context(pipe);
-
- if (cell->dirty)
- cell_update_derived(cell);
-
- if (buffers & PIPE_CLEAR_COLOR) {
- uint surfIndex = 0;
- union util_color uc;
-
- util_pack_color(color->f, cell->framebuffer.cbufs[0]->format, &uc);
-
- /* Build a CLEAR command and place it in the current batch buffer */
- STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0);
- struct cell_command_clear_surface *clr
- = (struct cell_command_clear_surface *)
- cell_batch_alloc16(cell, sizeof(*clr));
- clr->opcode[0] = CELL_CMD_CLEAR_SURFACE;
- clr->surface = surfIndex;
- clr->value = uc.ui;
- }
-
- if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
- uint surfIndex = 1;
- uint clearValue;
-
- clearValue = util_pack_z_stencil(cell->framebuffer.zsbuf->format,
- depth, stencil);
-
- /* Build a CLEAR command and place it in the current batch buffer */
- STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0);
- struct cell_command_clear_surface *clr
- = (struct cell_command_clear_surface *)
- cell_batch_alloc16(cell, sizeof(*clr));
- clr->opcode[0] = CELL_CMD_CLEAR_SURFACE;
- clr->surface = surfIndex;
- clr->value = clearValue;
- }
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef CELL_CLEAR_H
-#define CELL_CLEAR_H
-
-
-struct pipe_context;
-
-
-extern void
-cell_clear(struct pipe_context *pipe, unsigned buffers,
- const union pipe_color_union *color,
- double depth, unsigned stencil);
-
-
-#endif /* CELL_CLEAR_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Authors
- * Brian Paul
- */
-
-
-#include <stdio.h>
-
-#include "pipe/p_defines.h"
-#include "pipe/p_format.h"
-#include "util/u_memory.h"
-#include "pipe/p_screen.h"
-#include "util/u_inlines.h"
-
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-
-#include "cell/common.h"
-#include "cell_batch.h"
-#include "cell_clear.h"
-#include "cell_context.h"
-#include "cell_draw_arrays.h"
-#include "cell_fence.h"
-#include "cell_flush.h"
-#include "cell_state.h"
-#include "cell_surface.h"
-#include "cell_spu.h"
-#include "cell_pipe_state.h"
-#include "cell_texture.h"
-#include "cell_vbuf.h"
-
-
-
-static void
-cell_destroy_context( struct pipe_context *pipe )
-{
- struct cell_context *cell = cell_context(pipe);
- unsigned i;
-
- for (i = 0; i < cell->num_vertex_buffers; i++) {
- pipe_resource_reference(&cell->vertex_buffer[i].buffer, NULL);
- }
-
- util_delete_keymap(cell->fragment_ops_cache, NULL);
-
- cell_spu_exit(cell);
-
- align_free(cell);
-}
-
-
-static struct draw_context *
-cell_draw_create(struct cell_context *cell)
-{
- struct draw_context *draw = draw_create(&cell->pipe);
-
-#if 0 /* broken */
- if (getenv("GALLIUM_CELL_VS")) {
- /* plug in SPU-based vertex transformation code */
- draw->shader_queue_flush = cell_vertex_shader_queue_flush;
- draw->driver_private = cell;
- }
-#endif
-
- return draw;
-}
-
-
-static const struct debug_named_value cell_debug_flags[] = {
- {"checker", CELL_DEBUG_CHECKER, NULL},/**< modulate tile clear color by SPU ID */
- {"asm", CELL_DEBUG_ASM, NULL}, /**< dump SPU asm code */
- {"sync", CELL_DEBUG_SYNC, NULL}, /**< SPUs do synchronous DMA */
- {"fragops", CELL_DEBUG_FRAGMENT_OPS, NULL}, /**< SPUs emit fragment ops debug messages*/
- {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK, NULL}, /**< SPUs use reference implementation for fragment ops*/
- {"cmd", CELL_DEBUG_CMD, NULL}, /**< SPUs dump command buffer info */
- {"cache", CELL_DEBUG_CACHE, NULL}, /**< report texture cache stats on exit */
- DEBUG_NAMED_VALUE_END
-};
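These flags are read at context-creation time from the CELL_DEBUG environment variable via debug_get_flags_option() below; several names can normally be combined comma-separated, e.g. CELL_DEBUG=asm,cmd.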
-
-
-struct pipe_context *
-cell_create_context(struct pipe_screen *screen,
- void *priv )
-{
- struct cell_context *cell;
- uint i;
-
- /* some fields need to be 16-byte aligned, so align the whole object */
- cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16);
- if (!cell)
- return NULL;
-
- memset(cell, 0, sizeof(*cell));
-
- cell->winsys = NULL; /* XXX: fixme - get this from screen? */
- cell->pipe.winsys = NULL;
- cell->pipe.screen = screen;
- cell->pipe.priv = priv;
- cell->pipe.destroy = cell_destroy_context;
-
- cell->pipe.clear = cell_clear;
- cell->pipe.flush = cell_flush;
-
-#if 0
- cell->pipe.begin_query = cell_begin_query;
- cell->pipe.end_query = cell_end_query;
- cell->pipe.wait_query = cell_wait_query;
-#endif
-
- cell_init_draw_functions(cell);
- cell_init_state_functions(cell);
- cell_init_shader_functions(cell);
- cell_init_surface_functions(cell);
- cell_init_vertex_functions(cell);
- cell_init_texture_transfer_funcs(cell);
-
- cell->draw = cell_draw_create(cell);
-
- /* Create cache of fragment ops generated code */
- cell->fragment_ops_cache =
- util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL);
-
- cell_init_vbuf(cell);
-
- draw_set_rasterize_stage(cell->draw, cell->vbuf);
-
- /* convert all points/lines to tris for the time being */
- draw_wide_point_threshold(cell->draw, 0.0);
- draw_wide_line_threshold(cell->draw, 0.0);
-
- /* get env vars or read config file to get debug flags */
- cell->debug_flags = debug_get_flags_option("CELL_DEBUG",
- cell_debug_flags,
- 0 );
-
- for (i = 0; i < CELL_NUM_BUFFERS; i++)
- cell_fence_init(&cell->fenced_buffers[i].fence);
-
-
- /*
- * SPU stuff
- */
- /* This call only works with SDK 3.0. Anyone still using 2.1??? */
- cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1);
- cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1);
- if (cell->debug_flags) {
- printf("Cell: found %d Cell(s) with %u SPUs\n",
- cell->num_cells, cell->num_spus);
- }
- if (getenv("CELL_NUM_SPUS")) {
- cell->num_spus = atoi(getenv("CELL_NUM_SPUS"));
- assert(cell->num_spus > 0);
- }
-
- cell_start_spus(cell);
-
- cell_init_batch_buffers(cell);
-
- /* make sure SPU initializations are done before proceeding */
- cell_flush_int(cell, CELL_FLUSH_WAIT);
-
- return &cell->pipe;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef CELL_CONTEXT_H
-#define CELL_CONTEXT_H
-
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "draw/draw_vertex.h"
-#include "draw/draw_vbuf.h"
-/*#include "cell_winsys.h"*/
-#include "cell/common.h"
-#include "rtasm/rtasm_ppc_spe.h"
-#include "tgsi/tgsi_scan.h"
-#include "util/u_keymap.h"
-
-
-struct cell_vbuf_render;
-
-
-/**
- * Cell vertex shader state, subclass of pipe_shader_state.
- */
-struct cell_vertex_shader_state
-{
- struct pipe_shader_state shader;
- struct tgsi_shader_info info;
- void *draw_data;
-};
-
-
-/**
- * Cell fragment shader state, subclass of pipe_shader_state.
- */
-struct cell_fragment_shader_state
-{
- struct pipe_shader_state shader;
- struct tgsi_shader_info info;
- struct spe_function code;
- void *data;
-};
-
-
-/**
- * Key for mapping per-fragment state to cached SPU machine code.
- * keymap(cell_fragment_ops_key) => cell_command_fragment_ops
- */
-struct cell_fragment_ops_key
-{
- struct pipe_blend_state blend;
- struct pipe_blend_color blend_color;
- struct pipe_depth_stencil_alpha_state dsa;
- enum pipe_format color_format;
- enum pipe_format zs_format;
-};
-
-
-struct cell_buffer_node;
-
-/**
- * Fenced buffer list. List of buffers which can be unreferenced after
- * the fence has been executed/signalled.
- */
-struct cell_buffer_list
-{
- PIPE_ALIGN_VAR(16) struct cell_fence fence;
- struct cell_buffer_node *head;
-};
-
-struct cell_velems_state
-{
- unsigned count;
- struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
-};
-
-/**
- * Per-context state, subclass of pipe_context.
- */
-struct cell_context
-{
- struct pipe_context pipe;
-
- struct cell_winsys *winsys;
-
- const struct pipe_blend_state *blend;
- const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
- uint num_samplers;
- const struct pipe_depth_stencil_alpha_state *depth_stencil;
- const struct pipe_rasterizer_state *rasterizer;
- const struct cell_vertex_shader_state *vs;
- const struct cell_fragment_shader_state *fs;
- const struct cell_velems_state *velems;
-
- struct spe_function logic_op;
-
- struct pipe_blend_color blend_color;
- struct pipe_stencil_ref stencil_ref;
- struct pipe_clip_state clip;
- struct pipe_resource *constants[2];
- struct pipe_framebuffer_state framebuffer;
- struct pipe_poly_stipple poly_stipple;
- struct pipe_scissor_state scissor;
- struct cell_resource *texture[PIPE_MAX_SAMPLERS];
- struct pipe_sampler_view *fragment_sampler_views[PIPE_MAX_SAMPLERS];
- uint num_textures;
- struct pipe_viewport_state viewport;
- struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
- uint num_vertex_buffers;
- struct pipe_index_buffer index_buffer;
-
- ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS];
- ubyte *zsbuf_map;
-
- uint dirty;
- uint dirty_textures; /* bitmask of texture units */
- uint dirty_samplers; /* bitmask of sampler units */
-
- /** Cache of code generated for per-fragment ops */
- struct keymap *fragment_ops_cache;
-
- /** The primitive drawing context */
- struct draw_context *draw;
- struct draw_stage *render_stage;
-
- /** For post-transformed vertex buffering: */
- struct cell_vbuf_render *vbuf_render;
- struct draw_stage *vbuf;
-
- struct vertex_info vertex_info;
-
- /** Mapped constant buffers */
- const void *mapped_constants[PIPE_SHADER_TYPES];
-
- PIPE_ALIGN_VAR(16) struct cell_spu_function_info spu_functions;
-
- uint num_cells, num_spus;
-
- /** Buffers for command batches, vertex/index data */
- uint buffer_size[CELL_NUM_BUFFERS];
- PIPE_ALIGN_VAR(16) ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE];
-
- int cur_batch; /**< which buffer is being filled w/ commands */
-
- /** [4] to ensure 16-byte alignment for each status word */
- PIPE_ALIGN_VAR(16) uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4];
-
-
- /** Associated with each command/batch buffer is a list of pipe_buffers
- * that are fenced. When the last command in a buffer is executed, the
- * fence will be signalled, indicating that any pipe_buffers preceding
- * that fence can be unreferenced (and probably freed).
- */
- struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS];
-
-
- struct spe_function attrib_fetch;
- unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS];
-
- unsigned debug_flags;
-};
-
-
-
-
-static INLINE struct cell_context *
-cell_context(struct pipe_context *pipe)
-{
- return (struct cell_context *) pipe;
-}
-
-
-struct pipe_context *
-cell_create_context(struct pipe_screen *screen,
- void *priv );
-
-extern void
-cell_vertex_shader_queue_flush(struct draw_context *draw);
-
-
-/* XXX find a better home for this */
-extern void cell_update_vertex_fetch(struct draw_context *draw);
-
-
-#endif /* CELL_CONTEXT_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* Author:
- * Brian Paul
- * Keith Whitwell
- */
-
-
-#include "pipe/p_defines.h"
-#include "pipe/p_context.h"
-#include "util/u_inlines.h"
-
-#include "cell_context.h"
-#include "cell_draw_arrays.h"
-#include "cell_state.h"
-#include "cell_flush.h"
-#include "cell_texture.h"
-
-#include "draw/draw_context.h"
-
-
-
-
-
-
-/**
- * Draw vertex arrays, with optional indexing.
- * Basically, map the vertex buffers (and drawing surfaces), then hand off
- * the drawing to the 'draw' module.
- *
- * XXX should the element buffer be specified/bound with a separate function?
- */
-static void
-cell_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
-{
- struct cell_context *cell = cell_context(pipe);
- struct draw_context *draw = cell->draw;
- void *mapped_indices = NULL;
- unsigned i;
-
- if (cell->dirty)
- cell_update_derived( cell );
-
-#if 0
- cell_map_surfaces(cell);
-#endif
-
- /*
- * Map vertex buffers
- */
- for (i = 0; i < cell->num_vertex_buffers; i++) {
- void *buf = cell_resource(cell->vertex_buffer[i].buffer)->data;
- draw_set_mapped_vertex_buffer(draw, i, buf);
- }
- /* Map index buffer, if present */
- if (info->indexed && cell->index_buffer.buffer)
- mapped_indices = cell_resource(cell->index_buffer.buffer)->data;
-
- draw_set_mapped_index_buffer(draw, mapped_indices);
-
- /* draw! */
- draw_vbo(draw, info);
-
- /*
- * unmap vertex/index buffers - will cause draw module to flush
- */
- for (i = 0; i < cell->num_vertex_buffers; i++) {
- draw_set_mapped_vertex_buffer(draw, i, NULL);
- }
- if (mapped_indices) {
- draw_set_mapped_index_buffer(draw, NULL);
- }
-
- /*
- * TODO: Flush only when a user vertex/index buffer is present
- * (or even better, modify draw module to do this
- * internally when this condition is seen?)
- */
- draw_flush(draw);
-}
-
-
-void
-cell_init_draw_functions(struct cell_context *cell)
-{
- cell->pipe.draw_vbo = cell_draw_vbo;
-}
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef CELL_DRAW_ARRAYS_H
-#define CELL_DRAW_ARRAYS_H
-
-
-extern void
-cell_init_draw_functions(struct cell_context *cell);
-
-
-#endif /* CELL_DRAW_ARRAYS_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include <unistd.h>
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "cell_context.h"
-#include "cell_batch.h"
-#include "cell_fence.h"
-#include "cell_texture.h"
-
-
-void
-cell_fence_init(struct cell_fence *fence)
-{
- uint i;
- ASSERT_ALIGN16(fence->status);
- for (i = 0; i < CELL_MAX_SPUS; i++) {
- fence->status[i][0] = CELL_FENCE_IDLE;
- }
-}
-
-
-boolean
-cell_fence_signalled(const struct cell_context *cell,
- const struct cell_fence *fence)
-{
- uint i;
- for (i = 0; i < cell->num_spus; i++) {
- if (fence->status[i][0] != CELL_FENCE_SIGNALLED)
- return FALSE;
- /*assert(fence->status[i][0] == CELL_FENCE_EMITTED);*/
- }
- return TRUE;
-}
-
-
-boolean
-cell_fence_finish(const struct cell_context *cell,
- const struct cell_fence *fence,
- uint64_t timeout)
-{
- while (!cell_fence_signalled(cell, fence)) {
- usleep(10);
- }
-
-#ifdef DEBUG
- {
- uint i;
- for (i = 0; i < cell->num_spus; i++) {
- assert(fence->status[i][0] == CELL_FENCE_SIGNALLED);
- }
- }
-#endif
- return TRUE;
-}
-
-
-
-
-struct cell_buffer_node
-{
- struct pipe_resource *buffer;
- struct cell_buffer_node *next;
-};
-
-
-#if 0
-static void
-cell_add_buffer_to_list(struct cell_context *cell,
- struct cell_buffer_list *list,
- struct pipe_resource *buffer)
-{
- struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node);
- /* create new list node which references the buffer, insert at head */
- if (node) {
- pipe_resource_reference(&node->buffer, buffer);
- node->next = list->head;
- list->head = node;
- }
-}
-#endif
-
-
-/**
- * Wait for completion of the given fence, then unreference any buffers
- * on the list.
- * This typically unrefs/frees texture buffers after any rendering which uses
- * them has completed.
- */
-void
-cell_free_fenced_buffers(struct cell_context *cell,
- struct cell_buffer_list *list)
-{
- if (list->head) {
- /*struct pipe_screen *ps = cell->pipe.screen;*/
- struct cell_buffer_node *node;
-
- cell_fence_finish(cell, &list->fence, 0); /* timeout is unused; waits until signalled */
-
- /* traverse the list, unreferencing buffers, freeing nodes */
- node = list->head;
- while (node) {
- struct cell_buffer_node *next = node->next;
- assert(node->buffer);
- /* XXX need this? pipe_buffer_unmap(ps, node->buffer);*/
-#if 0
- printf("Unref buffer %p\n", node->buffer);
- if (node->buffer->reference.count == 1)
- printf(" Delete!\n");
-#endif
- pipe_resource_reference(&node->buffer, NULL);
- FREE(node);
- node = next;
- }
- list->head = NULL;
- }
-}
-
-
-/**
- * This should be called for each render command.
- * Any texture buffers that are currently bound will be added to a fenced
- * list to be freed later when the fence is executed/signalled.
- */
-void
-cell_add_fenced_textures(struct cell_context *cell)
-{
- /*struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch];*/
- uint i;
-
- for (i = 0; i < cell->num_textures; i++) {
- struct cell_resource *ct = cell->texture[i];
- if (ct) {
-#if 0
- printf("Adding texture %p buffer %p to list\n",
- ct, ct->tiled_buffer[level]);
-#endif
-#if 00
- /* XXX this needs to be fixed/restored!
- * Maybe keep pointers to textures, not buffers.
- */
- if (ct->base.buffer)
- cell_add_buffer_to_list(cell, list, ct->buffer);
-#endif
- }
- }
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef CELL_FENCE_H
-#define CELL_FENCE_H
-
-
-extern void
-cell_fence_init(struct cell_fence *fence);
-
-
-extern boolean
-cell_fence_signalled(const struct cell_context *cell,
- const struct cell_fence *fence);
-
-
-extern boolean
-cell_fence_finish(const struct cell_context *cell,
- const struct cell_fence *fence,
- uint64_t timeout);
-
-
-
-extern void
-cell_free_fenced_buffers(struct cell_context *cell,
- struct cell_buffer_list *list);
-
-
-extern void
-cell_add_fenced_textures(struct cell_context *cell);
-
-
-#endif /* CELL_FENCE_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "cell_context.h"
-#include "cell_batch.h"
-#include "cell_flush.h"
-#include "cell_spu.h"
-#include "cell_render.h"
-#include "draw/draw_context.h"
-
-
-/**
- * Called via pipe->flush()
- */
-void
-cell_flush(struct pipe_context *pipe,
- struct pipe_fence_handle **fence)
-{
- struct cell_context *cell = cell_context(pipe);
- unsigned flags = 0;
-
- if (fence) {
- *fence = NULL;
- }
-
- flags |= CELL_FLUSH_WAIT;
-
- draw_flush( cell->draw );
- cell_flush_int(cell, flags);
-}
-
-
-/**
- * Cell internal flush function. Send the current batch buffer to all SPUs.
- * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle.
- * \param flags bitmask of flags CELL_FLUSH_WAIT, or zero
- */
-void
-cell_flush_int(struct cell_context *cell, unsigned flags)
-{
- static boolean flushing = FALSE; /* recursion catcher */
- uint i;
-
- ASSERT(!flushing);
- flushing = TRUE;
-
- if (flags & CELL_FLUSH_WAIT) {
- STATIC_ASSERT(sizeof(opcode_t) % 16 == 0);
- opcode_t *cmd = (opcode_t*) cell_batch_alloc16(cell, sizeof(opcode_t));
- *cmd[0] = CELL_CMD_FINISH;
- }
-
- cell_batch_flush(cell);
-
-#if 0
- /* Send CMD_FINISH to all SPUs */
- for (i = 0; i < cell->num_spus; i++) {
- send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH);
- }
-#endif
-
- if (flags & CELL_FLUSH_WAIT) {
- /* Wait for ack */
- for (i = 0; i < cell->num_spus; i++) {
- uint k = wait_mbox_message(cell_global.spe_contexts[i]);
- assert(k == CELL_CMD_FINISH);
- }
- }
-
- flushing = FALSE;
-}
-
-
-void
-cell_flush_buffer_range(struct cell_context *cell, void *ptr,
- unsigned size)
-{
- STATIC_ASSERT((sizeof(opcode_t) + sizeof(struct cell_buffer_range)) % 16 == 0);
- uint32_t *batch = (uint32_t*)cell_batch_alloc16(cell,
- sizeof(opcode_t) + sizeof(struct cell_buffer_range));
- struct cell_buffer_range *br = (struct cell_buffer_range *) &batch[4];
- batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE;
- br->base = (uintptr_t) ptr;
- br->size = size;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef CELL_FLUSH
-#define CELL_FLUSH
-
-#define CELL_FLUSH_WAIT 0x80000000
-
-extern void
-cell_flush(struct pipe_context *pipe,
- struct pipe_fence_handle **fence);
-
-extern void
-cell_flush_int(struct cell_context *cell, unsigned flags);
-
-extern void
-cell_flush_buffer_range(struct cell_context *cell, void *ptr,
- unsigned size);
-
-#endif
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * Copyright 2009 VMware, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-
-/**
- * Generate SPU fragment program/shader code.
- *
- * Note that we generate SOA-style code here. So each TGSI instruction
- * operates on four pixels (and is translated into four SPU instructions,
- * generally speaking).
- *
- * \author Brian Paul
- */
-
-#include <math.h>
-#include "pipe/p_defines.h"
-#include "pipe/p_state.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_util.h"
-#include "tgsi/tgsi_exec.h"
-#include "tgsi/tgsi_dump.h"
-#include "rtasm/rtasm_ppc_spe.h"
-#include "util/u_memory.h"
-#include "cell_context.h"
-#include "cell_gen_fp.h"
-
-
-#define MAX_TEMPS 16
-#define MAX_IMMED 8
-
-#define CHAN_X 0
-#define CHAN_Y 1
-#define CHAN_Z 2
-#define CHAN_W 3
-
-/**
- * Context needed during code generation.
- */
-struct codegen
-{
- struct cell_context *cell;
- int inputs_reg; /**< 1st function parameter */
- int outputs_reg; /**< 2nd function parameter */
- int constants_reg; /**< 3rd function parameter */
- int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
- int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */
-
- int num_imm; /**< number of immediates */
-
- int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
-
- int addr_reg; /**< address register, integer values */
-
- /** Per-instruction temps / intermediate temps */
- int num_itemps;
- int itemps[12];
-
- /** Current IF/ELSE/ENDIF nesting level */
- int if_nesting;
- /** Current BGNLOOP/ENDLOOP nesting level */
- int loop_nesting;
- /** Location of start of current loop */
- int loop_start;
-
- /** Index of if/conditional mask register */
- int cond_mask_reg;
- /** Index of loop mask register */
- int loop_mask_reg;
-
- /** Index of master execution mask register */
- int exec_mask_reg;
-
- /** KIL mask: indicates which fragments have been killed */
- int kill_mask_reg;
-
- int frame_size; /**< Stack frame size, in words */
-
- struct spe_function *f;
- boolean error;
-};
-
-
-/**
- * Allocate an intermediate temporary register.
- */
-static int
-get_itemp(struct codegen *gen)
-{
- int t = spe_allocate_available_register(gen->f);
- assert(gen->num_itemps < Elements(gen->itemps));
- gen->itemps[gen->num_itemps++] = t;
- return t;
-}
-
-/**
- * Free all intermediate temporary registers. To be called after each
- * instruction has been emitted.
- */
-static void
-free_itemps(struct codegen *gen)
-{
- int i;
- for (i = 0; i < gen->num_itemps; i++) {
- spe_release_register(gen->f, gen->itemps[i]);
- }
- gen->num_itemps = 0;
-}
-
-
-/**
- * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
- * The register is allocated and initialized upon the first call.
- */
-static int
-get_const_one_reg(struct codegen *gen)
-{
- if (gen->one_reg <= 0) {
- gen->one_reg = spe_allocate_available_register(gen->f);
-
- spe_indent(gen->f, 4);
- spe_comment(gen->f, -4, "init constant reg = 1.0:");
-
- /* one = {1.0, 1.0, 1.0, 1.0} */
- spe_load_float(gen->f, gen->one_reg, 1.0f);
-
- spe_indent(gen->f, -4);
- }
-
- return gen->one_reg;
-}
-
-
-/**
- * Return index of the address register.
- * Used for indirect register loads/stores.
- */
-static int
-get_address_reg(struct codegen *gen)
-{
- if (gen->addr_reg <= 0) {
- gen->addr_reg = spe_allocate_available_register(gen->f);
-
- spe_indent(gen->f, 4);
- spe_comment(gen->f, -4, "init address reg = 0:");
-
- /* init addr = {0, 0, 0, 0} */
- spe_zero(gen->f, gen->addr_reg);
-
- spe_indent(gen->f, -4);
- }
-
- return gen->addr_reg;
-}
-
-
-/**
- * Return index of the master execution mask.
- * The register is allocated and initialized upon the first call.
- *
- * The master execution mask controls which pixels in a quad are
- * modified, according to surrounding conditionals, loops, etc.
- */
-static int
-get_exec_mask_reg(struct codegen *gen)
-{
- if (gen->exec_mask_reg <= 0) {
- gen->exec_mask_reg = spe_allocate_available_register(gen->f);
-
- /* XXX this may not be needed */
- spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0");
- spe_load_int(gen->f, gen->exec_mask_reg, ~0);
- }
-
- return gen->exec_mask_reg;
-}
-
-
-/** Return index of the conditional (if/else) execution mask register */
-static int
-get_cond_mask_reg(struct codegen *gen)
-{
- if (gen->cond_mask_reg <= 0) {
- gen->cond_mask_reg = spe_allocate_available_register(gen->f);
- }
-
- return gen->cond_mask_reg;
-}
-
-
-/** Return index of the loop execution mask register */
-static int
-get_loop_mask_reg(struct codegen *gen)
-{
- if (gen->loop_mask_reg <= 0) {
- gen->loop_mask_reg = spe_allocate_available_register(gen->f);
- }
-
- return gen->loop_mask_reg;
-}
-
-
-
-static boolean
-is_register_src(struct codegen *gen, int channel,
- const struct tgsi_full_src_register *src)
-{
- int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel);
- int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
-
- if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) {
- return FALSE;
- }
- if (src->Register.File == TGSI_FILE_TEMPORARY ||
- src->Register.File == TGSI_FILE_IMMEDIATE) {
- return TRUE;
- }
- return FALSE;
-}
-
-
-static boolean
-is_memory_dst(struct codegen *gen, int channel,
- const struct tgsi_full_dst_register *dst)
-{
- if (dst->Register.File == TGSI_FILE_OUTPUT) {
- return TRUE;
- }
- else {
- return FALSE;
- }
-}
-
-
-/**
- * Return the index of the SPU temporary containing the named TGSI
- * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
- * just return the corresponding SPE register. If the TGSI register
- * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
- * and emit an SPE load instruction.
- */
-static int
-get_src_reg(struct codegen *gen,
- int channel,
- const struct tgsi_full_src_register *src)
-{
- int reg = -1;
- int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel);
- boolean reg_is_itemp = FALSE;
- uint sign_op;
-
- assert(swizzle >= TGSI_SWIZZLE_X);
- assert(swizzle <= TGSI_SWIZZLE_W);
-
- {
- int index = src->Register.Index;
-
- assert(swizzle < 4);
-
- if (src->Register.Indirect) {
- /* XXX unfinished */
- }
-
- switch (src->Register.File) {
- case TGSI_FILE_TEMPORARY:
- reg = gen->temp_regs[index][swizzle];
- break;
- case TGSI_FILE_INPUT:
- {
- /* offset is measured in quadwords, not bytes */
- int offset = index * 4 + swizzle;
- reg = get_itemp(gen);
- reg_is_itemp = TRUE;
- /* Load: reg = memory[(machine_reg) + offset] */
- spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16);
- }
- break;
- case TGSI_FILE_IMMEDIATE:
- reg = gen->imm_regs[index][swizzle];
- break;
- case TGSI_FILE_CONSTANT:
- {
- /* offset is measured in quadwords, not bytes */
- int offset = index * 4 + swizzle;
- reg = get_itemp(gen);
- reg_is_itemp = TRUE;
- /* Load: reg = memory[(machine_reg) + offset] */
- spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
- }
- break;
- default:
- assert(0);
- }
- }
-
- /*
- * Handle absolute value, negate or set-negative of src register.
- */
- sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
- if (sign_op != TGSI_UTIL_SIGN_KEEP) {
- /*
- * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
- */
- const int bit31mask_reg = get_itemp(gen);
- int result_reg;
-
- if (reg_is_itemp) {
- /* re-use 'reg' for the result */
- result_reg = reg;
- }
- else {
- /* alloc a new reg for the result */
- result_reg = get_itemp(gen);
- }
-
- /* mask with bit 31 set, the rest cleared */
- spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
-
- if (sign_op == TGSI_UTIL_SIGN_CLEAR) {
- spe_andc(gen->f, result_reg, reg, bit31mask_reg);
- }
- else if (sign_op == TGSI_UTIL_SIGN_SET) {
- spe_or(gen->f, result_reg, reg, bit31mask_reg); /* force the sign bit on: -|x| */
- }
- else {
- assert(sign_op == TGSI_UTIL_SIGN_TOGGLE);
- spe_xor(gen->f, result_reg, reg, bit31mask_reg);
- }
-
- reg = result_reg;
- }
-
- return reg;
-}
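As a worked example of the bit-31 trick above (plain IEEE-754 single precision, nothing driver-specific): -2.5 is 0xC0200000, so and-with-complement of the 0x80000000 mask clears the sign bit and yields 0x40200000 = 2.5 (SIGN_CLEAR, absolute value), xor with the mask flips the sign back to -2.5 (SIGN_TOGGLE, negation), and or-ing the mask in forces the sign bit on regardless of the input (SIGN_SET).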
-
-
-/**
- * Return the index of an SPE register to use for the given TGSI register.
- * If the TGSI register is TGSI_FILE_TEMPORARY, the index of the
- * corresponding SPE register is returned. If the TGSI register is
- * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
- * See store_dest_reg() below...
- */
-static int
-get_dst_reg(struct codegen *gen,
- int channel,
- const struct tgsi_full_dst_register *dest)
-{
- int reg = -1;
-
- switch (dest->Register.File) {
- case TGSI_FILE_TEMPORARY:
- if (gen->if_nesting > 0 || gen->loop_nesting > 0)
- reg = get_itemp(gen);
- else
- reg = gen->temp_regs[dest->Register.Index][channel];
- break;
- case TGSI_FILE_OUTPUT:
- reg = get_itemp(gen);
- break;
- default:
- assert(0);
- }
-
- return reg;
-}
-
-
-/**
- * When a TGSI instruction is writing to an output register, this
- * function emits the SPE store instruction to store the value_reg.
- * \param value_reg the SPE register containing the value to store.
- * This would have been returned by get_dst_reg().
- */
-static void
-store_dest_reg(struct codegen *gen,
- int value_reg, int channel,
- const struct tgsi_full_dst_register *dest)
-{
- /*
- * XXX need to implement dst reg clamping/saturation
- */
-#if 0
- switch (inst->Instruction.Saturate) {
- case TGSI_SAT_NONE:
- break;
- case TGSI_SAT_ZERO_ONE:
- break;
- case TGSI_SAT_MINUS_PLUS_ONE:
- break;
- default:
- assert( 0 );
- }
-#endif
-
- switch (dest->Register.File) {
- case TGSI_FILE_TEMPORARY:
- if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
- int d_reg = gen->temp_regs[dest->Register.Index][channel];
- int exec_reg = get_exec_mask_reg(gen);
- /* Mix d with new value according to exec mask:
- * d[i] = mask_reg[i] ? value_reg : d_reg
- */
- spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg);
- }
- else {
- /* we're not inside a condition or loop: do nothing special */
-
- }
- break;
- case TGSI_FILE_OUTPUT:
- {
- /* offset is measured in quadwords, not bytes */
- int offset = dest->Register.Index * 4 + channel;
- if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
- int exec_reg = get_exec_mask_reg(gen);
- int curval_reg = get_itemp(gen);
- /* First read the current value from memory:
- * Load: curval = memory[(machine_reg) + offset]
- */
- spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
-         /* Mix curval with the new value according to exec mask:
-          *    curval[i] = exec_reg[i] ? value_reg[i] : curval[i]
- */
- spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
- /* Store: memory[(machine_reg) + offset] = curval */
- spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
- }
- else {
-         /* Store: memory[(machine_reg) + offset] = value_reg */
- spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16);
- }
- }
- break;
- default:
- assert(0);
- }
-}
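
The masked stores above rely on the SPE selb (select-bits) instruction to blend the new value with the existing one under the execution mask. A one-lane sketch of that select in plain C (illustrative helper, not driver code):

#include <stdint.h>

/* Bitwise select, as selb does: where a mask bit is set take the new
 * value, elsewhere keep the old value. */
static uint32_t masked_write(uint32_t oldval, uint32_t newval, uint32_t mask)
{
   return (newval & mask) | (oldval & ~mask);
}
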
-
-
-
-static void
-emit_prologue(struct codegen *gen)
-{
- gen->frame_size = 1024; /* XXX temporary, should be dynamic */
-
- spe_comment(gen->f, 0, "Function prologue:");
-
- /* save $lr on stack # stqd $lr,16($sp) */
- spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
-
- if (gen->frame_size >= 512) {
- /* offset is too large for ai instruction */
- int offset_reg = spe_allocate_available_register(gen->f);
- int sp_reg = spe_allocate_available_register(gen->f);
- /* offset = -framesize */
- spe_load_int(gen->f, offset_reg, -gen->frame_size);
- /* sp = $sp */
- spe_move(gen->f, sp_reg, SPE_REG_SP);
- /* $sp = $sp + offset_reg */
- spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
- /* save $sp in stack frame */
- spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0);
- /* clean up */
- spe_release_register(gen->f, offset_reg);
- spe_release_register(gen->f, sp_reg);
- }
- else {
- /* save stack pointer # stqd $sp,-frameSize($sp) */
- spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
-
- /* adjust stack pointer # ai $sp,$sp,-frameSize */
- spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
- }
-}
-
-
-static void
-emit_epilogue(struct codegen *gen)
-{
- const int return_reg = 3;
-
- spe_comment(gen->f, 0, "Function epilogue:");
-
- spe_comment(gen->f, 0, "return the killed mask");
- if (gen->kill_mask_reg > 0) {
-      /* the shader called KIL; return the accumulated kill mask */
- spe_move(gen->f, return_reg, gen->kill_mask_reg);
- }
- else {
- /* return {0,0,0,0} */
- spe_load_uint(gen->f, return_reg, 0);
- }
-
- spe_comment(gen->f, 0, "restore stack and return");
- if (gen->frame_size >= 512) {
- /* offset is too large for ai instruction */
- int offset_reg = spe_allocate_available_register(gen->f);
- /* offset = framesize */
- spe_load_int(gen->f, offset_reg, gen->frame_size);
- /* $sp = $sp + offset */
- spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
- /* clean up */
- spe_release_register(gen->f, offset_reg);
- }
- else {
- /* restore stack pointer # ai $sp,$sp,frameSize */
- spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size);
- }
-
- /* restore $lr # lqd $lr,16($sp) */
- spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
-
- /* return from function call */
- spe_bi(gen->f, SPE_REG_RA, 0, 0);
-}
-
-
-#define FOR_EACH_ENABLED_CHANNEL(inst, ch) \
- for (ch = 0; ch < 4; ch++) \
- if (inst->Dst[0].Register.WriteMask & (1 << ch))
-
-
-static boolean
-emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch = 0, src_reg, addr_reg;
-
- src_reg = get_src_reg(gen, ch, &inst->Src[0]);
- addr_reg = get_address_reg(gen);
-
- /* convert float to int */
- spe_cflts(gen->f, addr_reg, src_reg, 0);
-
- free_itemps(gen);
-
- return TRUE;
-}
-
-
-static boolean
-emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch, src_reg[4], dst_reg[4];
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- src_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
- dst_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
- }
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- if (is_register_src(gen, ch, &inst->Src[0]) &&
- is_memory_dst(gen, ch, &inst->Dst[0])) {
- /* special-case: register to memory store */
- store_dest_reg(gen, src_reg[ch], ch, &inst->Dst[0]);
- }
- else {
- spe_move(gen->f, dst_reg[ch], src_reg[ch]);
- store_dest_reg(gen, dst_reg[ch], ch, &inst->Dst[0]);
- }
- }
-
- free_itemps(gen);
-
- return TRUE;
-}
-
-/**
- * Emit binary operation
- */
-static boolean
-emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch, s1_reg[4], s2_reg[4], d_reg[4];
-
- /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
- s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
- }
-
- /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      /* Emit the actual SPE instruction: d = s1 (op) s2 */
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_ADD:
- spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
- break;
- case TGSI_OPCODE_SUB:
- spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
- break;
- case TGSI_OPCODE_MUL:
- spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
- break;
- default:
- ;
- }
- }
-
- /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
- }
-
- /* Free any intermediate temps we allocated */
- free_itemps(gen);
-
- return TRUE;
-}
-
-
-/**
- * Emit multiply add. See emit_binop() for comments.
- */
-static boolean
-emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4];
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
- s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
- s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
- }
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]);
- }
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
- }
- free_itemps(gen);
- return TRUE;
-}
-
-
-/**
- * Emit linear interpolate. See emit_binop() for comments.
- */
-static boolean
-emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4];
-
- /* setup/get src/dst/temp regs */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
- s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
- s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
- tmp_reg[ch] = get_itemp(gen);
- }
-
- /* d = s3 + s1(s2 - s3) */
- /* do all subtracts, then all fma, then all stores to better pipeline */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]);
- }
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]);
- }
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
- }
- free_itemps(gen);
- return TRUE;
-}
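
The LRP emission above uses one subtract and one fused multiply-add per channel rather than the textbook two multiplies. A scalar sketch of the same arithmetic (plain C, illustrative only):

/* d = s3 + s1*(s2 - s3), i.e. a linear interpolation between s3 and s2
 * with weight s1. */
static float lerp(float s1, float s2, float s3)
{
   float tmp = s2 - s3;       /* spe_fs  */
   return s1 * tmp + s3;      /* spe_fma */
}
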
-
-
-
-/**
- * Emit reciprocal or recip sqrt.
- */
-static boolean
-emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch, s1_reg[4], d_reg[4], tmp_reg[4];
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
- tmp_reg[ch] = get_itemp(gen);
- }
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) {
- /* tmp = 1/s1 */
- spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]);
- }
- else {
- /* tmp = 1/sqrt(s1) */
- spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]);
- }
- }
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- /* d = float_interp(s1, tmp) */
- spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]);
- }
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
- }
-
- free_itemps(gen);
- return TRUE;
-}
-
-
-/**
- * Emit absolute value. See emit_binop() for comments.
- */
-static boolean
-emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch, s1_reg[4], d_reg[4];
- const int bit31mask_reg = get_itemp(gen);
-
- /* mask with bit 31 set, the rest cleared */
- spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
- }
-
- /* d = sign bit cleared in s1 */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg);
- }
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
- }
-
- free_itemps(gen);
- return TRUE;
-}
-
-/**
- * Emit 3 component dot product. See emit_binop() for comments.
- */
-static boolean
-emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch;
- int s1x_reg, s1y_reg, s1z_reg;
- int s2x_reg, s2y_reg, s2z_reg;
- int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
-
- s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
- s2x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
- s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
- s2y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
- s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
- s2z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
-
- /* t0 = x0 * x1 */
- spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg);
-
- /* t1 = y0 * y1 */
- spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg);
-
- /* t0 = z0 * z1 + t0 */
- spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg);
-
- /* t0 = t0 + t1 */
- spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
- spe_move(gen->f, d_reg, t0_reg);
- store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
- }
-
- free_itemps(gen);
- return TRUE;
-}
-
-/**
- * Emit 4 component dot product. See emit_binop() for comments.
- */
-static boolean
-emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch;
- int s0x_reg, s0y_reg, s0z_reg, s0w_reg;
- int s1x_reg, s1y_reg, s1z_reg, s1w_reg;
- int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
-
- s0x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
- s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
- s0y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
- s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
- s0z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
- s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
- s0w_reg = get_src_reg(gen, CHAN_W, &inst->Src[0]);
- s1w_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]);
-
- /* t0 = x0 * x1 */
- spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg);
-
- /* t1 = y0 * y1 */
- spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg);
-
- /* t0 = z0 * z1 + t0 */
- spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg);
-
- /* t1 = w0 * w1 + t1 */
- spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg);
-
- /* t0 = t0 + t1 */
- spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
- spe_move(gen->f, d_reg, t0_reg);
- store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
- }
-
- free_itemps(gen);
- return TRUE;
-}
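
Both DP3 and DP4 above accumulate into two independent partial sums (t0, t1) so the dependent multiply-adds can overlap in the SPE pipeline. A scalar sketch of the DP4 schedule (illustrative helper):

/* Four-component dot product using two independent accumulators. */
static float dp4(const float a[4], const float b[4])
{
   float t0 = a[0] * b[0];     /* t0 = x0*x1         */
   float t1 = a[1] * b[1];     /* t1 = y0*y1         */
   t0 = a[2] * b[2] + t0;      /* t0 += z0*z1 (fma)  */
   t1 = a[3] * b[3] + t1;      /* t1 += w0*w1 (fma)  */
   return t0 + t1;
}
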
-
-/**
- * Emit homogeneous dot product. See emit_binop() for comments.
- */
-static boolean
-emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- /* XXX rewrite this function to look more like DP3/DP4 */
- int ch;
- int s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
- int s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
- int tmp_reg = get_itemp(gen);
-
- /* t = x0 * x1 */
- spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
-
- s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
- s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
- /* t = y0 * y1 + t */
- spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
-
- s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
- s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
- /* t = z0 * z1 + t */
- spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
-
- s2_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]);
- /* t = w1 + t */
- spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
- spe_move(gen->f, d_reg, tmp_reg);
- store_dest_reg(gen, tmp_reg, ch, &inst->Dst[0]);
- }
-
- free_itemps(gen);
- return TRUE;
-}
-
-/**
- * Emit 3-component vector normalize.
- */
-static boolean
-emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch;
- int src_reg[3];
- int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
-
- src_reg[0] = get_src_reg(gen, CHAN_X, &inst->Src[0]);
- src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
- src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
-
- /* t0 = x * x */
- spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]);
-
- /* t1 = y * y */
- spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]);
-
- /* t0 = z * z + t0 */
- spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg);
-
- /* t0 = t0 + t1 */
- spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
-
- /* t1 = 1.0 / sqrt(t0) */
- spe_frsqest(gen->f, t1_reg, t0_reg);
- spe_fi(gen->f, t1_reg, t0_reg, t1_reg);
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
- /* dst = src[ch] * t1 */
- spe_fm(gen->f, d_reg, src_reg[ch], t1_reg);
- store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
- }
-
- free_itemps(gen);
- return TRUE;
-}
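
NRM computes a single reciprocal square root of the squared length and scales each enabled channel by it. A scalar equivalent using libm in place of the frsqest/fi estimate (illustrative only):

#include <math.h>

/* Normalize a 3-component vector: v *= 1/sqrt(x*x + y*y + z*z). */
static void nrm3(float v[3])
{
   float inv_len = 1.0f / sqrtf(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
   v[0] *= inv_len;
   v[1] *= inv_len;
   v[2] *= inv_len;
}
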
-
-
-/**
- * Emit cross product. See emit_binop() for comments.
- */
-static boolean
-emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
- int s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
- int tmp_reg = get_itemp(gen);
-
- /* t = z0 * y1 */
- spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
-
- s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
- s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
- /* t = y0 * z1 - t */
- spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
-
- if (inst->Dst[0].Register.WriteMask & (1 << CHAN_X)) {
- store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]);
- }
-
- s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
- s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
- /* t = x0 * z1 */
- spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
-
- s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
- s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
- /* t = z0 * x1 - t */
- spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
-
- if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Y)) {
- store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]);
- }
-
- s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
- s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
- /* t = y0 * x1 */
- spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
-
- s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
- s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
- /* t = x0 * y1 - t */
- spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
-
- if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Z)) {
- store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]);
- }
-
- free_itemps(gen);
- return TRUE;
-}
-
-
-/**
- * Emit inequality instruction.
- * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
- * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
- * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
- */
-static boolean
-emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg;
- boolean complement = FALSE;
-
- one_reg = get_const_one_reg(gen);
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
- s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
- }
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_SGT:
- spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
- break;
- case TGSI_OPCODE_SLT:
- spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
- break;
- case TGSI_OPCODE_SGE:
- spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]);
- complement = TRUE;
- break;
- case TGSI_OPCODE_SLE:
- spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
- complement = TRUE;
- break;
- case TGSI_OPCODE_SEQ:
- spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
- break;
- case TGSI_OPCODE_SNE:
- spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
- complement = TRUE;
- break;
- default:
- assert(0);
- }
- }
-
- /* convert d from 0x0/0xffffffff to 0.0/1.0 */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      /* d = complement ? (one_reg & ~d) : (one_reg & d) */
- if (complement)
- spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]);
- else
- spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]);
- }
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
- }
-
- free_itemps(gen);
- return TRUE;
-}
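
The conversion step above works because ANDing the all-ones/all-zeros compare result with the bit pattern of 1.0f (0x3f800000) yields exactly 1.0f or 0.0f. A scalar sketch of that trick (hypothetical helper name):

#include <stdint.h>
#include <string.h>

/* Convert an fcgt/fceq-style result (0x0 or 0xffffffff) to 0.0f/1.0f. */
static float mask_to_float(uint32_t cmp_mask)
{
   const float one = 1.0f;
   uint32_t bits;
   float result;

   memcpy(&bits, &one, sizeof bits);   /* bits = 0x3f800000 */
   bits &= cmp_mask;                   /* 0x3f800000 or 0x0 */
   memcpy(&result, &bits, sizeof result);
   return result;                      /* 1.0f or 0.0f */
}
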
-
-
-/**
- * Emit compare.
- */
-static boolean
-emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch;
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- int s1_reg = get_src_reg(gen, ch, &inst->Src[0]);
- int s2_reg = get_src_reg(gen, ch, &inst->Src[1]);
- int s3_reg = get_src_reg(gen, ch, &inst->Src[2]);
- int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
- int zero_reg = get_itemp(gen);
-
- spe_zero(gen->f, zero_reg);
-
- /* d = (s1 < 0) ? s2 : s3 */
- spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
- spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
-
- store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
- free_itemps(gen);
- }
-
- return TRUE;
-}
-
-/**
- * Emit trunc.
- * Convert float to signed int
- * Convert signed int to float
- */
-static boolean
-emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch, s1_reg[4], d_reg[4];
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
- }
-
- /* Convert float to int */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0);
- }
-
- /* Convert int to float */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0);
- }
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
- }
-
- free_itemps(gen);
- return TRUE;
-}
-
-
-/**
- * Emit floor.
- * If the value is negative, subtract 1.0, then
- * convert float to signed int (truncating), then
- * convert the signed int back to float.
- */
-static boolean
-emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg;
-
- zero_reg = get_itemp(gen);
- spe_zero(gen->f, zero_reg);
- one_reg = get_const_one_reg(gen);
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
- tmp_reg[ch] = get_itemp(gen);
- }
-
- /* If negative, subtract 1.0 */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]);
- }
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]);
- }
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]);
- }
-
- /* Convert float to int */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
- }
-
- /* Convert int to float */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0);
- }
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
- }
-
- free_itemps(gen);
- return TRUE;
-}
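
A scalar sketch of the floor sequence emitted above: subtract 1.0 from negative inputs, then truncate toward zero by round-tripping through a signed integer. The sketch mirrors the generated code, including its behavior on exact negative integers:

/* Floor as emitted above: subtract 1.0 when negative, then truncate
 * toward zero (cflts followed by csflt). */
static float flr_emit(float x)
{
   float t = (x < 0.0f) ? x - 1.0f : x;
   return (float)(int)t;
}
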
-
-
-/**
- * Compute frac = Input - FLR(Input)
- */
-static boolean
-emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg;
-
- zero_reg = get_itemp(gen);
- spe_zero(gen->f, zero_reg);
- one_reg = get_const_one_reg(gen);
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
- tmp_reg[ch] = get_itemp(gen);
- }
-
- /* If negative, subtract 1.0 */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]);
- }
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]);
- }
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]);
- }
-
- /* Convert float to int */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
- }
-
- /* Convert int to float */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0);
- }
-
- /* d = s1 - FLR(s1) */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]);
- }
-
- /* store result */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
- }
-
- free_itemps(gen);
- return TRUE;
-}
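
FRC repeats the same inline floor computation and subtracts it from the input. Scalar sketch (illustrative helper):

/* Fractional part: frc(x) = x - flr(x), with flr computed as above. */
static float frc_emit(float x)
{
   float t  = (x < 0.0f) ? x - 1.0f : x;
   float fl = (float)(int)t;
   return x - fl;
}
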
-
-
-#if 0
-static void
-print_functions(struct cell_context *cell)
-{
- struct cell_spu_function_info *funcs = &cell->spu_functions;
- uint i;
- for (i = 0; i < funcs->num; i++) {
- printf("SPU func %u: %s at %u\n",
- i, funcs->names[i], funcs->addrs[i]);
- }
-}
-#endif
-
-
-static uint
-lookup_function(struct cell_context *cell, const char *funcname)
-{
- const struct cell_spu_function_info *funcs = &cell->spu_functions;
- uint i, addr = 0;
- for (i = 0; i < funcs->num; i++) {
- if (strcmp(funcs->names[i], funcname) == 0) {
- addr = funcs->addrs[i];
- }
- }
- assert(addr && "spu function not found");
- return addr / 4; /* discard 2 least significant bits */
-}
-
-
-/**
- * Emit code to call a SPU function.
- * Used to implement instructions like SIN/COS/POW/TEX/etc.
- * If scalar, only the X components of the src regs are used, and the
- * result is replicated across the dest register's XYZW components.
- */
-static boolean
-emit_function_call(struct codegen *gen,
- const struct tgsi_full_instruction *inst,
- char *funcname, uint num_args, boolean scalar)
-{
- const uint addr = lookup_function(gen->cell, funcname);
- char comment[100];
- int s_regs[3];
- int func_called = FALSE;
- uint a, ch;
- int retval_reg = -1;
-
- assert(num_args <= 3);
-
- snprintf(comment, sizeof(comment), "CALL %s:", funcname);
- spe_comment(gen->f, -4, comment);
-
- if (scalar) {
- for (a = 0; a < num_args; a++) {
- s_regs[a] = get_src_reg(gen, CHAN_X, &inst->Src[a]);
- }
- /* we'll call the function, put the return value in this register,
- * then replicate it across all write-enabled components in d_reg.
- */
- retval_reg = spe_allocate_available_register(gen->f);
- }
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- int d_reg;
- ubyte usedRegs[SPE_NUM_REGS];
- uint i, numUsed;
-
- if (!scalar) {
- for (a = 0; a < num_args; a++) {
- s_regs[a] = get_src_reg(gen, ch, &inst->Src[a]);
- }
- }
-
- d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
-
- if (!scalar || !func_called) {
- /* for a scalar function, we'll really only call the function once */
-
- numUsed = spe_get_registers_used(gen->f, usedRegs);
- assert(numUsed < gen->frame_size / 16 - 2);
-
- /* save registers to stack */
- for (i = 0; i < numUsed; i++) {
- uint reg = usedRegs[i];
- int offset = 2 + i;
- spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
- }
-
- /* setup function arguments */
- for (a = 0; a < num_args; a++) {
- spe_move(gen->f, 3 + a, s_regs[a]);
- }
-
- /* branch to function, save return addr */
- spe_brasl(gen->f, SPE_REG_RA, addr);
-
- /* save function's return value */
- if (scalar)
- spe_move(gen->f, retval_reg, 3);
- else
- spe_move(gen->f, d_reg, 3);
-
- /* restore registers from stack */
- for (i = 0; i < numUsed; i++) {
- uint reg = usedRegs[i];
- if (reg != d_reg && reg != retval_reg) {
- int offset = 2 + i;
- spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
- }
- }
-
- func_called = TRUE;
- }
-
- if (scalar) {
- spe_move(gen->f, d_reg, retval_reg);
- }
-
- store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
- free_itemps(gen);
- }
-
- if (scalar) {
- spe_release_register(gen->f, retval_reg);
- }
-
- return TRUE;
-}
-
-
-static boolean
-emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- const uint target = inst->Texture.Texture;
- const uint unit = inst->Src[1].Register.Index;
- uint addr;
- int ch;
- int coord_regs[4], d_regs[4];
-
- switch (target) {
- case TGSI_TEXTURE_1D:
- case TGSI_TEXTURE_2D:
- addr = lookup_function(gen->cell, "spu_tex_2d");
- break;
- case TGSI_TEXTURE_3D:
- addr = lookup_function(gen->cell, "spu_tex_3d");
- break;
- case TGSI_TEXTURE_CUBE:
- addr = lookup_function(gen->cell, "spu_tex_cube");
- break;
- default:
- ASSERT(0 && "unsupported texture target");
- return FALSE;
- }
-
- assert(inst->Src[1].Register.File == TGSI_FILE_SAMPLER);
-
- spe_comment(gen->f, -4, "CALL tex:");
-
- /* get src/dst reg info */
- for (ch = 0; ch < 4; ch++) {
- coord_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]);
- d_regs[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
- }
-
- {
- ubyte usedRegs[SPE_NUM_REGS];
- uint i, numUsed;
-
- numUsed = spe_get_registers_used(gen->f, usedRegs);
- assert(numUsed < gen->frame_size / 16 - 2);
-
- /* save registers to stack */
- for (i = 0; i < numUsed; i++) {
- uint reg = usedRegs[i];
- int offset = 2 + i;
- spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
- }
-
- /* setup function arguments (XXX depends on target) */
- for (i = 0; i < 4; i++) {
- spe_move(gen->f, 3 + i, coord_regs[i]);
- }
- spe_load_uint(gen->f, 7, unit); /* sampler unit */
-
- /* branch to function, save return addr */
- spe_brasl(gen->f, SPE_REG_RA, addr);
-
- /* save function's return values (four pixel's colors) */
- for (i = 0; i < 4; i++) {
- spe_move(gen->f, d_regs[i], 3 + i);
- }
-
- /* restore registers from stack */
- for (i = 0; i < numUsed; i++) {
- uint reg = usedRegs[i];
- if (reg != d_regs[0] &&
- reg != d_regs[1] &&
- reg != d_regs[2] &&
- reg != d_regs[3]) {
- int offset = 2 + i;
- spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
- }
- }
- }
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_regs[ch], ch, &inst->Dst[0]);
- free_itemps(gen);
- }
-
- return TRUE;
-}
-
-
-/**
- * KILL if any of src reg values are less than zero.
- */
-static boolean
-emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch;
- int s_regs[4], kil_reg = -1, cmp_reg, zero_reg;
-
- spe_comment(gen->f, -4, "CALL kil:");
-
- /* zero = {0,0,0,0} */
- zero_reg = get_itemp(gen);
- spe_zero(gen->f, zero_reg);
-
- cmp_reg = get_itemp(gen);
-
- /* get src regs */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]);
- }
-
- /* test if any src regs are < 0 */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- if (kil_reg >= 0) {
-         /* cmp = (0 > src) ? ~0 : 0 */
- spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
- /* kil = kil | cmp */
- spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
- }
- else {
- kil_reg = get_itemp(gen);
-         /* kil = (0 > src) ? ~0 : 0 */
- spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
- }
- }
-
- if (gen->if_nesting || gen->loop_nesting) {
- /* may have been a conditional kil */
- spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
- }
-
- /* allocate the kill mask reg if needed */
- if (gen->kill_mask_reg <= 0) {
- gen->kill_mask_reg = spe_allocate_available_register(gen->f);
- spe_move(gen->f, gen->kill_mask_reg, kil_reg);
- }
- else {
- spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg);
- }
-
- free_itemps(gen);
-
- return TRUE;
-}
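
KIL above builds an all-ones mask wherever any enabled source channel is negative, restricted to the lanes that are currently executing. A per-pixel sketch in plain C (hypothetical helper; the real code works on whole quads):

#include <stdint.h>

/* Returns ~0 if this pixel should be killed, 0 otherwise. */
static uint32_t kil_pixel(const float src[4], unsigned writemask,
                          uint32_t exec_mask)
{
   uint32_t kil = 0;
   int ch;

   for (ch = 0; ch < 4; ch++) {
      if ((writemask & (1u << ch)) && src[ch] < 0.0f)
         kil = ~0u;
   }
   return kil & exec_mask;   /* only kill where we're actually executing */
}
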
-
-
-
-/**
- * Emit min or max.
- */
-static boolean
-emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- s0_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
- s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
- d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
- tmp_reg[ch] = get_itemp(gen);
- }
-
-   /* MAX: d = (s0 > s1) ? s0 : s1;  MIN: d = (s1 > s0) ? s0 : s1 */
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- if (inst->Instruction.Opcode == TGSI_OPCODE_MAX)
- spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]);
- else
- spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]);
- }
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
- }
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
- }
-
- free_itemps(gen);
- return TRUE;
-}
-
-
-/**
- * Emit code to update the execution mask.
- * This needs to be done whenever the execution status of a conditional
- * or loop is changed.
- */
-static void
-emit_update_exec_mask(struct codegen *gen)
-{
- const int exec_reg = get_exec_mask_reg(gen);
- const int cond_reg = gen->cond_mask_reg;
- const int loop_reg = gen->loop_mask_reg;
-
- spe_comment(gen->f, 0, "Update master execution mask");
-
- if (gen->if_nesting > 0 && gen->loop_nesting > 0) {
- /* exec_mask = cond_mask & loop_mask */
- assert(cond_reg > 0);
- assert(loop_reg > 0);
- spe_and(gen->f, exec_reg, cond_reg, loop_reg);
- }
- else if (gen->if_nesting > 0) {
- assert(cond_reg > 0);
- spe_move(gen->f, exec_reg, cond_reg);
- }
- else if (gen->loop_nesting > 0) {
- assert(loop_reg > 0);
- spe_move(gen->f, exec_reg, loop_reg);
- }
- else {
- spe_load_int(gen->f, exec_reg, ~0x0);
- }
-}
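
A direct restatement of the mask combination above for a single lane (plain C, illustrative only):

#include <stdint.h>

/* Master execution mask: AND of the conditional and loop masks when
 * both are active, whichever one is active otherwise, else all-ones. */
static uint32_t exec_mask(int if_nesting, int loop_nesting,
                          uint32_t cond_mask, uint32_t loop_mask)
{
   if (if_nesting > 0 && loop_nesting > 0)
      return cond_mask & loop_mask;
   if (if_nesting > 0)
      return cond_mask;
   if (loop_nesting > 0)
      return loop_mask;
   return ~0u;
}
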
-
-
-static boolean
-emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- const int channel = 0;
- int cond_reg;
-
- cond_reg = get_cond_mask_reg(gen);
-
- /* XXX push cond exec mask */
-
- spe_comment(gen->f, 0, "init conditional exec mask = ~0:");
- spe_load_int(gen->f, cond_reg, ~0);
-
- /* update conditional execution mask with the predicate register */
- int tmp_reg = get_itemp(gen);
- int s1_reg = get_src_reg(gen, channel, &inst->Src[0]);
-
- /* tmp = (s1_reg == 0) */
- spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
- /* tmp = !tmp */
- spe_complement(gen->f, tmp_reg, tmp_reg);
- /* cond_mask = cond_mask & tmp */
- spe_and(gen->f, cond_reg, cond_reg, tmp_reg);
-
- gen->if_nesting++;
-
- /* update the master execution mask */
- emit_update_exec_mask(gen);
-
- free_itemps(gen);
-
- return TRUE;
-}
-
-
-static boolean
-emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- const int cond_reg = get_cond_mask_reg(gen);
-
- spe_comment(gen->f, 0, "cond exec mask = !cond exec mask");
- spe_complement(gen->f, cond_reg, cond_reg);
- emit_update_exec_mask(gen);
-
- return TRUE;
-}
-
-
-static boolean
-emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- /* XXX todo: pop cond exec mask */
-
- gen->if_nesting--;
-
- emit_update_exec_mask(gen);
-
- return TRUE;
-}
-
-
-static boolean
-emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- int exec_reg, loop_reg;
-
- exec_reg = get_exec_mask_reg(gen);
- loop_reg = get_loop_mask_reg(gen);
-
- /* XXX push loop_exec mask */
-
- spe_comment(gen->f, 0*-4, "initialize loop exec mask = ~0");
- spe_load_int(gen->f, loop_reg, ~0x0);
-
- gen->loop_nesting++;
- gen->loop_start = spe_code_size(gen->f); /* in bytes */
-
- return TRUE;
-}
-
-
-static boolean
-emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- const int loop_reg = get_loop_mask_reg(gen);
- const int tmp_reg = get_itemp(gen);
- int offset;
-
-   /* tmp_reg = loop_mask[0] | loop_mask[1] | loop_mask[2] | loop_mask[3] */
- spe_orx(gen->f, tmp_reg, loop_reg);
-
- offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */
-
- /* branch back to top of loop if tmp_reg != 0 */
- spe_brnz(gen->f, tmp_reg, offset / 4);
-
- /* XXX pop loop_exec mask */
-
- gen->loop_nesting--;
-
- emit_update_exec_mask(gen);
-
- return TRUE;
-}
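
ENDLOOP ORs the four lanes of the loop mask together (orx) and branches back to the loop top while any lane is still active. The equivalent scalar test (illustrative helper):

#include <stdint.h>

/* Keep looping while at least one of the four pixels is still active. */
static int loop_should_continue(const uint32_t loop_mask[4])
{
   return (loop_mask[0] | loop_mask[1] | loop_mask[2] | loop_mask[3]) != 0;
}
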
-
-
-static boolean
-emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- const int exec_reg = get_exec_mask_reg(gen);
- const int loop_reg = get_loop_mask_reg(gen);
-
- assert(gen->loop_nesting > 0);
-
- spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask");
- spe_andc(gen->f, loop_reg, loop_reg, exec_reg);
-
- emit_update_exec_mask(gen);
-
- return TRUE;
-}
-
-
-static boolean
-emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst)
-{
- assert(gen->loop_nesting > 0);
-
- return TRUE;
-}
-
-
-static boolean
-emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
- boolean ddx)
-{
- int ch;
-
- FOR_EACH_ENABLED_CHANNEL(inst, ch) {
- int s_reg = get_src_reg(gen, ch, &inst->Src[0]);
- int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
-
- int t1_reg = get_itemp(gen);
- int t2_reg = get_itemp(gen);
-
- spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
- if (ddx) {
- spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
- }
- else {
- spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
- }
- spe_fs(gen->f, d_reg, t2_reg, t1_reg);
-
- free_itemps(gen);
- }
-
- return TRUE;
-}
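
The derivative instructions rely on the quad layout of the four pixels within each SIMD register: upper-left, upper-right, lower-left, lower-right. A sketch of the resulting forward differences (plain C, one float per pixel):

/* Forward differences within a 2x2 quad laid out as {UL, UR, LL, LR}. */
static float ddx(const float quad[4]) { return quad[1] - quad[0]; }
static float ddy(const float quad[4]) { return quad[2] - quad[0]; }
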
-
-
-
-
-/**
- * Emit END instruction.
- * We just return from the shader function at this point.
- *
- * Note that there may be more code after this that would be
- * called by TGSI_OPCODE_CALL.
- */
-static boolean
-emit_END(struct codegen *gen)
-{
- emit_epilogue(gen);
- return TRUE;
-}
-
-
-/**
- * Emit code for the given instruction. Just a big switch stmt.
- */
-static boolean
-emit_instruction(struct codegen *gen,
- const struct tgsi_full_instruction *inst)
-{
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_ARL:
- return emit_ARL(gen, inst);
- case TGSI_OPCODE_MOV:
- return emit_MOV(gen, inst);
- case TGSI_OPCODE_ADD:
- case TGSI_OPCODE_SUB:
- case TGSI_OPCODE_MUL:
- return emit_binop(gen, inst);
- case TGSI_OPCODE_MAD:
- return emit_MAD(gen, inst);
- case TGSI_OPCODE_LRP:
- return emit_LRP(gen, inst);
- case TGSI_OPCODE_DP3:
- return emit_DP3(gen, inst);
- case TGSI_OPCODE_DP4:
- return emit_DP4(gen, inst);
- case TGSI_OPCODE_DPH:
- return emit_DPH(gen, inst);
- case TGSI_OPCODE_NRM:
- return emit_NRM3(gen, inst);
- case TGSI_OPCODE_XPD:
- return emit_XPD(gen, inst);
- case TGSI_OPCODE_RCP:
- case TGSI_OPCODE_RSQ:
- return emit_RCP_RSQ(gen, inst);
- case TGSI_OPCODE_ABS:
- return emit_ABS(gen, inst);
- case TGSI_OPCODE_SGT:
- case TGSI_OPCODE_SLT:
- case TGSI_OPCODE_SGE:
- case TGSI_OPCODE_SLE:
- case TGSI_OPCODE_SEQ:
- case TGSI_OPCODE_SNE:
- return emit_inequality(gen, inst);
- case TGSI_OPCODE_CMP:
- return emit_CMP(gen, inst);
- case TGSI_OPCODE_MIN:
- case TGSI_OPCODE_MAX:
- return emit_MIN_MAX(gen, inst);
- case TGSI_OPCODE_TRUNC:
- return emit_TRUNC(gen, inst);
- case TGSI_OPCODE_FLR:
- return emit_FLR(gen, inst);
- case TGSI_OPCODE_FRC:
- return emit_FRC(gen, inst);
- case TGSI_OPCODE_END:
- return emit_END(gen);
-
- case TGSI_OPCODE_COS:
- return emit_function_call(gen, inst, "spu_cos", 1, TRUE);
- case TGSI_OPCODE_SIN:
- return emit_function_call(gen, inst, "spu_sin", 1, TRUE);
- case TGSI_OPCODE_POW:
- return emit_function_call(gen, inst, "spu_pow", 2, TRUE);
- case TGSI_OPCODE_EX2:
- return emit_function_call(gen, inst, "spu_exp2", 1, TRUE);
- case TGSI_OPCODE_LG2:
- return emit_function_call(gen, inst, "spu_log2", 1, TRUE);
- case TGSI_OPCODE_TEX:
- /* fall-through for now */
- case TGSI_OPCODE_TXD:
- /* fall-through for now */
- case TGSI_OPCODE_TXB:
- /* fall-through for now */
- case TGSI_OPCODE_TXL:
- /* fall-through for now */
- case TGSI_OPCODE_TXP:
- return emit_TEX(gen, inst);
- case TGSI_OPCODE_KIL:
- return emit_KIL(gen, inst);
-
- case TGSI_OPCODE_IF:
- return emit_IF(gen, inst);
- case TGSI_OPCODE_ELSE:
- return emit_ELSE(gen, inst);
- case TGSI_OPCODE_ENDIF:
- return emit_ENDIF(gen, inst);
-
- case TGSI_OPCODE_BGNLOOP:
- return emit_BGNLOOP(gen, inst);
- case TGSI_OPCODE_ENDLOOP:
- return emit_ENDLOOP(gen, inst);
- case TGSI_OPCODE_BRK:
- return emit_BRK(gen, inst);
- case TGSI_OPCODE_CONT:
- return emit_CONT(gen, inst);
-
- case TGSI_OPCODE_DDX:
- return emit_DDX_DDY(gen, inst, TRUE);
- case TGSI_OPCODE_DDY:
- return emit_DDX_DDY(gen, inst, FALSE);
-
- /* XXX lots more cases to do... */
-
- default:
- fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
- inst->Instruction.Opcode);
- return FALSE;
- }
-
- return TRUE;
-}
-
-
-
-/**
- * Emit code for a TGSI immediate value (vector of four floats).
- * This involves register allocation and initialization.
- * XXX the initialization should be done by a "prepare" stage, not
- * per quad execution!
- */
-static boolean
-emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
-{
- int ch;
-
- assert(gen->num_imm < MAX_TEMPS);
-
- for (ch = 0; ch < 4; ch++) {
- float val = immed->u[ch].Float;
-
- if (ch > 0 && val == immed->u[ch - 1].Float) {
- /* re-use previous register */
- gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
- }
- else {
- char str[100];
- int reg = spe_allocate_available_register(gen->f);
-
- if (reg < 0)
- return FALSE;
-
- sprintf(str, "init $%d = %f", reg, val);
- spe_comment(gen->f, 0, str);
-
- /* update immediate map */
- gen->imm_regs[gen->num_imm][ch] = reg;
-
- /* emit initializer instruction */
- spe_load_float(gen->f, reg, val);
- }
- }
-
- gen->num_imm++;
-
- return TRUE;
-}
-
-
-
-/**
- * Emit "code" for a TGSI declaration.
- * We only care about TGSI TEMPORARY register declarations at this time.
- * For each TGSI TEMPORARY we allocate four SPE registers.
- */
-static boolean
-emit_declaration(struct cell_context *cell,
- struct codegen *gen, const struct tgsi_full_declaration *decl)
-{
- int i, ch;
-
- switch (decl->Declaration.File) {
- case TGSI_FILE_TEMPORARY:
- for (i = decl->Range.First;
- i <= decl->Range.Last;
- i++) {
- assert(i < MAX_TEMPS);
- for (ch = 0; ch < 4; ch++) {
- gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
- if (gen->temp_regs[i][ch] < 0)
- return FALSE; /* out of regs */
- }
-
- /* XXX if we run out of SPE registers, we need to spill
- * to SPU memory. someday...
- */
-
- {
- char buf[100];
- sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i,
- gen->temp_regs[i][0], gen->temp_regs[i][1],
- gen->temp_regs[i][2], gen->temp_regs[i][3]);
- spe_comment(gen->f, 0, buf);
- }
- }
- break;
- default:
- ; /* ignore */
- }
-
- return TRUE;
-}
-
-
-
-/**
- * Translate TGSI shader code to SPE instructions. This is done when
- * the state tracker gives us a new shader (via pipe->create_fs_state()).
- *
- * \param cell the rendering context (in)
- * \param tokens the TGSI shader (in)
- * \param f the generated function (out)
- */
-boolean
-cell_gen_fragment_program(struct cell_context *cell,
- const struct tgsi_token *tokens,
- struct spe_function *f)
-{
- struct tgsi_parse_context parse;
- struct codegen gen;
- uint ic = 0;
-
- memset(&gen, 0, sizeof(gen));
- gen.cell = cell;
- gen.f = f;
-
- /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
- gen.inputs_reg = 3; /* pointer to inputs array */
- gen.outputs_reg = 4; /* pointer to outputs array */
- gen.constants_reg = 5; /* pointer to constants array */
-
- spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
- spe_allocate_register(f, gen.inputs_reg);
- spe_allocate_register(f, gen.outputs_reg);
- spe_allocate_register(f, gen.constants_reg);
-
- if (cell->debug_flags & CELL_DEBUG_ASM) {
- spe_print_code(f, TRUE);
- spe_indent(f, 2*8);
- printf("Begin %s\n", __FUNCTION__);
- tgsi_dump(tokens, 0);
- }
-
- tgsi_parse_init(&parse, tokens);
-
- emit_prologue(&gen);
-
- while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
- tgsi_parse_token(&parse);
-
- switch (parse.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- if (f->print) {
- _debug_printf(" # ");
- tgsi_dump_immediate(&parse.FullToken.FullImmediate);
- }
- if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
- gen.error = TRUE;
- break;
-
- case TGSI_TOKEN_TYPE_DECLARATION:
- if (f->print) {
- _debug_printf(" # ");
- tgsi_dump_declaration(&parse.FullToken.FullDeclaration);
- }
- if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
- gen.error = TRUE;
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- if (f->print) {
- _debug_printf(" # ");
- ic++;
- tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic);
- }
- if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
- gen.error = TRUE;
- break;
-
- default:
- assert(0);
- }
- }
-
- if (gen.error) {
- /* terminate the SPE code */
- return emit_END(&gen);
- }
-
- if (cell->debug_flags & CELL_DEBUG_ASM) {
- printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
- printf("End %s\n", __FUNCTION__);
- }
-
- tgsi_parse_free( &parse );
-
- return !gen.error;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-
-#ifndef CELL_GEN_FP_H
-#define CELL_GEN_FP_H
-
-
-
-extern boolean
-cell_gen_fragment_program(struct cell_context *cell,
- const struct tgsi_token *tokens,
- struct spe_function *f);
-
-
-#endif /* CELL_GEN_FP_H */
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * Copyright 2009 VMware, Inc. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Generate SPU per-fragment code (actually per-quad code).
- * \author Brian Paul
- * \author Bob Ellison
- */
-
-
-#include "pipe/p_defines.h"
-#include "pipe/p_state.h"
-#include "rtasm/rtasm_ppc_spe.h"
-#include "cell_context.h"
-#include "cell_gen_fragment.h"
-
-
-
-/** Do extra optimizations? */
-#define OPTIMIZATIONS 1
-
-
-/**
- * Generate SPE code to perform Z/depth testing.
- *
- * \param dsa Gallium depth/stencil/alpha state to gen code for
- * \param f SPE function to append instruction onto.
- * \param mask_reg register containing quad/pixel "alive" mask (in/out)
- * \param ifragZ_reg register containing integer fragment Z values (in)
- * \param ifbZ_reg register containing integer frame buffer Z values (in/out)
- * \param zmask_reg register containing result of Z test/comparison (out)
- *
- * Returns TRUE if the Z-buffer needs to be updated.
- */
-static boolean
-gen_depth_test(struct spe_function *f,
- const struct pipe_depth_stencil_alpha_state *dsa,
- int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg)
-{
- /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_
- * quantities. This only makes a difference for 32-bit Z values though.
- */
- ASSERT(dsa->depth.enabled);
-
- switch (dsa->depth.func) {
- case PIPE_FUNC_EQUAL:
- /* zmask = (ifragZ == ref) */
- spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
- /* mask = (mask & zmask) */
- spe_and(f, mask_reg, mask_reg, zmask_reg);
- break;
-
- case PIPE_FUNC_NOTEQUAL:
- /* zmask = (ifragZ == ref) */
- spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
- /* mask = (mask & ~zmask) */
- spe_andc(f, mask_reg, mask_reg, zmask_reg);
- break;
-
- case PIPE_FUNC_GREATER:
- /* zmask = (ifragZ > ref) */
- spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
- /* mask = (mask & zmask) */
- spe_and(f, mask_reg, mask_reg, zmask_reg);
- break;
-
- case PIPE_FUNC_LESS:
- /* zmask = (ref > ifragZ) */
- spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
- /* mask = (mask & zmask) */
- spe_and(f, mask_reg, mask_reg, zmask_reg);
- break;
-
- case PIPE_FUNC_LEQUAL:
- /* zmask = (ifragZ > ref) */
- spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
- /* mask = (mask & ~zmask) */
- spe_andc(f, mask_reg, mask_reg, zmask_reg);
- break;
-
- case PIPE_FUNC_GEQUAL:
- /* zmask = (ref > ifragZ) */
- spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
- /* mask = (mask & ~zmask) */
- spe_andc(f, mask_reg, mask_reg, zmask_reg);
- break;
-
- case PIPE_FUNC_NEVER:
- spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */
- spe_move(f, zmask_reg, mask_reg); /* zmask = mask */
- break;
-
- case PIPE_FUNC_ALWAYS:
- /* mask unchanged */
- spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */
- break;
-
- default:
- ASSERT(0);
- break;
- }
-
- if (dsa->depth.writemask) {
- /*
- * If (ztest passed) {
- * framebufferZ = fragmentZ;
- * }
- * OR,
-    * framebufferZ = ztest_passed ? fragmentZ : framebufferZ;
- */
- spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg);
- return TRUE;
- }
-
- return FALSE;
-}
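
A one-pixel sketch of a single case above (PIPE_FUNC_LESS with depth writes enabled), using an unsigned comparison as the generated clgt does (hypothetical helper):

#include <stdint.h>

/* Z test: pass where fragZ < fbZ (unsigned), narrow the alive mask,
 * then write the passing fragment's Z back with a bitwise select. */
static void depth_test_less(uint32_t *mask, uint32_t fragZ, uint32_t *fbZ)
{
   uint32_t zmask = (*fbZ > fragZ) ? ~0u : 0u;     /* clgt(fbZ, fragZ) */

   *mask &= zmask;                                 /* and              */
   *fbZ = (*fbZ & ~*mask) | (fragZ & *mask);       /* selb             */
}
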
-
-
-/**
- * Generate SPE code to perform alpha testing.
- *
- * \param dsa Gallium depth/stencil/alpha state to gen code for
- * \param f SPE function to append instruction onto.
- * \param mask_reg register containing quad/pixel "alive" mask (in/out)
- * \param fragA_reg register containing four fragment alpha values (in)
- */
-static void
-gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
- struct spe_function *f, int mask_reg, int fragA_reg)
-{
- int ref_reg = spe_allocate_available_register(f);
- int amask_reg = spe_allocate_available_register(f);
-
- ASSERT(dsa->alpha.enabled);
-
- if ((dsa->alpha.func != PIPE_FUNC_NEVER) &&
- (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
- /* load/splat the alpha reference float value */
- spe_load_float(f, ref_reg, dsa->alpha.ref_value);
- }
-
- /* emit code to do the alpha comparison, updating 'mask' */
- switch (dsa->alpha.func) {
- case PIPE_FUNC_EQUAL:
- /* amask = (fragA == ref) */
- spe_fceq(f, amask_reg, fragA_reg, ref_reg);
- /* mask = (mask & amask) */
- spe_and(f, mask_reg, mask_reg, amask_reg);
- break;
-
- case PIPE_FUNC_NOTEQUAL:
- /* amask = (fragA == ref) */
- spe_fceq(f, amask_reg, fragA_reg, ref_reg);
- /* mask = (mask & ~amask) */
- spe_andc(f, mask_reg, mask_reg, amask_reg);
- break;
-
- case PIPE_FUNC_GREATER:
- /* amask = (fragA > ref) */
- spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
- /* mask = (mask & amask) */
- spe_and(f, mask_reg, mask_reg, amask_reg);
- break;
-
- case PIPE_FUNC_LESS:
- /* amask = (ref > fragA) */
- spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
- /* mask = (mask & amask) */
- spe_and(f, mask_reg, mask_reg, amask_reg);
- break;
-
- case PIPE_FUNC_LEQUAL:
- /* amask = (fragA > ref) */
- spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
- /* mask = (mask & ~amask) */
- spe_andc(f, mask_reg, mask_reg, amask_reg);
- break;
-
- case PIPE_FUNC_GEQUAL:
- /* amask = (ref > fragA) */
- spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
- /* mask = (mask & ~amask) */
- spe_andc(f, mask_reg, mask_reg, amask_reg);
- break;
-
- case PIPE_FUNC_NEVER:
- spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */
- break;
-
- case PIPE_FUNC_ALWAYS:
- /* no-op, mask unchanged */
- break;
-
- default:
- ASSERT(0);
- break;
- }
-
-#if OPTIMIZATIONS
- /* if mask == {0,0,0,0} we're all done, return */
- {
- /* re-use amask reg here */
- int tmp_reg = amask_reg;
- /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */
- spe_orx(f, tmp_reg, mask_reg);
- /* if tmp[0] == 0 then return from function call */
- spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0);
- }
-#endif
-
- spe_release_register(f, ref_reg);
- spe_release_register(f, amask_reg);
-}
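
One alpha-test case from above (PIPE_FUNC_GREATER) restated as scalar C; the OPTIMIZATIONS block then returns early if the whole quad is dead (illustrative helper):

#include <stdint.h>

/* Keep a pixel alive only where its alpha exceeds the reference. */
static uint32_t alpha_test_greater(uint32_t mask, float fragA, float ref)
{
   uint32_t amask = (fragA > ref) ? ~0u : 0u;   /* fcgt */
   return mask & amask;                         /* and  */
}
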
-
-
-/**
- * These helper functions are used inline to allocate and deallocate
- * optional constant registers. Once a constant is discovered to be
- * needed, we will likely need it again, so we don't want to deallocate
- * it and have to allocate and load it again unnecessarily.
- */
-static INLINE void
-setup_optional_register(struct spe_function *f,
- int *r)
-{
- if (*r < 0)
- *r = spe_allocate_available_register(f);
-}
-
-static INLINE void
-release_optional_register(struct spe_function *f,
- int r)
-{
- if (r >= 0)
- spe_release_register(f, r);
-}
-
-static INLINE void
-setup_const_register(struct spe_function *f,
- int *r,
- float value)
-{
- if (*r >= 0)
- return;
- setup_optional_register(f, r);
- spe_load_float(f, *r, value);
-}
-
-static INLINE void
-release_const_register(struct spe_function *f,
- int r)
-{
- release_optional_register(f, r);
-}
-
-
-
-/**
- * Unpack/convert framebuffer colors from four 32-bit packed colors
- * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
- * Each 8-bit color component is expanded into a float in [0.0, 1.0].
- */
-static void
-unpack_colors(struct spe_function *f,
- enum pipe_format color_format,
- int fbRGBA_reg,
- int fbR_reg, int fbG_reg, int fbB_reg, int fbA_reg)
-{
- int mask0_reg = spe_allocate_available_register(f);
- int mask1_reg = spe_allocate_available_register(f);
- int mask2_reg = spe_allocate_available_register(f);
- int mask3_reg = spe_allocate_available_register(f);
-
- spe_load_int(f, mask0_reg, 0xff);
- spe_load_int(f, mask1_reg, 0xff00);
- spe_load_int(f, mask2_reg, 0xff0000);
- spe_load_int(f, mask3_reg, 0xff000000);
-
- spe_comment(f, 0, "Unpack framebuffer colors, convert to floats");
-
- switch (color_format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- /* fbB = fbRGBA & mask */
- spe_and(f, fbB_reg, fbRGBA_reg, mask0_reg);
-
- /* fbG = fbRGBA & mask */
- spe_and(f, fbG_reg, fbRGBA_reg, mask1_reg);
-
- /* fbR = fbRGBA & mask */
- spe_and(f, fbR_reg, fbRGBA_reg, mask2_reg);
-
- /* fbA = fbRGBA & mask */
- spe_and(f, fbA_reg, fbRGBA_reg, mask3_reg);
-
- /* fbG = fbG >> 8 */
- spe_roti(f, fbG_reg, fbG_reg, -8);
-
- /* fbR = fbR >> 16 */
- spe_roti(f, fbR_reg, fbR_reg, -16);
-
- /* fbA = fbA >> 24 */
- spe_roti(f, fbA_reg, fbA_reg, -24);
- break;
-
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- /* fbA = fbRGBA & mask */
- spe_and(f, fbA_reg, fbRGBA_reg, mask0_reg);
-
- /* fbR = fbRGBA & mask */
- spe_and(f, fbR_reg, fbRGBA_reg, mask1_reg);
-
- /* fbG = fbRGBA & mask */
- spe_and(f, fbG_reg, fbRGBA_reg, mask2_reg);
-
- /* fbB = fbRGBA & mask */
- spe_and(f, fbB_reg, fbRGBA_reg, mask3_reg);
-
- /* fbR = fbR >> 8 */
- spe_roti(f, fbR_reg, fbR_reg, -8);
-
- /* fbG = fbG >> 16 */
- spe_roti(f, fbG_reg, fbG_reg, -16);
-
- /* fbB = fbB >> 24 */
- spe_roti(f, fbB_reg, fbB_reg, -24);
- break;
-
- default:
- ASSERT(0);
- }
-
- /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */
- spe_cuflt(f, fbR_reg, fbR_reg, 8);
- spe_cuflt(f, fbG_reg, fbG_reg, 8);
- spe_cuflt(f, fbB_reg, fbB_reg, 8);
- spe_cuflt(f, fbA_reg, fbA_reg, 8);
-
- spe_release_register(f, mask0_reg);
- spe_release_register(f, mask1_reg);
- spe_release_register(f, mask2_reg);
- spe_release_register(f, mask3_reg);
-}
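
A scalar sketch of the unpacking above for PIPE_FORMAT_B8G8R8A8_UNORM. Note that the generated code converts with cuflt and a scale of 8, i.e. it divides by 256 rather than 255 (illustrative helper):

#include <stdint.h>

/* B in bits 0-7, G in 8-15, R in 16-23, A in 24-31; scale to floats
 * by dividing by 256, as the cuflt scale factor of 8 does. */
static void unpack_bgra(uint32_t pixel,
                        float *r, float *g, float *b, float *a)
{
   *b = (float)( pixel        & 0xff) / 256.0f;
   *g = (float)((pixel >>  8) & 0xff) / 256.0f;
   *r = (float)((pixel >> 16) & 0xff) / 256.0f;
   *a = (float)((pixel >> 24) & 0xff) / 256.0f;
}
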
-
-
-/**
- * Generate SPE code to implement the given blend mode for a quad of pixels.
- * \param f SPE function to append instruction onto.
- * \param fragR_reg register with fragment red values (float) (in/out)
- * \param fragG_reg register with fragment green values (float) (in/out)
- * \param fragB_reg register with fragment blue values (float) (in/out)
- * \param fragA_reg register with fragment alpha values (float) (in/out)
- * \param fbRGBA_reg register with packed framebuffer colors (integer) (in)
- */
-static void
-gen_blend(const struct pipe_blend_state *blend,
- const struct pipe_blend_color *blend_color,
- struct spe_function *f,
- enum pipe_format color_format,
- int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg,
- int fbRGBA_reg)
-{
- int term1R_reg = spe_allocate_available_register(f);
- int term1G_reg = spe_allocate_available_register(f);
- int term1B_reg = spe_allocate_available_register(f);
- int term1A_reg = spe_allocate_available_register(f);
-
- int term2R_reg = spe_allocate_available_register(f);
- int term2G_reg = spe_allocate_available_register(f);
- int term2B_reg = spe_allocate_available_register(f);
- int term2A_reg = spe_allocate_available_register(f);
-
- int fbR_reg = spe_allocate_available_register(f);
- int fbG_reg = spe_allocate_available_register(f);
- int fbB_reg = spe_allocate_available_register(f);
- int fbA_reg = spe_allocate_available_register(f);
-
- int tmp_reg = spe_allocate_available_register(f);
-
- /* Optional constant registers we might or might not end up using;
- * if we do use them, make sure we only allocate them once by
- * keeping a flag on each one.
- */
- int one_reg = -1;
- int constR_reg = -1, constG_reg = -1, constB_reg = -1, constA_reg = -1;
-
- ASSERT(blend->rt[0].blend_enable);
-
- /* packed RGBA -> float colors */
- unpack_colors(f, color_format, fbRGBA_reg,
- fbR_reg, fbG_reg, fbB_reg, fbA_reg);
-
- /*
- * Compute Src RGB terms. We're actually looking for the value
- * of (the appropriate RGB factors) * (the incoming source RGB color),
- * because in some cases (like PIPE_BLENDFACTOR_ONE and
- * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math.
- */
- switch (blend->rt[0].rgb_src_factor) {
- case PIPE_BLENDFACTOR_ONE:
- /* factors = (1,1,1), so term = (R,G,B) */
- spe_move(f, term1R_reg, fragR_reg);
- spe_move(f, term1G_reg, fragG_reg);
- spe_move(f, term1B_reg, fragB_reg);
- break;
- case PIPE_BLENDFACTOR_ZERO:
- /* factors = (0,0,0), so term = (0,0,0) */
- spe_load_float(f, term1R_reg, 0.0f);
- spe_load_float(f, term1G_reg, 0.0f);
- spe_load_float(f, term1B_reg, 0.0f);
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- /* factors = (R,G,B), so term = (R*R, G*G, B*B) */
- spe_fm(f, term1R_reg, fragR_reg, fragR_reg);
- spe_fm(f, term1G_reg, fragG_reg, fragG_reg);
- spe_fm(f, term1B_reg, fragB_reg, fragB_reg);
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- /* factors = (A,A,A), so term = (R*A, G*A, B*A) */
- spe_fm(f, term1R_reg, fragR_reg, fragA_reg);
- spe_fm(f, term1G_reg, fragG_reg, fragA_reg);
- spe_fm(f, term1B_reg, fragB_reg, fragA_reg);
- break;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B))
- * or in other words term = (R-R*R, G-G*G, B-B*B)
- * fnms(a,b,c,d) computes a = d - b*c
- */
- spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg);
- spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg);
- spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg);
- break;
- case PIPE_BLENDFACTOR_DST_COLOR:
- /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */
- spe_fm(f, term1R_reg, fragR_reg, fbR_reg);
- spe_fm(f, term1G_reg, fragG_reg, fbG_reg);
- spe_fm(f, term1B_reg, fragB_reg, fbB_reg);
- break;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb))
- * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb)
- * fnms(a,b,c,d) computes a = d - b*c
- */
- spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg);
- spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg);
- spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg);
- break;
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A))
- * or term = (R-R*A,G-G*A,B-B*A)
- * fnms(a,b,c,d) computes a = d - b*c
- */
- spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg);
- spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg);
- spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg);
- break;
- case PIPE_BLENDFACTOR_DST_ALPHA:
- /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */
- spe_fm(f, term1R_reg, fragR_reg, fbA_reg);
- spe_fm(f, term1G_reg, fragG_reg, fbA_reg);
- spe_fm(f, term1B_reg, fragB_reg, fbA_reg);
- break;
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb))
-    * or term = (R-R*Afb,G-G*Afb,B-B*Afb)
- * fnms(a,b,c,d) computes a = d - b*c
- */
- spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg);
- spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg);
- spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg);
- break;
- case PIPE_BLENDFACTOR_CONST_COLOR:
- /* We need the optional constant color registers */
- setup_const_register(f, &constR_reg, blend_color->color[0]);
- setup_const_register(f, &constG_reg, blend_color->color[1]);
- setup_const_register(f, &constB_reg, blend_color->color[2]);
- /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */
- spe_fm(f, term1R_reg, fragR_reg, constR_reg);
- spe_fm(f, term1G_reg, fragG_reg, constG_reg);
- spe_fm(f, term1B_reg, fragB_reg, constB_reg);
- break;
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- /* we'll need the optional constant alpha register */
- setup_const_register(f, &constA_reg, blend_color->color[3]);
- /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */
- spe_fm(f, term1R_reg, fragR_reg, constA_reg);
- spe_fm(f, term1G_reg, fragG_reg, constA_reg);
- spe_fm(f, term1B_reg, fragB_reg, constA_reg);
- break;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- /* We need the optional constant color registers */
- setup_const_register(f, &constR_reg, blend_color->color[0]);
- setup_const_register(f, &constG_reg, blend_color->color[1]);
- setup_const_register(f, &constB_reg, blend_color->color[2]);
- /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc))
- * or term = (R-R*Rc, G-G*Gc, B-B*Bc)
- * fnms(a,b,c,d) computes a = d - b*c
- */
- spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg);
- spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg);
- spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg);
- break;
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      /* we'll need the optional constant alpha register */
-      setup_const_register(f, &constA_reg, blend_color->color[3]);
- /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac))
- * or term = (R-R*Ac,G-G*Ac,B-B*Ac)
- * fnms(a,b,c,d) computes a = d - b*c
- */
- spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg);
- spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg);
- spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg);
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- /* We'll need the optional {1,1,1,1} register */
- setup_const_register(f, &one_reg, 1.0f);
- /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so
- * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb))
- * We could expand the term (as a*min(b,c) == min(a*b,a*c)
- * as long as a is positive), but then we'd have to do three
- * spe_float_min() functions instead of one, so this is simpler.
- */
- /* tmp = 1 - Afb */
- spe_fs(f, tmp_reg, one_reg, fbA_reg);
- /* tmp = min(A,tmp) */
- spe_float_min(f, tmp_reg, fragA_reg, tmp_reg);
- /* term = R*tmp */
- spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
- spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
- spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
- break;
-
- /* These are special D3D cases involving a second color output
- * from the fragment shader. I'm not sure we can support them
- * yet... XXX
- */
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-
- default:
- ASSERT(0);
- }
-
- /*
- * Compute Src Alpha term. Like the above, we're looking for
- * the full term A*factor, not just the factor itself, because
- * in many cases we can avoid doing unnecessary multiplies.
- */
- switch (blend->rt[0].alpha_src_factor) {
- case PIPE_BLENDFACTOR_ZERO:
- /* factor = 0, so term = 0 */
- spe_load_float(f, term1A_reg, 0.0f);
- break;
-
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */
- case PIPE_BLENDFACTOR_ONE:
- /* factor = 1, so term = A */
- spe_move(f, term1A_reg, fragA_reg);
- break;
-
- case PIPE_BLENDFACTOR_SRC_COLOR:
- /* factor = A, so term = A*A */
- spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
- break;
-
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- /* factor = 1-A, so term = A*(1-A) = A-A*A */
- /* fnms(a,b,c,d) computes a = d - b*c */
- spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg);
- break;
-
- case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */
- case PIPE_BLENDFACTOR_DST_COLOR:
- /* factor = Afb, so term = A*Afb */
- spe_fm(f, term1A_reg, fragA_reg, fbA_reg);
- break;
-
- case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- /* factor = 1-Afb, so term = A*(1-Afb) = A - A*Afb */
- /* fnms(a,b,c,d) computes a = d - b*c */
- spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg);
- break;
-
- case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */
- case PIPE_BLENDFACTOR_CONST_COLOR:
- /* We need the optional constA_reg register */
- setup_const_register(f, &constA_reg, blend_color->color[3]);
- /* factor = Ac, so term = A*Ac */
- spe_fm(f, term1A_reg, fragA_reg, constA_reg);
- break;
-
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- /* We need the optional constA_reg register */
- setup_const_register(f, &constA_reg, blend_color->color[3]);
- /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */
- /* fnms(a,b,c,d) computes a = d - b*c */
- spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg);
- break;
-
- /* These are special D3D cases involving a second color output
- * from the fragment shader. I'm not sure we can support them
- * yet... XXX
- */
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- default:
- ASSERT(0);
- }
-
- /*
- * Compute Dest RGB term. Like the above, we're looking for
- * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because
- * in many cases we can avoid doing unnecessary multiplies.
- */
- switch (blend->rt[0].rgb_dst_factor) {
- case PIPE_BLENDFACTOR_ONE:
- /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */
- spe_move(f, term2R_reg, fbR_reg);
- spe_move(f, term2G_reg, fbG_reg);
- spe_move(f, term2B_reg, fbB_reg);
- break;
- case PIPE_BLENDFACTOR_ZERO:
-      /* factors = (0,0,0), so term = (0,0,0) */
- spe_load_float(f, term2R_reg, 0.0f);
- spe_load_float(f, term2G_reg, 0.0f);
- spe_load_float(f, term2B_reg, 0.0f);
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */
- spe_fm(f, term2R_reg, fbR_reg, fragR_reg);
- spe_fm(f, term2G_reg, fbG_reg, fragG_reg);
- spe_fm(f, term2B_reg, fbB_reg, fragB_reg);
- break;
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B))
- * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B)
- * fnms(a,b,c,d) computes a = d - b*c
- */
- spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg);
- spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg);
- spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg);
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */
- spe_fm(f, term2R_reg, fbR_reg, fragA_reg);
- spe_fm(f, term2G_reg, fbG_reg, fragA_reg);
- spe_fm(f, term2B_reg, fbB_reg, fragA_reg);
- break;
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */
- /* fnms(a,b,c,d) computes a = d - b*c */
- spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg);
- spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg);
- spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg);
- break;
- case PIPE_BLENDFACTOR_DST_COLOR:
- /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */
- spe_fm(f, term2R_reg, fbR_reg, fbR_reg);
- spe_fm(f, term2G_reg, fbG_reg, fbG_reg);
- spe_fm(f, term2B_reg, fbB_reg, fbB_reg);
- break;
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb))
- * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb)
- * fnms(a,b,c,d) computes a = d - b*c
- */
- spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg);
- spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg);
- spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg);
- break;
-
- case PIPE_BLENDFACTOR_DST_ALPHA:
- /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */
- spe_fm(f, term2R_reg, fbR_reg, fbA_reg);
- spe_fm(f, term2G_reg, fbG_reg, fbA_reg);
- spe_fm(f, term2B_reg, fbB_reg, fbA_reg);
- break;
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb))
- * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb)
- * fnms(a,b,c,d) computes a = d - b*c
- */
- spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg);
- spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg);
- spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg);
- break;
- case PIPE_BLENDFACTOR_CONST_COLOR:
- /* We need the optional constant color registers */
- setup_const_register(f, &constR_reg, blend_color->color[0]);
- setup_const_register(f, &constG_reg, blend_color->color[1]);
- setup_const_register(f, &constB_reg, blend_color->color[2]);
- /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */
- spe_fm(f, term2R_reg, fbR_reg, constR_reg);
- spe_fm(f, term2G_reg, fbG_reg, constG_reg);
- spe_fm(f, term2B_reg, fbB_reg, constB_reg);
- break;
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- /* we'll need the optional constant alpha register */
- setup_const_register(f, &constA_reg, blend_color->color[3]);
- /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */
- spe_fm(f, term2R_reg, fbR_reg, constA_reg);
- spe_fm(f, term2G_reg, fbG_reg, constA_reg);
- spe_fm(f, term2B_reg, fbB_reg, constA_reg);
- break;
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- /* We need the optional constant color registers */
- setup_const_register(f, &constR_reg, blend_color->color[0]);
- setup_const_register(f, &constG_reg, blend_color->color[1]);
- setup_const_register(f, &constB_reg, blend_color->color[2]);
- /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc))
- * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc)
- * fnms(a,b,c,d) computes a = d - b*c
- */
- spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg);
- spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg);
- spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg);
- break;
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      /* we'll need the optional constant alpha register */
-      setup_const_register(f, &constA_reg, blend_color->color[3]);
- /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac))
- * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac)
- * fnms(a,b,c,d) computes a = d - b*c
- */
- spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg);
- spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg);
- spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg);
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */
- ASSERT(0);
- break;
-
- /* These are special D3D cases involving a second color output
- * from the fragment shader. I'm not sure we can support them
- * yet... XXX
- */
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-
- default:
- ASSERT(0);
- }
-
- /*
- * Compute Dest Alpha term. Like the above, we're looking for
- * the full term Afb*factor, not just the factor itself, because
- * in many cases we can avoid doing unnecessary multiplies.
- */
- switch (blend->rt[0].alpha_dst_factor) {
- case PIPE_BLENDFACTOR_ONE:
- /* factor = 1, so term = Afb */
- spe_move(f, term2A_reg, fbA_reg);
- break;
- case PIPE_BLENDFACTOR_ZERO:
- /* factor = 0, so term = 0 */
- spe_load_float(f, term2A_reg, 0.0f);
- break;
-
- case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */
- case PIPE_BLENDFACTOR_SRC_COLOR:
- /* factor = A, so term = Afb*A */
- spe_fm(f, term2A_reg, fbA_reg, fragA_reg);
- break;
-
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */
- /* fnms(a,b,c,d) computes a = d - b*c */
- spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg);
- break;
-
- case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */
- case PIPE_BLENDFACTOR_DST_COLOR:
- /* factor = Afb, so term = Afb*Afb */
- spe_fm(f, term2A_reg, fbA_reg, fbA_reg);
- break;
-
- case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */
- /* fnms(a,b,c,d) computes a = d - b*c */
- spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg);
- break;
-
- case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */
- case PIPE_BLENDFACTOR_CONST_COLOR:
- /* We need the optional constA_reg register */
- setup_const_register(f, &constA_reg, blend_color->color[3]);
- /* factor = Ac, so term = Afb*Ac */
- spe_fm(f, term2A_reg, fbA_reg, constA_reg);
- break;
-
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- /* We need the optional constA_reg register */
- setup_const_register(f, &constA_reg, blend_color->color[3]);
- /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */
- /* fnms(a,b,c,d) computes a = d - b*c */
- spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg);
- break;
-
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */
- ASSERT(0);
- break;
-
- /* These are special D3D cases involving a second color output
- * from the fragment shader. I'm not sure we can support them
- * yet... XXX
- */
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- default:
- ASSERT(0);
- }
-
- /*
- * Combine Src/Dest RGB terms as per the blend equation.
- */
- switch (blend->rt[0].rgb_func) {
- case PIPE_BLEND_ADD:
- spe_fa(f, fragR_reg, term1R_reg, term2R_reg);
- spe_fa(f, fragG_reg, term1G_reg, term2G_reg);
- spe_fa(f, fragB_reg, term1B_reg, term2B_reg);
- break;
- case PIPE_BLEND_SUBTRACT:
- spe_fs(f, fragR_reg, term1R_reg, term2R_reg);
- spe_fs(f, fragG_reg, term1G_reg, term2G_reg);
- spe_fs(f, fragB_reg, term1B_reg, term2B_reg);
- break;
- case PIPE_BLEND_REVERSE_SUBTRACT:
- spe_fs(f, fragR_reg, term2R_reg, term1R_reg);
- spe_fs(f, fragG_reg, term2G_reg, term1G_reg);
- spe_fs(f, fragB_reg, term2B_reg, term1B_reg);
- break;
- case PIPE_BLEND_MIN:
- spe_float_min(f, fragR_reg, term1R_reg, term2R_reg);
- spe_float_min(f, fragG_reg, term1G_reg, term2G_reg);
- spe_float_min(f, fragB_reg, term1B_reg, term2B_reg);
- break;
- case PIPE_BLEND_MAX:
- spe_float_max(f, fragR_reg, term1R_reg, term2R_reg);
- spe_float_max(f, fragG_reg, term1G_reg, term2G_reg);
- spe_float_max(f, fragB_reg, term1B_reg, term2B_reg);
- break;
- default:
- ASSERT(0);
- }
-
- /*
- * Combine Src/Dest A term
- */
- switch (blend->rt[0].alpha_func) {
- case PIPE_BLEND_ADD:
- spe_fa(f, fragA_reg, term1A_reg, term2A_reg);
- break;
- case PIPE_BLEND_SUBTRACT:
- spe_fs(f, fragA_reg, term1A_reg, term2A_reg);
- break;
- case PIPE_BLEND_REVERSE_SUBTRACT:
- spe_fs(f, fragA_reg, term2A_reg, term1A_reg);
- break;
- case PIPE_BLEND_MIN:
- spe_float_min(f, fragA_reg, term1A_reg, term2A_reg);
- break;
- case PIPE_BLEND_MAX:
- spe_float_max(f, fragA_reg, term1A_reg, term2A_reg);
- break;
- default:
- ASSERT(0);
- }
-
- spe_release_register(f, term1R_reg);
- spe_release_register(f, term1G_reg);
- spe_release_register(f, term1B_reg);
- spe_release_register(f, term1A_reg);
-
- spe_release_register(f, term2R_reg);
- spe_release_register(f, term2G_reg);
- spe_release_register(f, term2B_reg);
- spe_release_register(f, term2A_reg);
-
- spe_release_register(f, fbR_reg);
- spe_release_register(f, fbG_reg);
- spe_release_register(f, fbB_reg);
- spe_release_register(f, fbA_reg);
-
- spe_release_register(f, tmp_reg);
-
- /* Free any optional registers that actually got used */
- release_const_register(f, one_reg);
- release_const_register(f, constR_reg);
- release_const_register(f, constG_reg);
- release_const_register(f, constB_reg);
- release_const_register(f, constA_reg);
-}
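/* Aside: the fnms idiom the factor cases above lean on computes a = d - b*c,
 * so a term of the form color*(1 - factor) can be emitted as one instruction
 * instead of a subtract followed by a multiply. A minimal scalar sketch of the
 * identity follows; it is illustrative only, not the generated SPE code, and
 * the names are made up for the example.
 */
#include <assert.h>

/* Scalar model of SPE fnms: returns d - b*c (applied per SIMD lane on the SPU). */
static float fnms(float b, float c, float d)
{
   return d - b * c;
}

int main(void)
{
   float color = 0.5f, factor = 0.25f;
   /* color*(1 - factor) rewritten as color - color*factor: one fnms, no extra ops */
   assert(fnms(color, factor, color) == color * (1.0f - factor));
   return 0;
}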
-
-
-static void
-gen_logicop(const struct pipe_blend_state *blend,
- struct spe_function *f,
- int fragRGBA_reg, int fbRGBA_reg)
-{
- /* We've got four 32-bit RGBA packed pixels in each of
- * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
- * reds, greens, blues, and alphas.
-    */
- ASSERT(blend->logicop_enable);
-
- switch(blend->logicop_func) {
- case PIPE_LOGICOP_CLEAR: /* 0 */
- spe_zero(f, fragRGBA_reg);
- break;
- case PIPE_LOGICOP_NOR: /* ~(s | d) */
- spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
- break;
- case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */
- /* andc R, A, B computes R = A & ~B */
- spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg);
- break;
- case PIPE_LOGICOP_COPY_INVERTED: /* ~s */
- spe_complement(f, fragRGBA_reg, fragRGBA_reg);
- break;
- case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */
- /* andc R, A, B computes R = A & ~B */
- spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
- break;
- case PIPE_LOGICOP_INVERT: /* ~d */
- /* Note that (A nor A) == ~(A|A) == ~A */
- spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg);
- break;
- case PIPE_LOGICOP_XOR: /* s ^ d */
- spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
- break;
- case PIPE_LOGICOP_NAND: /* ~(s & d) */
- spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
- break;
- case PIPE_LOGICOP_AND: /* s & d */
- spe_and(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
- break;
- case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */
- spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
- spe_complement(f, fragRGBA_reg, fragRGBA_reg);
- break;
- case PIPE_LOGICOP_NOOP: /* d */
- spe_move(f, fragRGBA_reg, fbRGBA_reg);
- break;
- case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */
- /* orc R, A, B computes R = A | ~B */
- spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg);
- break;
- case PIPE_LOGICOP_COPY: /* s */
- break;
- case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */
- /* orc R, A, B computes R = A | ~B */
- spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
- break;
- case PIPE_LOGICOP_OR: /* s | d */
- spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg);
- break;
- case PIPE_LOGICOP_SET: /* 1 */
- spe_load_int(f, fragRGBA_reg, 0xffffffff);
- break;
- default:
- ASSERT(0);
- }
-}
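/* Aside: the inverted/reverse logic ops above hinge on the operand order of
 * andc and orc (R = A & ~B and R = A | ~B); swapping the operands flips which
 * of source and destination gets complemented. A scalar sketch of that mapping
 * follows, with illustrative names and 32-bit packed pixels assumed.
 */
#include <stdint.h>
#include <assert.h>

static uint32_t andc(uint32_t a, uint32_t b) { return a & ~b; }   /* R = A & ~B */
static uint32_t orc(uint32_t a, uint32_t b)  { return a | ~b; }   /* R = A | ~B */

int main(void)
{
   uint32_t s = 0x12345678, d = 0xa5a5a5a5;   /* source and dest pixels */
   assert(andc(d, s) == (~s & d));            /* PIPE_LOGICOP_AND_INVERTED */
   assert(andc(s, d) == (s & ~d));            /* PIPE_LOGICOP_AND_REVERSE  */
   assert(orc(d, s)  == (~s | d));            /* PIPE_LOGICOP_OR_INVERTED  */
   assert(orc(s, d)  == (s | ~d));            /* PIPE_LOGICOP_OR_REVERSE   */
   return 0;
}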
-
-
-/**
- * Generate code to pack a quad of float colors into four 32-bit integers.
- *
- * \param f SPE function to append instruction onto.
- * \param color_format the dest color packing format
- * \param r_reg register containing four red values (in/clobbered)
- * \param g_reg register containing four green values (in/clobbered)
- * \param b_reg register containing four blue values (in/clobbered)
- * \param a_reg register containing four alpha values (in/clobbered)
- * \param rgba_reg register to store the packed RGBA colors (out)
- */
-static void
-gen_pack_colors(struct spe_function *f,
- enum pipe_format color_format,
- int r_reg, int g_reg, int b_reg, int a_reg,
- int rgba_reg)
-{
- int rg_reg = spe_allocate_available_register(f);
- int ba_reg = spe_allocate_available_register(f);
-
- /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */
- spe_cfltu(f, r_reg, r_reg, 32);
- spe_cfltu(f, g_reg, g_reg, 32);
- spe_cfltu(f, b_reg, b_reg, 32);
- spe_cfltu(f, a_reg, a_reg, 32);
-
- /* Shift the most significant bytes to the least significant positions.
- * I.e.: reg = reg >> 24
- */
- spe_rotmi(f, r_reg, r_reg, -24);
- spe_rotmi(f, g_reg, g_reg, -24);
- spe_rotmi(f, b_reg, b_reg, -24);
- spe_rotmi(f, a_reg, a_reg, -24);
-
- /* Shift the color bytes according to the surface format */
- if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) {
- spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */
- spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */
- spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */
- }
- else if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) {
- spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */
- spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */
- spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */
- }
- else {
- ASSERT(0);
- }
-
- /* Merge red, green, blue, alpha registers to make packed RGBA colors.
- * Eg: after shifting according to color_format we might have:
- * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000}
- * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600}
- * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099}
- * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000}
- * OR-ing all those together gives us four packed colors:
- * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
- */
- spe_or(f, rg_reg, r_reg, g_reg);
- spe_or(f, ba_reg, a_reg, b_reg);
- spe_or(f, rgba_reg, rg_reg, ba_reg);
-
- spe_release_register(f, rg_reg);
- spe_release_register(f, ba_reg);
-}
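/* Aside: per pixel, the conversion above (cfltu to a 32-bit unsigned range,
 * shift down to 8 bits, rotate each channel into its byte, OR them together)
 * behaves roughly like the scalar sketch below. The byte positions follow the
 * B8G8R8A8 case handled above; the helper names are made up for the example
 * and the scaling is only an approximation of cfltu.
 */
#include <stdint.h>
#include <stdio.h>

/* Roughly what cfltu followed by a right shift of 24 does per lane:
 * clamp to [0,1] and scale to an 8-bit value. */
static uint32_t float_to_u8(float v)
{
   if (v < 0.0f) v = 0.0f;
   if (v > 1.0f) v = 1.0f;
   return (uint32_t)(v * 4294967295.0) >> 24;
}

/* Merge the shifted channels as in the B8G8R8A8 case above:
 * alpha<<24 | red<<16 | green<<8 | blue. */
static uint32_t pack_b8g8r8a8(float r, float g, float b, float a)
{
   return (float_to_u8(a) << 24) | (float_to_u8(r) << 16) |
          (float_to_u8(g) << 8)  |  float_to_u8(b);
}

int main(void)
{
   printf("0x%08x\n", pack_b8g8r8a8(1.0f, 0.0f, 0.5f, 1.0f));   /* prints 0xffff007f */
   return 0;
}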
-
-
-static void
-gen_colormask(struct spe_function *f,
- uint colormask,
- enum pipe_format color_format,
- int fragRGBA_reg, int fbRGBA_reg)
-{
- /* We've got four 32-bit RGBA packed pixels in each of
- * fragRGBA_reg and fbRGBA_reg, not sets of floating-point
- * reds, greens, blues, and alphas. Further, the pixels
- * are packed according to the given color format, not
- * necessarily RGBA...
- */
- uint r_mask;
- uint g_mask;
- uint b_mask;
- uint a_mask;
-
- /* Calculate exactly where the bits for any particular color
- * end up, so we can mask them correctly.
- */
- switch(color_format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- /* ARGB */
- a_mask = 0xff000000;
- r_mask = 0x00ff0000;
- g_mask = 0x0000ff00;
- b_mask = 0x000000ff;
- break;
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- /* BGRA */
- b_mask = 0xff000000;
- g_mask = 0x00ff0000;
- r_mask = 0x0000ff00;
- a_mask = 0x000000ff;
- break;
- default:
- ASSERT(0);
- }
-
- /* For each R, G, B, and A component we're supposed to mask out,
- * clear its bits. Then our mask operation later will work
- * as expected.
- */
- if (!(colormask & PIPE_MASK_R)) {
- r_mask = 0;
- }
- if (!(colormask & PIPE_MASK_G)) {
- g_mask = 0;
- }
- if (!(colormask & PIPE_MASK_B)) {
- b_mask = 0;
- }
- if (!(colormask & PIPE_MASK_A)) {
- a_mask = 0;
- }
-
- /* Get a temporary register to hold the mask that will be applied
- * to the fragment
- */
- int colormask_reg = spe_allocate_available_register(f);
-
- /* The actual mask we're going to use is an OR of the remaining R, G, B,
- * and A masks. Load the result value into our temporary register.
- */
- spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask);
-
- /* Use the mask register to select between the fragment color
- * values and the frame buffer color values. Wherever the
- * mask has a 0 bit, the current frame buffer color should override
- * the fragment color. Wherever the mask has a 1 bit, the
-    * fragment color should persist. The Select Bits (selb rt, rA, rB, rM)
-    * instruction will select bits from its first operand rA wherever the
-    * mask bits rM are 0, and from its second operand rB wherever the
- * mask bits rM are 1. That means that the frame buffer color is the
- * first operand, and the fragment color the second.
- */
- spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg);
-
- /* Release the temporary register and we're done */
- spe_release_register(f, colormask_reg);
-}
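/* Aside: the selb write mask above keeps framebuffer bits wherever the mask is
 * 0 and fragment bits wherever it is 1. A scalar equivalent of that select
 * follows; the values are illustrative and use the ARGB word layout above.
 */
#include <stdint.h>
#include <assert.h>

/* Scalar model of selb rt, rA, rB, rM: bits of B where M is 1, bits of A where M is 0. */
static uint32_t selb(uint32_t a, uint32_t b, uint32_t m)
{
   return (a & ~m) | (b & m);
}

int main(void)
{
   uint32_t fb = 0x11223344, frag = 0xaabbccdd;
   uint32_t mask = 0x00ff00ff;   /* only red and blue writable in the ARGB word layout */
   assert(selb(fb, frag, mask) == 0x11bb33dd);
   return 0;
}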
-
-
-/**
- * This function is annoyingly similar to gen_depth_test(), above, except
- * that instead of comparing two varying values (i.e. fragment and buffer),
- * we're comparing a varying value with a static value. As such, we have
- * access to the Compare Immediate instructions, which gen_depth_test()
- * cannot use; that is what makes the two functions different.
- *
- * There's some added complexity if there's a non-trivial state->mask
- * value; then stencil and reference both must be masked
- *
- * The return value in the stencil_pass_reg is a bitmask of valid
- * fragments that also passed the stencil test. The bitmask of valid
- * fragments that failed would be found in
- * (fragment_mask_reg & ~stencil_pass_reg).
- */
-static void
-gen_stencil_test(struct spe_function *f,
- const struct pipe_stencil_state *state,
- const unsigned ref_value,
- uint stencil_max_value,
- int fragment_mask_reg,
- int fbS_reg,
- int stencil_pass_reg)
-{
- /* Generate code that puts the set of passing fragments into the
- * stencil_pass_reg register, taking into account whether each fragment
- * was active to begin with.
- */
- switch (state->func) {
- case PIPE_FUNC_EQUAL:
- if (state->valuemask == stencil_max_value) {
- /* stencil_pass = fragment_mask & (s == reference) */
- spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, ref_value);
- spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
- }
- else {
- /* stencil_pass = fragment_mask & ((s&mask) == (reference&mask)) */
-         int tmp_masked_stencil = spe_allocate_available_register(f);
- spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
- spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil,
- state->valuemask & ref_value);
- spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
- spe_release_register(f, tmp_masked_stencil);
- }
- break;
-
- case PIPE_FUNC_NOTEQUAL:
- if (state->valuemask == stencil_max_value) {
- /* stencil_pass = fragment_mask & ~(s == reference) */
- spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, ref_value);
- spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
- }
- else {
- /* stencil_pass = fragment_mask & ~((s&mask) == (reference&mask)) */
- int tmp_masked_stencil = spe_allocate_available_register(f);
- spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
- spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil,
- state->valuemask & ref_value);
- spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
- spe_release_register(f, tmp_masked_stencil);
- }
- break;
-
- case PIPE_FUNC_LESS:
- if (state->valuemask == stencil_max_value) {
- /* stencil_pass = fragment_mask & (reference < s) */
- spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, ref_value);
- spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
- }
- else {
- /* stencil_pass = fragment_mask & ((reference&mask) < (s & mask)) */
- int tmp_masked_stencil = spe_allocate_available_register(f);
- spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
- spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil,
- state->valuemask & ref_value);
- spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
- spe_release_register(f, tmp_masked_stencil);
- }
- break;
-
- case PIPE_FUNC_GREATER:
- if (state->valuemask == stencil_max_value) {
- /* stencil_pass = fragment_mask & (reference > s) */
- /* There's no convenient Compare Less Than Immediate instruction, so
- * we'll have to do this one the harder way, by loading a register and
- * comparing directly. Compare Logical Greater Than Word (clgt)
- * treats its operands as unsigned - no sign extension.
- */
- int tmp_reg = spe_allocate_available_register(f);
- spe_load_uint(f, tmp_reg, ref_value);
- spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg);
- spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
- spe_release_register(f, tmp_reg);
- }
- else {
- /* stencil_pass = fragment_mask & ((reference&mask) > (s&mask)) */
- int tmp_reg = spe_allocate_available_register(f);
- int tmp_masked_stencil = spe_allocate_available_register(f);
- spe_load_uint(f, tmp_reg, state->valuemask & ref_value);
- spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
- spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil);
- spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
- spe_release_register(f, tmp_reg);
- spe_release_register(f, tmp_masked_stencil);
- }
- break;
-
- case PIPE_FUNC_GEQUAL:
- if (state->valuemask == stencil_max_value) {
- /* stencil_pass = fragment_mask & (reference >= s)
- * = fragment_mask & ~(s > reference) */
- spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg,
- ref_value);
- spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
- }
- else {
- /* stencil_pass = fragment_mask & ~((s&mask) > (reference&mask)) */
- int tmp_masked_stencil = spe_allocate_available_register(f);
- spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
- spe_compare_greater_uint(f, stencil_pass_reg, tmp_masked_stencil,
- state->valuemask & ref_value);
- spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
- spe_release_register(f, tmp_masked_stencil);
- }
- break;
-
- case PIPE_FUNC_LEQUAL:
- if (state->valuemask == stencil_max_value) {
-         /* stencil_pass = fragment_mask & (reference <= s)
- * = fragment_mask & ~(reference > s) */
- /* As above, we have to do this by loading a register */
- int tmp_reg = spe_allocate_available_register(f);
- spe_load_uint(f, tmp_reg, ref_value);
- spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg);
- spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
- spe_release_register(f, tmp_reg);
- }
- else {
- /* stencil_pass = fragment_mask & ~((reference&mask) > (s&mask)) */
- int tmp_reg = spe_allocate_available_register(f);
- int tmp_masked_stencil = spe_allocate_available_register(f);
- spe_load_uint(f, tmp_reg, ref_value & state->valuemask);
- spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask);
- spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil);
- spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg);
- spe_release_register(f, tmp_reg);
- spe_release_register(f, tmp_masked_stencil);
- }
- break;
-
- case PIPE_FUNC_NEVER:
- /* stencil_pass = fragment_mask & 0 = 0 */
- spe_load_uint(f, stencil_pass_reg, 0);
- break;
-
- case PIPE_FUNC_ALWAYS:
- /* stencil_pass = fragment_mask & 1 = fragment_mask */
- spe_move(f, stencil_pass_reg, fragment_mask_reg);
- break;
- }
-
- /* The fragments that passed the stencil test are now in stencil_pass_reg.
- * The fragments that failed would be (fragment_mask_reg & ~stencil_pass_reg).
- */
-}
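/* Aside: the comparison rewrites used above, for example GEQUAL expressed as
 * the complement of a greater-than test with both the stored stencil and the
 * reference masked, can be checked with a scalar sketch like the one below.
 * The helper name is made up for the example.
 */
#include <stdint.h>
#include <assert.h>

/* Scalar model of the masked GEQUAL case: pass iff (ref & mask) >= (s & mask),
 * generated above as the complement of ((s & mask) > (ref & mask)). */
static int stencil_gequal_passes(uint32_t s, uint32_t ref, uint32_t mask)
{
   return !((s & mask) > (ref & mask));
}

int main(void)
{
   assert(stencil_gequal_passes(0x7f, 0x80, 0xff) == 1);   /* ref >= s: pass */
   assert(stencil_gequal_passes(0x81, 0x7f, 0xff) == 0);   /* ref <  s: fail */
   assert(stencil_gequal_passes(0x81, 0x7f, 0x0f) == 1);   /* masked: 0xf >= 0x1: pass */
   return 0;
}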
-
-
-/**
- * This function generates code that calculates a set of new stencil values
- * given the earlier values and the operation to apply. It does not
- * apply any tests. It is intended to be called up to 3 times
- * (for the stencil fail operation, for the stencil pass-z fail operation,
- * and for the stencil pass-z pass operation) to collect up to three
- * possible sets of values, and for the caller to combine them based
- * on the result of the tests.
- *
- * stencil_max_value should be (2^n - 1) where n is the number of bits
- * in the stencil buffer - in other words, it should be usable as a mask.
- */
-static void
-gen_stencil_values(struct spe_function *f,
- uint stencil_op,
- uint stencil_ref_value,
- uint stencil_max_value,
- int fbS_reg,
- int newS_reg)
-{
- /* The code below assumes that newS_reg and fbS_reg are not the same
- * register; if they can be, the calculations below will have to use
- * an additional temporary register. For now, mark the assumption
- * with an assertion that will fail if they are the same.
- */
- ASSERT(fbS_reg != newS_reg);
-
- /* The code also assumes that the stencil_max_value is of the form
- * 2^n-1 and can therefore be used as a mask for the valid bits in
- * addition to a maximum. Make sure this is the case as well.
- * The clever math below exploits the fact that incrementing a
- * binary number serves to flip all the bits of a number starting at
- * the LSB and continuing to (and including) the first zero bit
- * found. That means that a number and its increment will always
- * have at least one bit in common (the high order bit, if nothing
- * else) *unless* the number is zero, *or* the number is of a form
- * consisting of some number of 1s in the low-order bits followed
- * by nothing but 0s in the high-order bits. The latter case
- * implies it's of the form 2^n-1.
- */
- ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0);
-
- switch(stencil_op) {
- case PIPE_STENCIL_OP_KEEP:
- /* newS = S */
- spe_move(f, newS_reg, fbS_reg);
- break;
-
- case PIPE_STENCIL_OP_ZERO:
- /* newS = 0 */
- spe_zero(f, newS_reg);
- break;
-
- case PIPE_STENCIL_OP_REPLACE:
- /* newS = stencil reference value */
- spe_load_uint(f, newS_reg, stencil_ref_value);
- break;
-
- case PIPE_STENCIL_OP_INCR: {
- /* newS = (s == max ? max : s + 1) */
- int equals_reg = spe_allocate_available_register(f);
-
- spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value);
- /* Add Word Immediate computes rT = rA + 10-bit signed immediate */
- spe_ai(f, newS_reg, fbS_reg, 1);
- /* Select from the current value or the new value based on the equality test */
- spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg);
-
- spe_release_register(f, equals_reg);
- break;
- }
- case PIPE_STENCIL_OP_DECR: {
- /* newS = (s == 0 ? 0 : s - 1) */
- int equals_reg = spe_allocate_available_register(f);
-
- spe_compare_equal_uint(f, equals_reg, fbS_reg, 0);
- /* Add Word Immediate with a (-1) value works */
- spe_ai(f, newS_reg, fbS_reg, -1);
- /* Select from the current value or the new value based on the equality test */
- spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg);
-
- spe_release_register(f, equals_reg);
- break;
- }
- case PIPE_STENCIL_OP_INCR_WRAP:
- /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can
- * do a normal add and mask off the correct bits
- */
- spe_ai(f, newS_reg, fbS_reg, 1);
- spe_and_uint(f, newS_reg, newS_reg, stencil_max_value);
- break;
-
- case PIPE_STENCIL_OP_DECR_WRAP:
- /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */
- spe_ai(f, newS_reg, fbS_reg, -1);
- spe_and_uint(f, newS_reg, newS_reg, stencil_max_value);
- break;
-
- case PIPE_STENCIL_OP_INVERT:
- /* newS = ~s. We take advantage of the mask/max value to invert only
- * the valid bits for the field so we don't have to do an extra "and".
- */
- spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value);
- break;
-
- default:
- ASSERT(0);
- }
-}
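/* Aside: two facts the function above relies on are easy to verify directly.
 * A positive value has the form 2^n - 1 exactly when ((v + 1) & v) == 0, and
 * the INCR_WRAP/DECR_WRAP cases reduce to an add followed by that same value
 * used as a mask. A scalar sketch, with illustrative names.
 */
#include <stdint.h>
#include <assert.h>

static int is_pow2_minus_1(uint32_t v)
{
   return v > 0 && ((v + 1) & v) == 0;
}

static uint32_t incr_wrap(uint32_t s, uint32_t max) { return (s + 1) & max; }
static uint32_t decr_wrap(uint32_t s, uint32_t max) { return (s - 1) & max; }

int main(void)
{
   assert(is_pow2_minus_1(0xff) && !is_pow2_minus_1(0xfe));
   assert(incr_wrap(0xff, 0xff) == 0x00);   /* max wraps to 0 */
   assert(decr_wrap(0x00, 0xff) == 0xff);   /* 0 wraps to max */
   return 0;
}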
-
-
-/**
- * This function generates code to get all the necessary possible
- * stencil values. For each of the output registers (fail_reg,
- * zfail_reg, and zpass_reg), it either allocates a new register
- * and calculates a new set of values based on the stencil operation,
- * or it reuses a register allocation and calculation done for an
- * earlier (matching) operation, or it reuses the fbS_reg register
- * (if the stencil operation is KEEP, which doesn't change the
- * stencil buffer).
- *
- * Since this function allocates a variable number of registers,
- * to avoid incurring complex logic to free them, they should
- * be allocated after a spe_allocate_register_set() call
- * and released by the corresponding spe_release_register_set() call.
- */
-static void
-gen_get_stencil_values(struct spe_function *f,
- const struct pipe_stencil_state *stencil,
- const unsigned ref_value,
- const uint depth_enabled,
- int fbS_reg,
- int *fail_reg,
- int *zfail_reg,
- int *zpass_reg)
-{
- uint zfail_op;
-
- /* Stenciling had better be enabled here */
- ASSERT(stencil->enabled);
-
- /* If the depth test is not enabled, it is treated as though it always
- * passes, which means that the zfail_op is not considered - a
- * failing stencil test triggers the fail_op, and a passing one
- * triggers the zpass_op
- *
- * As an optimization, override calculation of the zfail_op values
- * if they aren't going to be used. By setting the value of
- * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed
- * to match the incoming stencil values, and no calculation will
- * be done.
- */
- if (depth_enabled) {
- zfail_op = stencil->zfail_op;
- }
- else {
- zfail_op = PIPE_STENCIL_OP_KEEP;
- }
-
- /* One-sided or front-facing stencil */
- if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) {
- *fail_reg = fbS_reg;
- }
- else {
- *fail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, stencil->fail_op, ref_value,
- 0xff, fbS_reg, *fail_reg);
- }
-
- /* Check the possibly overridden value, not the structure value */
- if (zfail_op == PIPE_STENCIL_OP_KEEP) {
- *zfail_reg = fbS_reg;
- }
- else if (zfail_op == stencil->fail_op) {
- *zfail_reg = *fail_reg;
- }
- else {
- *zfail_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, stencil->zfail_op, ref_value,
- 0xff, fbS_reg, *zfail_reg);
- }
-
- if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
- *zpass_reg = fbS_reg;
- }
- else if (stencil->zpass_op == stencil->fail_op) {
- *zpass_reg = *fail_reg;
- }
- else if (stencil->zpass_op == zfail_op) {
- *zpass_reg = *zfail_reg;
- }
- else {
- *zpass_reg = spe_allocate_available_register(f);
- gen_stencil_values(f, stencil->zpass_op, ref_value,
- 0xff, fbS_reg, *zpass_reg);
- }
-}
-
-/**
- * Note that fbZ_reg may *not* be set on entry, if in fact
- * the depth test is not enabled. This function must not use
- * the register if depth is not enabled.
- */
-static boolean
-gen_stencil_depth_test(struct spe_function *f,
- const struct pipe_depth_stencil_alpha_state *dsa,
- const struct pipe_stencil_ref *stencil_ref,
- const uint facing,
- const int mask_reg, const int fragZ_reg,
- const int fbZ_reg, const int fbS_reg)
-{
- /* True if we've generated code that could require writeback to the
- * depth and/or stencil buffers
- */
- boolean modified_buffers = FALSE;
-
- boolean need_to_calculate_stencil_values;
- boolean need_to_writemask_stencil_values;
-
- struct pipe_stencil_state *stencil;
-
- /* Registers. We may or may not actually allocate these, depending
- * on whether the state values indicate that we need them.
- */
- int stencil_pass_reg, stencil_fail_reg;
- int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values;
- int stencil_writemask_reg;
- int zmask_reg;
- int newS_reg;
- unsigned ref_value;
-
- /* Stenciling is quite complex: up to six different configurable stencil
- * operations/calculations can be required (three each for front-facing
- * and back-facing fragments). Many of those operations will likely
- * be identical, so there's good reason to try to avoid calculating
- * the same values more than once (which unfortunately makes the code less
- * straightforward).
- *
- * To make register management easier, we start a new
- * register set; we can release all the registers in the set at
- * once, and avoid having to keep track of exactly which registers
- * we allocate. We can still allocate and free registers as
- * desired (if we know we no longer need a register), but we don't
- * have to spend the complexity to track the more difficult variant
- * register usage scenarios.
- */
- spe_comment(f, 0, "Allocating stencil register set");
- spe_allocate_register_set(f);
-
- /* The facing we're given is the fragment facing; it doesn't
- * exactly match the stencil facing. If stencil is enabled,
- * but two-sided stencil is *not* enabled, we use the same
- * stencil settings for both front- and back-facing fragments.
- * We only use the "back-facing" stencil for backfacing fragments
- * if two-sided stenciling is enabled.
- */
- if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) {
- stencil = &dsa->stencil[1];
- ref_value = stencil_ref->ref_value[1];
- }
- else {
- stencil = &dsa->stencil[0];
- ref_value = stencil_ref->ref_value[0];
- }
-
- /* Calculate the writemask. If the writemask is trivial (either
- * all 0s, meaning that we don't need to calculate any stencil values
- * because they're not going to change the stencil anyway, or all 1s,
- * meaning that we have to calculate the stencil values but do not
- * need to mask them), we can avoid generating code. Don't forget
- * that we need to consider backfacing stencil, if enabled.
- *
- * Note that if the backface stencil is *not* enabled, the backface
- * stencil will have the same values as the frontface stencil.
- */
- if (stencil->fail_op == PIPE_STENCIL_OP_KEEP &&
- stencil->zfail_op == PIPE_STENCIL_OP_KEEP &&
- stencil->zpass_op == PIPE_STENCIL_OP_KEEP) {
- need_to_calculate_stencil_values = FALSE;
- need_to_writemask_stencil_values = FALSE;
- }
- else if (stencil->writemask == 0x0) {
- /* All changes are writemasked out, so no need to calculate
- * what those changes might be, and no need to write anything back.
- */
- need_to_calculate_stencil_values = FALSE;
- need_to_writemask_stencil_values = FALSE;
- }
- else if (stencil->writemask == 0xff) {
- /* Still trivial, but a little less so. We need to write the stencil
- * values, but we don't need to mask them.
- */
- need_to_calculate_stencil_values = TRUE;
- need_to_writemask_stencil_values = FALSE;
- }
- else {
- /* The general case: calculate, mask, and write */
- need_to_calculate_stencil_values = TRUE;
- need_to_writemask_stencil_values = TRUE;
-
- /* While we're here, generate code that calculates what the
- * writemask should be. If backface stenciling is enabled,
- * and the backface writemask is not the same as the frontface
- * writemask, we'll have to generate code that merges the
- * two masks into a single effective mask based on fragment facing.
- */
- spe_comment(f, 0, "Computing stencil writemask");
- stencil_writemask_reg = spe_allocate_available_register(f);
- spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].writemask);
- }
-
- /* At least one-sided stenciling must be on. Generate code that
- * runs the stencil test on the basic/front-facing stencil, leaving
- * the mask of passing stencil bits in stencil_pass_reg. This mask will
- * be used both to mask the set of active pixels, and also to
- * determine how the stencil buffer changes.
- *
- * This test will *not* change the value in mask_reg (because we don't
- * yet know whether to apply the two-sided stencil or one-sided stencil).
- */
- spe_comment(f, 0, "Running basic stencil test");
- stencil_pass_reg = spe_allocate_available_register(f);
- gen_stencil_test(f, stencil, ref_value, 0xff, mask_reg, fbS_reg, stencil_pass_reg);
-
- /* Generate code that, given the mask of valid fragments and the
- * mask of valid fragments that passed the stencil test, computes
- * the mask of valid fragments that failed the stencil test. We
- * have to do this before we run a depth test (because the
- * depth test should not be performed on fragments that failed the
- * stencil test, and because the depth test will update the
- * mask of valid fragments based on the results of the depth test).
- */
- spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask");
- stencil_fail_reg = spe_allocate_available_register(f);
- spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg);
- /* Now remove the stenciled-out pixels from the valid fragment mask,
- * so we can later use the valid fragment mask in the depth test.
- */
- spe_and(f, mask_reg, mask_reg, stencil_pass_reg);
-
- /* We may not need to calculate stencil values, if the writemask is off */
- if (need_to_calculate_stencil_values) {
- /* Generate code that calculates exactly which stencil values we need,
- * without calculating the same value twice (say, if two different
- * stencil ops have the same value). This code will work for one-sided
- * and two-sided stenciling (so that we take into account that operations
- * may match between front and back stencils), and will also take into
- * account whether the depth test is enabled (if the depth test is off,
- * we don't need any of the zfail results, because the depth test always
- * is considered to pass if it is disabled). Any register value that
- * does not need to be calculated will come back with the same value
- * that's in fbS_reg.
- *
-       * This function will allocate a variable number of registers that
- * will be released as part of the register set.
- */
- spe_comment(f, 0, facing == CELL_FACING_FRONT
- ? "Computing front-facing stencil values"
- : "Computing back-facing stencil values");
- gen_get_stencil_values(f, stencil, ref_value, dsa->depth.enabled, fbS_reg,
- &stencil_fail_values, &stencil_pass_depth_fail_values,
- &stencil_pass_depth_pass_values);
- }
-
- /* We now have all the stencil values we need. We also need
- * the results of the depth test to figure out which
- * stencil values will become the new stencil values. (Even if
- * we aren't actually calculating stencil values, we need to apply
- * the depth test if it's enabled.)
- *
- * The code generated by gen_depth_test() returns the results of the
- * test in the given register, but also alters the mask_reg based
- * on the results of the test.
- */
- if (dsa->depth.enabled) {
- spe_comment(f, 0, "Running stencil depth test");
- zmask_reg = spe_allocate_available_register(f);
- modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg,
- fbZ_reg, zmask_reg);
- }
-
- if (need_to_calculate_stencil_values) {
-
- /* If we need to writemask the stencil values before going into
- * the stencil buffer, we'll have to use a new register to
- * hold the new values. If not, we can just keep using the
- * current register.
- */
- if (need_to_writemask_stencil_values) {
- newS_reg = spe_allocate_available_register(f);
- spe_comment(f, 0, "Saving current stencil values for writemasking");
- spe_move(f, newS_reg, fbS_reg);
- }
- else {
- newS_reg = fbS_reg;
- }
-
- /* Merge in the selected stencil fail values */
- if (stencil_fail_values != fbS_reg) {
- spe_comment(f, 0, "Loading stencil fail values");
- spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg);
- modified_buffers = TRUE;
- }
-
- /* Same for the stencil pass/depth fail values. If this calculation
- * is not needed (say, if depth test is off), then the
- * stencil_pass_depth_fail_values register will be equal to fbS_reg
- * and we'll skip the calculation.
- */
- if (stencil_pass_depth_fail_values != fbS_reg) {
- /* We don't actually have a stencil pass/depth fail mask yet.
- * Calculate it here from the stencil passing mask and the
- * depth passing mask. Note that zmask_reg *must* have been
- * set above if we're here.
- */
- uint stencil_pass_depth_fail_mask =
- spe_allocate_available_register(f);
-
- spe_comment(f, 0, "Loading stencil pass/depth fail values");
- spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg);
-
- spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values,
- stencil_pass_depth_fail_mask);
-
- spe_release_register(f, stencil_pass_depth_fail_mask);
- modified_buffers = TRUE;
- }
-
- /* Same for the stencil pass/depth pass mask. Note that we
- * *can* get here with zmask_reg being unset (if the depth
- * test is off but the stencil test is on). In this case,
- * we assume the depth test passes, and don't need to mask
- * the stencil pass mask with the Z mask.
- */
- if (stencil_pass_depth_pass_values != fbS_reg) {
- if (dsa->depth.enabled) {
- uint stencil_pass_depth_pass_mask = spe_allocate_available_register(f);
- /* We'll need a separate register */
- spe_comment(f, 0, "Loading stencil pass/depth pass values");
- spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg);
- spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask);
- spe_release_register(f, stencil_pass_depth_pass_mask);
- }
- else {
- /* We can use the same stencil-pass register */
- spe_comment(f, 0, "Loading stencil pass values");
- spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg);
- }
- modified_buffers = TRUE;
- }
-
- /* Almost done. If we need to writemask, do it now, leaving the
- * results in the fbS_reg register passed in. If we don't need
- * to writemask, then the results are *already* in the fbS_reg,
- * so there's nothing more to do.
- */
-
- if (need_to_writemask_stencil_values && modified_buffers) {
- /* The Select Bytes command makes a fine writemask. Where
- * the mask is 0, the first (original) values are retained,
- * effectively masking out changes. Where the mask is 1, the
- * second (new) values are retained, incorporating changes.
- */
- spe_comment(f, 0, "Writemasking new stencil values");
- spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg);
- }
-
- } /* done calculating stencil values */
-
- /* The stencil and/or depth values have been applied, and the
- * mask_reg, fbS_reg, and fbZ_reg values have been updated.
- * We're all done, except that we've allocated a fair number
- * of registers that we didn't bother tracking. Release all
- * those registers as part of the register set, and go home.
- */
- spe_comment(f, 0, "Releasing stencil register set");
- spe_release_register_set(f);
-
- /* Return TRUE if we could have modified the stencil and/or
- * depth buffers.
- */
- return modified_buffers;
-}
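/* Aside: per pixel, the mask/selb merging above amounts to a three-way choice
 * between the stencil-fail, depth-fail and depth-pass stencil values (when the
 * depth test is disabled, the code treats depth as always passing). A scalar
 * model of that selection follows, with illustrative names.
 */
#include <stdint.h>
#include <assert.h>

/* New stencil value: the fail value if the stencil test failed, the zfail value
 * if stencil passed but depth failed, and the zpass value if both passed. */
static uint32_t merge_stencil(int stencil_passed, int depth_passed,
                              uint32_t fail_val, uint32_t zfail_val,
                              uint32_t zpass_val)
{
   if (!stencil_passed)
      return fail_val;
   return depth_passed ? zpass_val : zfail_val;
}

int main(void)
{
   assert(merge_stencil(0, 0, 1, 2, 3) == 1);
   assert(merge_stencil(1, 0, 1, 2, 3) == 2);
   assert(merge_stencil(1, 1, 1, 2, 3) == 3);
   return 0;
}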
-
-
-/**
- * Generate depth and/or stencil test code.
- * \param cell context
- * \param dsa depth/stencil/alpha state
- * \param f spe function to emit
- * \param facing either CELL_FACING_FRONT or CELL_FACING_BACK
- * \param mask_reg register containing the pixel alive/dead mask
- * \param depth_tile_reg register containing address of z/stencil tile
- * \param quad_offset_reg offset to quad from start of tile
- * \param fragZ_reg register containing fragment Z values
- */
-static void
-gen_depth_stencil(struct cell_context *cell,
- const struct pipe_depth_stencil_alpha_state *dsa,
- const struct pipe_stencil_ref *stencil_ref,
- struct spe_function *f,
- uint facing,
- int mask_reg,
- int depth_tile_reg,
- int quad_offset_reg,
-                  int fragZ_reg)
-{
- const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
- boolean write_depth_stencil;
-
- /* framebuffer's combined z/stencil values register */
- int fbZS_reg = spe_allocate_available_register(f);
-
-   /* Framebuffer Z values register */
- int fbZ_reg = spe_allocate_available_register(f);
-
- /* Framebuffer stencil values register (may not be used) */
- int fbS_reg = spe_allocate_available_register(f);
-
- /* 24-bit mask register (may not be used) */
- int zmask_reg = spe_allocate_available_register(f);
-
- /**
- * The following code:
- * 1. fetch quad of packed Z/S values from the framebuffer tile.
-    * 2. extract the separate Z and S values from the packed values
- * 3. convert fragment Z values from float in [0,1] to 32/24/16-bit ints
- *
- * The instructions for doing this are interleaved for better performance.
- */
- spe_comment(f, 0, "Fetch Z/stencil quad from tile");
-
- switch(zs_format) {
- case PIPE_FORMAT_Z24_UNORM_S8_UINT: /* fall through */
- case PIPE_FORMAT_Z24X8_UNORM:
- /* prepare mask to extract Z vals from ZS vals */
- spe_load_uint(f, zmask_reg, 0x00ffffff);
-
- /* convert fragment Z from [0,1] to 32-bit ints */
- spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
-
- /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
- spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
-
- /* right shift 32-bit fragment Z to 24 bits */
- spe_rotmi(f, fragZ_reg, fragZ_reg, -8);
-
- /* extract 24-bit Z values from ZS values by masking */
- spe_and(f, fbZ_reg, fbZS_reg, zmask_reg);
-
- /* extract 8-bit stencil values by shifting */
- spe_rotmi(f, fbS_reg, fbZS_reg, -24);
- break;
-
- case PIPE_FORMAT_S8_UINT_Z24_UNORM: /* fall through */
- case PIPE_FORMAT_X8Z24_UNORM:
- /* convert fragment Z from [0,1] to 32-bit ints */
- spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
-
- /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
- spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
-
- /* right shift 32-bit fragment Z to 24 bits */
- spe_rotmi(f, fragZ_reg, fragZ_reg, -8);
-
- /* extract 24-bit Z values from ZS values by shifting */
- spe_rotmi(f, fbZ_reg, fbZS_reg, -8);
-
- /* extract 8-bit stencil values by masking */
- spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff);
- break;
-
- case PIPE_FORMAT_Z32_UNORM:
- /* Load: fbZ_reg = memory[depth_tile_reg + offset_reg] */
- spe_lqx(f, fbZ_reg, depth_tile_reg, quad_offset_reg);
-
- /* convert fragment Z from [0,1] to 32-bit ints */
- spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
-
- /* No stencil, so can't do anything there */
- break;
-
- case PIPE_FORMAT_Z16_UNORM:
- /* XXX This code for 16bpp Z is broken! */
-
- /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
- spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
-
- /* Copy over 4 32-bit values */
- spe_move(f, fbZ_reg, fbZS_reg);
-
- /* convert Z from [0,1] to 16-bit ints */
- spe_cfltu(f, fragZ_reg, fragZ_reg, 32);
- spe_rotmi(f, fragZ_reg, fragZ_reg, -16);
- /* No stencil */
- break;
-
- default:
- ASSERT(0); /* invalid format */
- }
-
- /* If stencil is enabled, use the stencil-specific code
- * generator to generate both the stencil and depth (if needed)
- * tests. Otherwise, if only depth is enabled, generate
- * a quick depth test. The test generators themselves will
- * report back whether the depth/stencil buffer has to be
- * written back.
- */
- if (dsa->stencil[0].enabled) {
- /* This will perform the stencil and depth tests, and update
- * the mask_reg, fbZ_reg, and fbS_reg as required by the
- * tests.
- */
- ASSERT(fbS_reg >= 0);
- spe_comment(f, 0, "Perform stencil test");
-
- /* Note that fbZ_reg may not be set on entry, if stenciling
- * is enabled but there's no Z-buffer. The
- * gen_stencil_depth_test() function must ignore the
- * fbZ_reg register if depth is not enabled.
- */
- write_depth_stencil = gen_stencil_depth_test(f, dsa, stencil_ref, facing,
- mask_reg, fragZ_reg,
- fbZ_reg, fbS_reg);
- }
- else if (dsa->depth.enabled) {
- int zmask_reg = spe_allocate_available_register(f);
- ASSERT(fbZ_reg >= 0);
- spe_comment(f, 0, "Perform depth test");
- write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg,
- fbZ_reg, zmask_reg);
- spe_release_register(f, zmask_reg);
- }
- else {
- write_depth_stencil = FALSE;
- }
-
- if (write_depth_stencil) {
- /* Merge latest Z and Stencil values into fbZS_reg.
- * fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
-       * fbS_reg has four 8-bit stencil values in bits [7..0].
- */
- spe_comment(f, 0, "Store quad's depth/stencil values in tile");
- if (zs_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
- zs_format == PIPE_FORMAT_Z24X8_UNORM) {
- spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
- spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
- }
- else if (zs_format == PIPE_FORMAT_S8_UINT_Z24_UNORM ||
- zs_format == PIPE_FORMAT_X8Z24_UNORM) {
- spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */
- spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
- }
- else if (zs_format == PIPE_FORMAT_Z32_UNORM) {
- spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */
- }
- else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
- spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */
- }
- else if (zs_format == PIPE_FORMAT_S8_UINT) {
- ASSERT(0); /* XXX to do */
- }
- else {
- ASSERT(0); /* bad zs_format */
- }
-
- /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
- spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
- }
-
- /* Don't need these any more */
- spe_release_register(f, fbZS_reg);
- spe_release_register(f, fbZ_reg);
- spe_release_register(f, fbS_reg);
- spe_release_register(f, zmask_reg);
-}
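/* Aside: for the Z24/S8 layouts handled above, the per-pixel unpack and repack
 * reduce to a mask and a shift. A scalar sketch of the Z24_UNORM_S8_UINT case
 * (stencil in the top byte, Z in the low 24 bits), with illustrative names.
 */
#include <stdint.h>
#include <assert.h>

static void unpack_z24s8(uint32_t zs, uint32_t *z, uint32_t *s)
{
   *z = zs & 0x00ffffff;   /* low 24 bits: depth */
   *s = zs >> 24;          /* top 8 bits: stencil */
}

static uint32_t pack_z24s8(uint32_t z, uint32_t s)
{
   return (s << 24) | (z & 0x00ffffff);
}

int main(void)
{
   uint32_t z, s;
   unpack_z24s8(0xab123456, &z, &s);
   assert(z == 0x123456 && s == 0xab);
   assert(pack_z24s8(z, s) == 0xab123456);
   return 0;
}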
-
-
-
-/**
- * Generate SPE code to implement the fragment operations (alpha test,
- * depth test, stencil test, blending, colormask, and final
- * framebuffer write) as specified by the current context state.
- *
- * Logically, this code will be called after running the fragment
- * shader. But under some circumstances we could run some of this
- * code before the fragment shader to cull fragments/quads that are
- * totally occluded/discarded.
- *
- * XXX we only support PIPE_FORMAT_S8_UINT_Z24_UNORM z/stencil buffer right now.
- *
- * See the spu_default_fragment_ops() function to see how the per-fragment
- * operations would be done with ordinary C code.
- * The code we generate here though has no branches, is SIMD, etc and
- * should be much faster.
- *
- * \param cell the rendering context (in)
- * \param facing whether the generated code is for front-facing or
- * back-facing fragments
- * \param f the generated function (in/out); on input, the function
- * must already have been initialized. On exit, whatever
- * instructions within the generated function have had
- * the fragment ops appended.
- */
-void
-cell_gen_fragment_function(struct cell_context *cell,
- const uint facing,
- struct spe_function *f)
-{
- const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil;
- const struct pipe_stencil_ref *stencil_ref = &cell->stencil_ref;
- const struct pipe_blend_state *blend = cell->blend;
- const struct pipe_blend_color *blend_color = &cell->blend_color;
- const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
-
- /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
- const int x_reg = 3; /* uint */
- const int y_reg = 4; /* uint */
- const int color_tile_reg = 5; /* tile_t * */
- const int depth_tile_reg = 6; /* tile_t * */
- const int fragZ_reg = 7; /* vector float */
- const int fragR_reg = 8; /* vector float */
- const int fragG_reg = 9; /* vector float */
- const int fragB_reg = 10; /* vector float */
- const int fragA_reg = 11; /* vector float */
- const int mask_reg = 12; /* vector uint */
-
- ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK);
-
- /* offset of quad from start of tile
- * XXX assuming 4-byte pixels for color AND Z/stencil!!!!
- */
- int quad_offset_reg;
-
- int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */
-
- if (cell->debug_flags & CELL_DEBUG_ASM) {
- spe_print_code(f, TRUE);
- spe_indent(f, 8);
- spe_comment(f, -4, facing == CELL_FACING_FRONT
- ? "Begin front-facing per-fragment ops"
- : "Begin back-facing per-fragment ops");
- }
-
- spe_allocate_register(f, x_reg);
- spe_allocate_register(f, y_reg);
- spe_allocate_register(f, color_tile_reg);
- spe_allocate_register(f, depth_tile_reg);
- spe_allocate_register(f, fragZ_reg);
- spe_allocate_register(f, fragR_reg);
- spe_allocate_register(f, fragG_reg);
- spe_allocate_register(f, fragB_reg);
- spe_allocate_register(f, fragA_reg);
- spe_allocate_register(f, mask_reg);
-
- quad_offset_reg = spe_allocate_available_register(f);
- fbRGBA_reg = spe_allocate_available_register(f);
-
- /* compute offset of quad from start of tile, in bytes */
- {
- int x2_reg = spe_allocate_available_register(f);
- int y2_reg = spe_allocate_available_register(f);
-
- ASSERT(TILE_SIZE == 32);
-
- spe_comment(f, 0, "Compute quad offset within tile");
- spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */
- spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */
- spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */
- spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */
- spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */
-
- spe_release_register(f, x2_reg);
- spe_release_register(f, y2_reg);
- }
-
- /* Generate the alpha test, if needed. */
- if (dsa->alpha.enabled) {
- gen_alpha_test(dsa, f, mask_reg, fragA_reg);
- }
-
- /* generate depth and/or stencil test code */
- if (dsa->depth.enabled || dsa->stencil[0].enabled) {
- gen_depth_stencil(cell, dsa, stencil_ref, f,
- facing,
- mask_reg,
- depth_tile_reg,
- quad_offset_reg,
- fragZ_reg);
- }
-
- /* Get framebuffer quad/colors. We'll need these for blending,
- * color masking, and to obey the quad/pixel mask.
- * Load: fbRGBA_reg = memory[color_tile + quad_offset]
- * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
- * we could skip this load.
- */
- spe_comment(f, 0, "Fetch quad colors from tile");
- spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);
-
- if (blend->rt[0].blend_enable) {
- spe_comment(f, 0, "Perform blending");
- gen_blend(blend, blend_color, f, color_format,
- fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
- }
-
- /*
- * Write fragment colors to framebuffer/tile.
- * This involves converting the fragment colors from float[4] to the
- * tile's specific format and obeying the quad/pixel mask.
- */
- {
- int rgba_reg = spe_allocate_available_register(f);
-
- /* Pack four float colors as four 32-bit int colors */
- spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors");
- gen_pack_colors(f, color_format,
- fragR_reg, fragG_reg, fragB_reg, fragA_reg,
- rgba_reg);
-
- if (blend->logicop_enable) {
- spe_comment(f, 0, "Compute logic op");
- gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
- }
-
- if (blend->rt[0].colormask != PIPE_MASK_RGBA) {
- spe_comment(f, 0, "Compute color mask");
- gen_colormask(f, blend->rt[0].colormask, color_format, rgba_reg, fbRGBA_reg);
- }
-
- /* Mix fragment colors with framebuffer colors using the quad/pixel mask:
- * if (mask[i])
- * rgba[i] = rgba[i];
- * else
- * rgba[i] = framebuffer[i];
- */
- spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg);
-
- /* Store updated quad in tile:
- * memory[color_tile + quad_offset] = rgba_reg;
- */
- spe_comment(f, 0, "Store quad colors into color tile");
- spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);
-
- spe_release_register(f, rgba_reg);
- }
-
- //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
-
- spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */
-
- spe_release_register(f, fbRGBA_reg);
- spe_release_register(f, quad_offset_reg);
-
- if (cell->debug_flags & CELL_DEBUG_ASM) {
- char buffer[1024];
- sprintf(buffer, "End %s-facing per-fragment ops: %d instructions",
- facing == CELL_FACING_FRONT ? "front" : "back", f->num_inst);
- spe_comment(f, -4, buffer);
- }
-}
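
For reference, a minimal scalar sketch of what the generated SPE code above does for one quad, assuming 32x32-pixel tiles of 2x2-pixel quads, 4-byte color values and a Z24_UNORM_S8_UINT depth/stencil tile; the helper names are illustrative, not driver API:

    #include <stdint.h>
    #include <stdio.h>

    /* Byte offset of the quad containing pixel (x, y) within its tile:
     * 16 quads per tile row, each quad is 4 pixels * 4 bytes = 16 bytes. */
    static uint32_t
    quad_offset_in_tile(uint32_t x, uint32_t y)
    {
       return ((y / 2) * 16 + (x / 2)) * 16;
    }

    /* Merge depth and stencil the way the Z24_UNORM_S8_UINT path does:
     * stencil in the top byte, 24-bit depth in the low bits. */
    static uint32_t
    pack_z24s8(uint32_t z24, uint32_t s8)
    {
       return (s8 << 24) | (z24 & 0x00ffffff);
    }

    /* The selb at the end of the color path: take the new fragment color
     * where the kill mask is set, keep the framebuffer color elsewhere. */
    static uint32_t
    masked_write(uint32_t fb, uint32_t frag, uint32_t mask)
    {
       return (frag & mask) | (fb & ~mask);
    }

    int main(void)
    {
       printf("offset of quad at (5, 9): %u bytes\n", quad_offset_in_tile(5, 9));
       printf("z24s8:  0x%08x\n", pack_z24s8(0x123456, 0xff));
       printf("masked: 0x%08x\n", masked_write(0x11111111, 0x22222222, 0xffffffffu));
       return 0;
    }
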
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef CELL_GEN_FRAGMENT_H
-#define CELL_GEN_FRAGMENT_H
-
-
-extern void
-cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f);
-
-
-#endif /* CELL_GEN_FRAGMENT_H */
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- * Brian Paul
- */
-
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "draw/draw_context.h"
-#include "cell_context.h"
-#include "cell_flush.h"
-#include "cell_pipe_state.h"
-#include "cell_state.h"
-#include "cell_texture.h"
-
-
-
-static void *
-cell_create_blend_state(struct pipe_context *pipe,
- const struct pipe_blend_state *blend)
-{
- return mem_dup(blend, sizeof(*blend));
-}
-
-
-static void
-cell_bind_blend_state(struct pipe_context *pipe, void *blend)
-{
- struct cell_context *cell = cell_context(pipe);
-
- draw_flush(cell->draw);
-
- cell->blend = (struct pipe_blend_state *) blend;
- cell->dirty |= CELL_NEW_BLEND;
-}
-
-
-static void
-cell_delete_blend_state(struct pipe_context *pipe, void *blend)
-{
- FREE(blend);
-}
-
-
-static void
-cell_set_blend_color(struct pipe_context *pipe,
- const struct pipe_blend_color *blend_color)
-{
- struct cell_context *cell = cell_context(pipe);
-
- draw_flush(cell->draw);
-
- cell->blend_color = *blend_color;
-
- cell->dirty |= CELL_NEW_BLEND;
-}
-
-
-
-
-static void *
-cell_create_depth_stencil_alpha_state(struct pipe_context *pipe,
- const struct pipe_depth_stencil_alpha_state *dsa)
-{
- return mem_dup(dsa, sizeof(*dsa));
-}
-
-
-static void
-cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe,
- void *dsa)
-{
- struct cell_context *cell = cell_context(pipe);
-
- draw_flush(cell->draw);
-
- cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa;
- cell->dirty |= CELL_NEW_DEPTH_STENCIL;
-}
-
-
-static void
-cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa)
-{
- FREE(dsa);
-}
-
-
-static void
-cell_set_stencil_ref(struct pipe_context *pipe,
- const struct pipe_stencil_ref *stencil_ref)
-{
- struct cell_context *cell = cell_context(pipe);
-
- draw_flush(cell->draw);
-
- cell->stencil_ref = *stencil_ref;
-
- cell->dirty |= CELL_NEW_DEPTH_STENCIL;
-}
-
-
-static void
-cell_set_clip_state(struct pipe_context *pipe,
- const struct pipe_clip_state *clip)
-{
- struct cell_context *cell = cell_context(pipe);
-
- /* pass the clip state to the draw module */
- draw_set_clip_state(cell->draw, clip);
-}
-
-
-static void
-cell_set_sample_mask(struct pipe_context *pipe,
- unsigned sample_mask)
-{
-}
-
-
-/* Called when driver state tracker notices changes to the viewport
- * matrix:
- */
-static void
-cell_set_viewport_state( struct pipe_context *pipe,
- const struct pipe_viewport_state *viewport )
-{
- struct cell_context *cell = cell_context(pipe);
-
- cell->viewport = *viewport; /* struct copy */
- cell->dirty |= CELL_NEW_VIEWPORT;
-
- /* pass the viewport info to the draw module */
- draw_set_viewport_state(cell->draw, viewport);
-
- /* Using the tnl/ and vf/ modules is temporary while getting started.
- * The full pipeline will have its own vertex shader and vertex fetch.
- */
-}
-
-
-static void
-cell_set_scissor_state( struct pipe_context *pipe,
- const struct pipe_scissor_state *scissor )
-{
- struct cell_context *cell = cell_context(pipe);
-
- memcpy( &cell->scissor, scissor, sizeof(*scissor) );
- cell->dirty |= CELL_NEW_SCISSOR;
-}
-
-
-static void
-cell_set_polygon_stipple( struct pipe_context *pipe,
- const struct pipe_poly_stipple *stipple )
-{
- struct cell_context *cell = cell_context(pipe);
-
- memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) );
- cell->dirty |= CELL_NEW_STIPPLE;
-}
-
-
-
-static void *
-cell_create_rasterizer_state(struct pipe_context *pipe,
- const struct pipe_rasterizer_state *rasterizer)
-{
- return mem_dup(rasterizer, sizeof(*rasterizer));
-}
-
-
-static void
-cell_bind_rasterizer_state(struct pipe_context *pipe, void *rast)
-{
- struct pipe_rasterizer_state *rasterizer =
- (struct pipe_rasterizer_state *) rast;
- struct cell_context *cell = cell_context(pipe);
-
- /* pass-through to draw module */
- draw_set_rasterizer_state(cell->draw, rasterizer, rast);
-
- cell->rasterizer = rasterizer;
-
- cell->dirty |= CELL_NEW_RASTERIZER;
-}
-
-
-static void
-cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer)
-{
- FREE(rasterizer);
-}
-
-
-
-static void *
-cell_create_sampler_state(struct pipe_context *pipe,
- const struct pipe_sampler_state *sampler)
-{
- return mem_dup(sampler, sizeof(*sampler));
-}
-
-
-static void
-cell_bind_sampler_states(struct pipe_context *pipe,
- unsigned num, void **samplers)
-{
- struct cell_context *cell = cell_context(pipe);
- uint i, changed = 0x0;
-
- assert(num <= CELL_MAX_SAMPLERS);
-
- draw_flush(cell->draw);
-
- for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
- struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL;
- if (cell->sampler[i] != new_samp) {
- cell->sampler[i] = new_samp;
- changed |= (1 << i);
- }
- }
-
- if (changed) {
- cell->dirty |= CELL_NEW_SAMPLER;
- cell->dirty_samplers |= changed;
- }
-}
-
-
-static void
-cell_delete_sampler_state(struct pipe_context *pipe,
- void *sampler)
-{
- FREE( sampler );
-}
-
-
-
-static void
-cell_set_fragment_sampler_views(struct pipe_context *pipe,
- unsigned num,
- struct pipe_sampler_view **views)
-{
- struct cell_context *cell = cell_context(pipe);
- uint i, changed = 0x0;
-
- assert(num <= CELL_MAX_SAMPLERS);
-
- for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
- struct pipe_sampler_view *new_view = i < num ? views[i] : NULL;
- struct pipe_sampler_view *old_view = cell->fragment_sampler_views[i];
-
- if (old_view != new_view) {
- struct pipe_resource *new_tex = new_view ? new_view->texture : NULL;
-
- pipe_sampler_view_reference(&cell->fragment_sampler_views[i],
- new_view);
- pipe_resource_reference((struct pipe_resource **) &cell->texture[i],
- (struct pipe_resource *) new_tex);
-
- changed |= (1 << i);
- }
- }
-
- cell->num_textures = num;
-
- if (changed) {
- cell->dirty |= CELL_NEW_TEXTURE;
- cell->dirty_textures |= changed;
- }
-}
-
-
-static struct pipe_sampler_view *
-cell_create_sampler_view(struct pipe_context *pipe,
- struct pipe_resource *texture,
- const struct pipe_sampler_view *templ)
-{
- struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
-
- if (view) {
- *view = *templ;
- view->reference.count = 1;
- view->texture = NULL;
- pipe_resource_reference(&view->texture, texture);
- view->context = pipe;
- }
-
- return view;
-}
-
-
-static void
-cell_sampler_view_destroy(struct pipe_context *pipe,
- struct pipe_sampler_view *view)
-{
- pipe_resource_reference(&view->texture, NULL);
- FREE(view);
-}
-
-
-/**
- * Map color and z/stencil framebuffer surfaces.
- */
-static void
-cell_map_surfaces(struct cell_context *cell)
-{
-#if 0
- struct pipe_screen *screen = cell->pipe.screen;
-#endif
- uint i;
-
- for (i = 0; i < 1; i++) {
- struct pipe_surface *ps = cell->framebuffer.cbufs[i];
- if (ps) {
- struct cell_resource *ct = cell_resource(ps->texture);
-#if 0
- cell->cbuf_map[i] = screen->buffer_map(screen,
- ct->buffer,
- (PIPE_BUFFER_USAGE_GPU_READ |
- PIPE_BUFFER_USAGE_GPU_WRITE));
-#else
- cell->cbuf_map[i] = ct->data;
-#endif
- }
- }
-
- {
- struct pipe_surface *ps = cell->framebuffer.zsbuf;
- if (ps) {
- struct cell_resource *ct = cell_resource(ps->texture);
-#if 0
- cell->zsbuf_map = screen->buffer_map(screen,
- ct->buffer,
- (PIPE_BUFFER_USAGE_GPU_READ |
- PIPE_BUFFER_USAGE_GPU_WRITE));
-#else
- cell->zsbuf_map = ct->data;
-#endif
- }
- }
-}
-
-
-/**
- * Unmap color and z/stencil framebuffer surfaces.
- */
-static void
-cell_unmap_surfaces(struct cell_context *cell)
-{
- /*struct pipe_screen *screen = cell->pipe.screen;*/
- uint i;
-
- for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
- struct pipe_surface *ps = cell->framebuffer.cbufs[i];
- if (ps && cell->cbuf_map[i]) {
- /*struct cell_resource *ct = cell_resource(ps->texture);*/
- assert(ps->texture);
- /*assert(ct->buffer);*/
-
- /*screen->buffer_unmap(screen, ct->buffer);*/
- cell->cbuf_map[i] = NULL;
- }
- }
-
- {
- struct pipe_surface *ps = cell->framebuffer.zsbuf;
- if (ps && cell->zsbuf_map) {
- /*struct cell_resource *ct = cell_resource(ps->texture);*/
- /*screen->buffer_unmap(screen, ct->buffer);*/
- cell->zsbuf_map = NULL;
- }
- }
-}
-
-
-static void
-cell_set_framebuffer_state(struct pipe_context *pipe,
- const struct pipe_framebuffer_state *fb)
-{
- struct cell_context *cell = cell_context(pipe);
-
- if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) {
- uint i;
-
- /* unmap old surfaces */
- cell_unmap_surfaces(cell);
-
- /* Finish any pending rendering to the current surface before
- * installing a new surface!
- */
- cell_flush_int(cell, CELL_FLUSH_WAIT);
-
- /* update my state
- * (this is also where old surfaces will finally get freed)
- */
- cell->framebuffer.width = fb->width;
- cell->framebuffer.height = fb->height;
- cell->framebuffer.nr_cbufs = fb->nr_cbufs;
- for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
- pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]);
- }
- pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf);
-
- /* map new surfaces */
- cell_map_surfaces(cell);
-
- cell->dirty |= CELL_NEW_FRAMEBUFFER;
- }
-}
-
-
-void
-cell_init_state_functions(struct cell_context *cell)
-{
- cell->pipe.create_blend_state = cell_create_blend_state;
- cell->pipe.bind_blend_state = cell_bind_blend_state;
- cell->pipe.delete_blend_state = cell_delete_blend_state;
-
- cell->pipe.create_sampler_state = cell_create_sampler_state;
- cell->pipe.bind_fragment_sampler_states = cell_bind_sampler_states;
- cell->pipe.delete_sampler_state = cell_delete_sampler_state;
-
- cell->pipe.set_fragment_sampler_views = cell_set_fragment_sampler_views;
- cell->pipe.create_sampler_view = cell_create_sampler_view;
- cell->pipe.sampler_view_destroy = cell_sampler_view_destroy;
-
- cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state;
- cell->pipe.bind_depth_stencil_alpha_state = cell_bind_depth_stencil_alpha_state;
- cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state;
-
- cell->pipe.create_rasterizer_state = cell_create_rasterizer_state;
- cell->pipe.bind_rasterizer_state = cell_bind_rasterizer_state;
- cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state;
-
- cell->pipe.set_blend_color = cell_set_blend_color;
- cell->pipe.set_stencil_ref = cell_set_stencil_ref;
- cell->pipe.set_clip_state = cell_set_clip_state;
- cell->pipe.set_sample_mask = cell_set_sample_mask;
-
- cell->pipe.set_framebuffer_state = cell_set_framebuffer_state;
-
- cell->pipe.set_polygon_stipple = cell_set_polygon_stipple;
- cell->pipe.set_scissor_state = cell_set_scissor_state;
- cell->pipe.set_viewport_state = cell_set_viewport_state;
-}
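
The hooks installed above follow Gallium's usual constant-state-object pattern: create_* duplicates the template, bind_* stores the pointer and flags the state dirty, delete_* frees it. A minimal sketch of how a state tracker would drive the blend hooks, assuming an existing pipe_context; the template values are arbitrary:

    #include <string.h>
    #include "pipe/p_context.h"
    #include "pipe/p_defines.h"
    #include "pipe/p_state.h"

    static void
    example_enable_alpha_blending(struct pipe_context *pipe)
    {
       struct pipe_blend_state templ;
       void *handle;

       memset(&templ, 0, sizeof(templ));
       templ.rt[0].blend_enable = 1;
       templ.rt[0].rgb_src_factor   = PIPE_BLENDFACTOR_SRC_ALPHA;
       templ.rt[0].rgb_dst_factor   = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
       templ.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
       templ.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
       templ.rt[0].colormask        = PIPE_MASK_RGBA;

       /* cell_create_blend_state() just mem_dup()s the template */
       handle = pipe->create_blend_state(pipe, &templ);

       /* cell_bind_blend_state() flushes draw and sets CELL_NEW_BLEND */
       pipe->bind_blend_state(pipe, handle);

       /* ... draw something ... */

       pipe->bind_blend_state(pipe, NULL);
       pipe->delete_blend_state(pipe, handle);
    }
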
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef CELL_PIPE_STATE_H
-#define CELL_PIPE_STATE_H
-
-
-struct cell_context;
-
-extern void
-cell_init_state_functions(struct cell_context *cell);
-
-
-#endif /* CELL_PIPE_STATE_H */
+++ /dev/null
-#ifndef CELL_PUBLIC_H
-#define CELL_PUBLIC_H
-
-struct pipe_screen;
-struct sw_winsys;
-
-struct pipe_screen *
-cell_create_screen(struct sw_winsys *winsys);
-
-#endif
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \brief Last stage of 'draw' pipeline: send tris to SPUs.
- * \author Brian Paul
- */
-
-#include "cell_context.h"
-#include "cell_render.h"
-#include "cell_spu.h"
-#include "util/u_memory.h"
-#include "draw/draw_private.h"
-
-
-struct render_stage {
- struct draw_stage stage; /**< This must be first (base class) */
-
- struct cell_context *cell;
-};
-
-
-static INLINE struct render_stage *
-render_stage(struct draw_stage *stage)
-{
- return (struct render_stage *) stage;
-}
-
-
-static void render_begin( struct draw_stage *stage )
-{
-#if 0
- struct render_stage *render = render_stage(stage);
- struct cell_context *sp = render->cell;
- const struct pipe_shader_state *fs = &render->cell->fs->shader;
- render->quad.nr_attrs = render->cell->nr_frag_attrs;
-
- render->firstFpInput = fs->input_semantic_name[0];
-
- sp->quad.first->begin(sp->quad.first);
-#endif
-}
-
-
-static void render_end( struct draw_stage *stage )
-{
-}
-
-
-static void reset_stipple_counter( struct draw_stage *stage )
-{
- struct render_stage *render = render_stage(stage);
- /*render->cell->line_stipple_counter = 0;*/
-}
-
-
-static void
-render_point(struct draw_stage *stage, struct prim_header *prim)
-{
-}
-
-
-static void
-render_line(struct draw_stage *stage, struct prim_header *prim)
-{
-}
-
-
-/** Write a vertex into the prim buffer */
-static void
-save_vertex(struct cell_prim_buffer *buf, uint pos,
- const struct vertex_header *vert)
-{
- uint attr, j;
-
- for (attr = 0; attr < 2; attr++) {
- for (j = 0; j < 4; j++) {
- buf->vertex[pos][attr][j] = vert->data[attr][j];
- }
- }
-
- /* update bounding box */
- if (vert->data[0][0] < buf->xmin)
- buf->xmin = vert->data[0][0];
- if (vert->data[0][0] > buf->xmax)
- buf->xmax = vert->data[0][0];
- if (vert->data[0][1] < buf->ymin)
- buf->ymin = vert->data[0][1];
- if (vert->data[0][1] > buf->ymax)
- buf->ymax = vert->data[0][1];
-}
-
-
-static void
-render_tri(struct draw_stage *stage, struct prim_header *prim)
-{
- struct render_stage *rs = render_stage(stage);
- struct cell_context *cell = rs->cell;
- struct cell_prim_buffer *buf = &cell->prim_buffer;
- uint i;
-
- if (buf->num_verts + 3 > CELL_MAX_VERTS) {
- cell_flush_prim_buffer(cell);
- }
-
- i = buf->num_verts;
- assert(i+3 <= CELL_MAX_VERTS);
- save_vertex(buf, i+0, prim->v[0]);
- save_vertex(buf, i+1, prim->v[1]);
- save_vertex(buf, i+2, prim->v[2]);
- buf->num_verts += 3;
-}
-
-
-/**
- * Send a RENDER command to all SPUs to have them render the prims
- * in the current prim_buffer.
- */
-void
-cell_flush_prim_buffer(struct cell_context *cell)
-{
- uint i;
-
- if (cell->prim_buffer.num_verts == 0)
- return;
-
- for (i = 0; i < cell->num_spus; i++) {
- struct cell_command_render *render = &cell_global.command[i].render;
- render->prim_type = PIPE_PRIM_TRIANGLES;
- render->num_verts = cell->prim_buffer.num_verts;
- render->front_ccw = cell->rasterizer->front_ccw;
- render->vertex_size = cell->vertex_info->size * 4;
- render->xmin = cell->prim_buffer.xmin;
- render->ymin = cell->prim_buffer.ymin;
- render->xmax = cell->prim_buffer.xmax;
- render->ymax = cell->prim_buffer.ymax;
- render->vertex_data = &cell->prim_buffer.vertex;
- ASSERT_ALIGN16(render->vertex_data);
- send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER);
- }
-
- cell->prim_buffer.num_verts = 0;
-
- cell->prim_buffer.xmin = 1e100;
- cell->prim_buffer.ymin = 1e100;
- cell->prim_buffer.xmax = -1e100;
- cell->prim_buffer.ymax = -1e100;
-
- /* XXX temporary, need to double-buffer the prim buffer until we get
- * a real command buffer/list system.
- */
- cell_flush(&cell->pipe, 0x0);
-}
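
The stage above batches triangles and tracks a screen-space bounding box so the SPUs can skip tiles outside it. The same pattern in stand-alone form (buffer size and vertex layout are illustrative, and FLT_MAX stands in for the driver's +/-1e100 sentinels):

    #include <float.h>
    #include <stdio.h>

    #define MAX_VERTS 240                    /* stand-in for CELL_MAX_VERTS */

    struct prim_buffer {
       float xy[MAX_VERTS][2];
       unsigned num_verts;
       float xmin, ymin, xmax, ymax;
    };

    static void
    reset_buffer(struct prim_buffer *buf)
    {
       buf->num_verts = 0;
       buf->xmin = buf->ymin =  FLT_MAX;     /* empty bounding box */
       buf->xmax = buf->ymax = -FLT_MAX;
    }

    static void
    flush_buffer(struct prim_buffer *buf)
    {
       if (buf->num_verts == 0)
          return;
       /* the real driver points each SPU's render command at the buffer
        * here and sends CELL_CMD_RENDER through the mailbox */
       printf("flush %u verts, bbox (%g,%g)-(%g,%g)\n", buf->num_verts,
              buf->xmin, buf->ymin, buf->xmax, buf->ymax);
       reset_buffer(buf);
    }

    static void
    add_tri(struct prim_buffer *buf, const float v[3][2])
    {
       unsigned i;

       if (buf->num_verts + 3 > MAX_VERTS)
          flush_buffer(buf);

       for (i = 0; i < 3; i++) {
          unsigned n = buf->num_verts++;
          buf->xy[n][0] = v[i][0];
          buf->xy[n][1] = v[i][1];
          if (v[i][0] < buf->xmin) buf->xmin = v[i][0];
          if (v[i][0] > buf->xmax) buf->xmax = v[i][0];
          if (v[i][1] < buf->ymin) buf->ymin = v[i][1];
          if (v[i][1] > buf->ymax) buf->ymax = v[i][1];
       }
    }

    int main(void)
    {
       struct prim_buffer buf;
       const float tri[3][2] = { {10, 10}, {50, 12}, {30, 40} };

       reset_buffer(&buf);
       add_tri(&buf, tri);
       flush_buffer(&buf);
       return 0;
    }
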
-
-
-
-static void render_destroy( struct draw_stage *stage )
-{
- FREE( stage );
-}
-
-
-/**
- * Create a new draw/render stage. This will be plugged into the
- * draw module as the last pipeline stage.
- */
-struct draw_stage *cell_draw_render_stage( struct cell_context *cell )
-{
- struct render_stage *render = CALLOC_STRUCT(render_stage);
-
- render->cell = cell;
- render->stage.draw = cell->draw;
- render->stage.begin = render_begin;
- render->stage.point = render_point;
- render->stage.line = render_line;
- render->stage.tri = render_tri;
- render->stage.end = render_end;
- render->stage.reset_stipple_counter = reset_stipple_counter;
- render->stage.destroy = render_destroy;
-
- /*
- render->quad.coef = render->coef;
- render->quad.posCoef = &render->posCoef;
- */
-
- return &render->stage;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef CELL_RENDER_H
-#define CELL_RENDER_H
-
-struct cell_context;
-struct draw_stage;
-
-extern void
-cell_flush_prim_buffer(struct cell_context *cell);
-
-extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell );
-
-#endif /* CELL_RENDER_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "util/u_memory.h"
-#include "util/u_simple_screen.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_screen.h"
-
-#include "cell/common.h"
-#include "cell_context.h"
-#include "cell_screen.h"
-#include "cell_texture.h"
-#include "cell_public.h"
-
-#include "state_tracker/sw_winsys.h"
-
-
-static const char *
-cell_get_vendor(struct pipe_screen *screen)
-{
- return "VMware, Inc.";
-}
-
-
-static const char *
-cell_get_name(struct pipe_screen *screen)
-{
- return "Cell";
-}
-
-
-static int
-cell_get_param(struct pipe_screen *screen, enum pipe_cap param)
-{
- switch (param) {
- case PIPE_CAP_MAX_COMBINED_SAMPLERS:
- return CELL_MAX_SAMPLERS;
- case PIPE_CAP_NPOT_TEXTURES:
- return 1;
- case PIPE_CAP_TWO_SIDED_STENCIL:
- return 1;
- case PIPE_CAP_ANISOTROPIC_FILTER:
- return 0;
- case PIPE_CAP_POINT_SPRITE:
- return 1;
- case PIPE_CAP_MAX_RENDER_TARGETS:
- return 1;
- case PIPE_CAP_OCCLUSION_QUERY:
- return 1;
- case PIPE_CAP_TIMER_QUERY:
- return 0;
- case PIPE_CAP_TEXTURE_SHADOW_MAP:
- return 10;
- case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
- return CELL_MAX_TEXTURE_LEVELS;
- case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
- return 8; /* max 128x128x128 */
- case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
- return CELL_MAX_TEXTURE_LEVELS;
- case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
- return 0; /* XXX to do */
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
- return 1;
- case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
- case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
- return 0;
- case PIPE_CAP_BLEND_EQUATION_SEPARATE:
- return 1;
- default:
- return 0;
- }
-}
-
-static int
-cell_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param)
-{
- switch(shader)
- {
- case PIPE_SHADER_FRAGMENT:
- switch (param) {
- case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
- return CELL_MAX_SAMPLERS;
- default:
- return tgsi_exec_get_shader_param(param);
- }
- case PIPE_SHADER_VERTEX:
- case PIPE_SHADER_GEOMETRY:
- return draw_get_shader_param(shader, param);
- default:
- return 0;
- }
-}
-
-static float
-cell_get_paramf(struct pipe_screen *screen, enum pipe_capf param)
-{
- switch (param) {
- case PIPE_CAPF_MAX_LINE_WIDTH:
- /* fall-through */
- case PIPE_CAPF_MAX_LINE_WIDTH_AA:
- return 255.0; /* arbitrary */
-
- case PIPE_CAPF_MAX_POINT_WIDTH:
- /* fall-through */
- case PIPE_CAPF_MAX_POINT_WIDTH_AA:
- return 255.0; /* arbitrary */
-
- case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
- return 0.0;
-
- case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
- return 16.0; /* arbitrary */
-
- default:
- return 0;
- }
-}
-
-
-static boolean
-cell_is_format_supported( struct pipe_screen *screen,
- enum pipe_format format,
- enum pipe_texture_target target,
- unsigned sample_count,
- unsigned tex_usage)
-{
- struct sw_winsys *winsys = cell_screen(screen)->winsys;
-
- if (sample_count > 1)
- return FALSE;
-
- if (tex_usage & (PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED)) {
- if (!winsys->is_displaytarget_format_supported(winsys, tex_usage, format))
- return FALSE;
- }
-
- /* only a few formats are known to work at this time */
- switch (format) {
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- case PIPE_FORMAT_I8_UNORM:
- return TRUE;
- default:
- return FALSE;
- }
-}
-
-
-static void
-cell_destroy_screen( struct pipe_screen *screen )
-{
- struct cell_screen *sp_screen = cell_screen(screen);
- struct sw_winsys *winsys = sp_screen->winsys;
-
- if(winsys->destroy)
- winsys->destroy(winsys);
-
- FREE(screen);
-}
-
-
-
-/**
- * Create a new pipe_screen object
- * Note: pipe_screen is subclassed here as cell_screen; that is the place
- * to keep per-screen SPU thread/context info...
- */
-struct pipe_screen *
-cell_create_screen(struct sw_winsys *winsys)
-{
- struct cell_screen *screen = CALLOC_STRUCT(cell_screen);
-
- if (!screen)
- return NULL;
-
- screen->winsys = winsys;
- screen->base.winsys = NULL;
-
- screen->base.destroy = cell_destroy_screen;
-
- screen->base.get_name = cell_get_name;
- screen->base.get_vendor = cell_get_vendor;
- screen->base.get_param = cell_get_param;
- screen->base.get_shader_param = cell_get_shader_param;
- screen->base.get_paramf = cell_get_paramf;
- screen->base.is_format_supported = cell_is_format_supported;
- screen->base.context_create = cell_create_context;
-
- cell_init_screen_texture_funcs(&screen->base);
-
- return &screen->base;
-}
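
A sketch of how a winsys/target layer might use this screen: create it, sanity-check a capability, and verify a render-target format. Error handling is minimal and the sw_winsys is assumed to come from the target (an X11 or null winsys, for instance):

    #include "pipe/p_defines.h"
    #include "pipe/p_format.h"
    #include "pipe/p_screen.h"
    #include "cell_public.h"

    static struct pipe_screen *
    example_create_cell_screen(struct sw_winsys *winsys)
    {
       struct pipe_screen *screen = cell_create_screen(winsys);

       if (!screen)
          return NULL;

       /* the Cell driver only exposes a single color buffer */
       if (screen->get_param(screen, PIPE_CAP_MAX_RENDER_TARGETS) < 1 ||
           !screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_UNORM,
                                        PIPE_TEXTURE_2D, 0,
                                        PIPE_BIND_RENDER_TARGET)) {
          screen->destroy(screen);
          return NULL;
       }

       return screen;
    }
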
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef CELL_SCREEN_H
-#define CELL_SCREEN_H
-
-
-#include "pipe/p_screen.h"
-
-struct sw_winsys;
-
-struct cell_screen {
- struct pipe_screen base;
-
- struct sw_winsys *winsys;
-
- /* Increments whenever textures are modified. Contexts can track
- * this.
- */
- unsigned timestamp;
-};
-
-static INLINE struct cell_screen *
-cell_screen( struct pipe_screen *pipe )
-{
- return (struct cell_screen *)pipe;
-}
-
-
-#endif /* CELL_SCREEN_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * Utility/wrappers for communicating with the SPUs.
- */
-
-
-#include <pthread.h>
-
-#include "cell_spu.h"
-#include "pipe/p_format.h"
-#include "pipe/p_state.h"
-#include "util/u_memory.h"
-#include "cell/common.h"
-
-
-/*
-helpful headers:
-/opt/ibm/cell-sdk/prototype/src/include/ppu/cbe_mfc.h
-*/
-
-
-/**
- * Cell/SPU info that's not per-context.
- */
-struct cell_global_info cell_global;
-
-
-/**
- * Scan /proc/cpuinfo to determine the timebase for the system.
- * This is used by the SPUs to convert 'decrementer' ticks to seconds.
- * There may be a better way to get this value...
- */
-static unsigned
-get_timebase(void)
-{
- FILE *f = fopen("/proc/cpuinfo", "r");
- unsigned timebase = 0;
-
- assert(f);
- while (!feof(f)) {
- char line[80];
- fgets(line, sizeof(line), f);
- if (strncmp(line, "timebase", 8) == 0) {
- char *colon = strchr(line, ':');
- if (colon) {
- timebase = atoi(colon + 2);
- break;
- }
- }
- }
- fclose(f);
-
- return timebase;
-}
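
The value read here is the number of timebase (decrementer) ticks per second, typically reported as 79800000 (79.8 MHz) on a PS3, so converting a tick count to milliseconds is a single division; cell_start_spus() below hands the SPUs 1000.0f / timebase as inv_timebase for exactly this purpose. A small sketch:

    /* Convert a decrementer tick count to milliseconds. */
    static float
    ticks_to_ms(unsigned ticks, unsigned timebase)
    {
       return (float) ticks * (1000.0f / (float) timebase);
    }

    /* e.g. ticks_to_ms(79800, 79800000) == 1.0f */
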
-
-
-/**
- * Write a 1-word message to the given SPE mailbox.
- */
-void
-send_mbox_message(spe_context_ptr_t ctx, unsigned int msg)
-{
- spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING);
-}
-
-
-/**
- * Wait for a 1-word message to arrive in given mailbox.
- */
-uint
-wait_mbox_message(spe_context_ptr_t ctx)
-{
- do {
- unsigned data;
- int count = spe_out_mbox_read(ctx, &data, 1);
-
- if (count == 1) {
- return data;
- }
-
- if (count < 0) {
- /* error */ ;
- }
- } while (1);
-}
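
A sketch of how these two helpers pair up for a synchronous command: post one word to the SPU's inbound mailbox, then block on the outbound mailbox for the reply. The command and acknowledgement values here are illustrative; the driver itself uses the mailbox for simple one-word commands such as CELL_CMD_EXIT.

    #include "cell_spu.h"

    /* example_ack_value is illustrative, not a real CELL_* token */
    static void
    example_sync_command(spe_context_ptr_t ctx, unsigned cmd,
                         unsigned example_ack_value)
    {
       send_mbox_message(ctx, cmd);              /* blocking 1-word write */

       if (wait_mbox_message(ctx) != example_ack_value) {
          /* unexpected reply; a real caller would report an error here */
       }
    }
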
-
-
-/**
- * Called by pthread_create() to spawn an SPU thread.
- */
-static void *
-cell_thread_function(void *arg)
-{
- struct cell_init_info *init = (struct cell_init_info *) arg;
- unsigned entry = SPE_DEFAULT_ENTRY;
-
- ASSERT_ALIGN16(init);
-
- if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0,
- init, NULL, NULL) < 0) {
- fprintf(stderr, "spe_context_run() failed\n");
- exit(1);
- }
-
- pthread_exit(NULL);
-}
-
-
-/**
- * Create the SPU threads. This is done once during driver initialization.
- * This involves setting the "init" message which is sent to each SPU.
- * The init message specifies an SPU id, total number of SPUs, location
- * and number of batch buffers, etc.
- */
-void
-cell_start_spus(struct cell_context *cell)
-{
- static boolean one_time_init = FALSE;
- uint i, j;
- uint timebase = get_timebase();
-
- if (one_time_init) {
- fprintf(stderr, "PPU: Multiple rendering contexts not yet supported "
- "on Cell.\n");
- abort();
- }
-
- one_time_init = TRUE;
-
- assert(cell->num_spus <= CELL_MAX_SPUS);
-
- ASSERT_ALIGN16(&cell_global.inits[0]);
- ASSERT_ALIGN16(&cell_global.inits[1]);
-
- /*
- * Initialize the global 'inits' structure for each SPU.
- * A pointer to the init struct will be passed to each SPU.
- * The SPUs will then each grab their init info with mfc_get().
- */
- for (i = 0; i < cell->num_spus; i++) {
- cell_global.inits[i].id = i;
- cell_global.inits[i].num_spus = cell->num_spus;
- cell_global.inits[i].debug_flags = cell->debug_flags;
- cell_global.inits[i].inv_timebase = 1000.0f / timebase;
-
- for (j = 0; j < CELL_NUM_BUFFERS; j++) {
- cell_global.inits[i].buffers[j] = cell->buffer[j];
- }
- cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0];
-
- cell_global.inits[i].spu_functions = &cell->spu_functions;
-
- cell_global.spe_contexts[i] = spe_context_create(0, NULL);
- if (!cell_global.spe_contexts[i]) {
- fprintf(stderr, "spe_context_create() failed\n");
- exit(1);
- }
-
- if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) {
- fprintf(stderr, "spe_program_load() failed\n");
- exit(1);
- }
-
- pthread_create(&cell_global.spe_threads[i], /* returned thread handle */
- NULL, /* pthread attribs */
- &cell_thread_function, /* start routine */
- &cell_global.inits[i]); /* thread argument */
- }
-}
-
-
-/**
- * Tell all the SPUs to stop/exit.
- * This is done when the driver's exiting / cleaning up.
- */
-void
-cell_spu_exit(struct cell_context *cell)
-{
- uint i;
-
- for (i = 0; i < cell->num_spus; i++) {
- send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT);
- }
-
- /* wait for threads to exit */
- for (i = 0; i < cell->num_spus; i++) {
- void *value;
- pthread_join(cell_global.spe_threads[i], &value);
- cell_global.spe_threads[i] = 0;
- cell_global.spe_contexts[i] = 0;
- }
-}
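
cell_spu_exit() joins the worker threads and drops the handles; a fuller libspe2 teardown would also destroy each SPE context with spe_context_destroy(). A sketch under that assumption, reusing the globals above:

    #include "cell_spu.h"

    static void
    example_full_spu_teardown(struct cell_context *cell)
    {
       uint i;

       for (i = 0; i < cell->num_spus; i++)
          send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT);

       for (i = 0; i < cell->num_spus; i++) {
          void *value;
          pthread_join(cell_global.spe_threads[i], &value);
          spe_context_destroy(cell_global.spe_contexts[i]);
          cell_global.spe_threads[i] = 0;
          cell_global.spe_contexts[i] = NULL;
       }
    }
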
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef CELL_SPU
-#define CELL_SPU
-
-
-#include <libspe2.h>
-#include <pthread.h>
-#include "cell/common.h"
-
-#include "cell_context.h"
-
-
-/**
- * Global vars, for now anyway.
- */
-struct cell_global_info
-{
- /**
- * SPU/SPE handles, etc
- */
- spe_context_ptr_t spe_contexts[CELL_MAX_SPUS];
- pthread_t spe_threads[CELL_MAX_SPUS];
-
- /**
- * Data sent to SPUs at start-up
- */
- struct cell_init_info inits[CELL_MAX_SPUS];
-};
-
-
-extern struct cell_global_info cell_global;
-
-
-/** This is the handle for the actual SPE code */
-extern spe_program_handle_t g3d_spu;
-
-
-extern void
-send_mbox_message(spe_context_ptr_t ctx, unsigned int msg);
-
-extern uint
-wait_mbox_message(spe_context_ptr_t ctx);
-
-
-extern void
-cell_start_spus(struct cell_context *cell);
-
-
-extern void
-cell_spu_exit(struct cell_context *cell);
-
-
-#endif /* CELL_SPU */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef CELL_STATE_H
-#define CELL_STATE_H
-
-
-#define CELL_NEW_VIEWPORT 0x1
-#define CELL_NEW_RASTERIZER 0x2
-#define CELL_NEW_FS 0x4
-#define CELL_NEW_BLEND 0x8
-#define CELL_NEW_CLIP 0x10
-#define CELL_NEW_SCISSOR 0x20
-#define CELL_NEW_STIPPLE 0x40
-#define CELL_NEW_FRAMEBUFFER 0x80
-#define CELL_NEW_ALPHA_TEST 0x100
-#define CELL_NEW_DEPTH_STENCIL 0x200
-#define CELL_NEW_SAMPLER 0x400
-#define CELL_NEW_TEXTURE 0x800
-#define CELL_NEW_VERTEX 0x1000
-#define CELL_NEW_VS 0x2000
-#define CELL_NEW_VS_CONSTANTS 0x4000
-#define CELL_NEW_FS_CONSTANTS 0x8000
-#define CELL_NEW_VERTEX_INFO 0x10000
-
-
-extern void
-cell_update_derived( struct cell_context *cell );
-
-
-extern void
-cell_init_shader_functions(struct cell_context *cell);
-
-
-extern void
-cell_init_vertex_functions(struct cell_context *cell);
-
-
-#endif /* CELL_STATE_H */
-
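
These CELL_NEW_* values are independent bits, so state changes accumulate with bitwise OR and interested code tests groups of them with AND. A small illustrative sketch of the pattern (the function name is hypothetical):

    #include "cell_context.h"
    #include "cell_state.h"

    static void
    example_dirty_flag_usage(struct cell_context *cell)
    {
       cell->dirty |= CELL_NEW_BLEND | CELL_NEW_DEPTH_STENCIL;   /* mark */

       if (cell->dirty & (CELL_NEW_RASTERIZER | CELL_NEW_FS | CELL_NEW_VS)) {
          /* recompute the vertex layout, as cell_update_derived() does */
       }

       cell->dirty = 0;                                          /* all clean */
    }
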
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "util/u_memory.h"
-#include "pipe/p_shader_tokens.h"
-#include "draw/draw_context.h"
-#include "draw/draw_vertex.h"
-#include "cell_context.h"
-#include "cell_batch.h"
-#include "cell_state.h"
-#include "cell_state_emit.h"
-
-
-/**
- * Determine how to map vertex program outputs to fragment program inputs.
- * Basically, this will be used when computing the triangle interpolation
- * coefficients from the post-transform vertex attributes.
- */
-static void
-calculate_vertex_layout( struct cell_context *cell )
-{
- const struct cell_fragment_shader_state *fs = cell->fs;
- const enum interp_mode colorInterp
- = cell->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
- struct vertex_info *vinfo = &cell->vertex_info;
- uint i;
- int src;
-
-#if 0
- if (cell->vbuf) {
- /* if using the post-transform vertex buffer, tell draw_vbuf to
- * simply emit the whole post-xform vertex as-is:
- */
- struct vertex_info *vinfo_vbuf = &cell->vertex_info_vbuf;
- vinfo_vbuf->num_attribs = 0;
- draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0);
- vinfo_vbuf->size = 4 * vs->num_outputs + sizeof(struct vertex_header)/4;
- }
-#endif
-
- /* reset vinfo */
- vinfo->num_attribs = 0;
-
- /* we always want to emit vertex pos */
- src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0);
- assert(src >= 0);
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
-
-
- /*
- * Loop over fragment shader inputs, searching for the matching output
- * from the vertex shader.
- */
- for (i = 0; i < fs->info.num_inputs; i++) {
- switch (fs->info.input_semantic_name[i]) {
- case TGSI_SEMANTIC_POSITION:
- /* already done above */
- break;
-
- case TGSI_SEMANTIC_COLOR:
- src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR,
- fs->info.input_semantic_index[i]);
- assert(src >= 0);
- draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
- break;
-
- case TGSI_SEMANTIC_FOG:
- src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0);
-#if 1
- if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */
- src = 0;
-#endif
- assert(src >= 0);
- draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
- break;
-
- case TGSI_SEMANTIC_GENERIC:
- /* this includes texcoords and varying vars */
- src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC,
- fs->info.input_semantic_index[i]);
- assert(src >= 0);
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
- break;
-
- default:
- assert(0);
- }
- }
-
- draw_compute_vertex_size(vinfo);
-
- /* XXX only signal this if format really changes */
- cell->dirty |= CELL_NEW_VERTEX_INFO;
-}
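
A worked example of the mapping above for a hypothetical fragment shader whose inputs are POSITION, COLOR0 and GENERIC0 (a texcoord), with flat shading disabled. It mirrors the loop without the draw-module plumbing; the real vertex additionally carries a vertex_header and is sized by draw_compute_vertex_size().

    #include <stdio.h>

    int main(void)
    {
       /* chosen emit/interp per input, as the loop above would pick */
       static const struct { const char *input, *emit; } plan[] = {
          { "POSITION", "EMIT_4F, INTERP_POS" },
          { "COLOR0",   "EMIT_4F, INTERP_LINEAR (CONSTANT if flatshade)" },
          { "GENERIC0", "EMIT_4F, INTERP_PERSPECTIVE" },
       };
       unsigned i, floats = 0;

       for (i = 0; i < 3; i++) {
          printf("attrib %u: %-9s -> %s\n", i, plan[i].input, plan[i].emit);
          floats += 4;                  /* each EMIT_4F emits four floats */
       }
       printf("attribute data per vertex: %u floats\n", floats);
       return 0;
    }
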
-
-
-#if 0
-/**
- * Recompute cliprect from scissor bounds, scissor enable and surface size.
- */
-static void
-compute_cliprect(struct cell_context *sp)
-{
- uint surfWidth = sp->framebuffer.width;
- uint surfHeight = sp->framebuffer.height;
-
- if (sp->rasterizer->scissor) {
- /* clip to scissor rect */
- sp->cliprect.minx = MAX2(sp->scissor.minx, 0);
- sp->cliprect.miny = MAX2(sp->scissor.miny, 0);
- sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth);
- sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight);
- }
- else {
- /* clip to surface bounds */
- sp->cliprect.minx = 0;
- sp->cliprect.miny = 0;
- sp->cliprect.maxx = surfWidth;
- sp->cliprect.maxy = surfHeight;
- }
-}
-#endif
-
-
-
-/**
- * Update derived state, send current state to SPUs prior to rendering.
- */
-void cell_update_derived( struct cell_context *cell )
-{
- if (cell->dirty & (CELL_NEW_RASTERIZER |
- CELL_NEW_FS |
- CELL_NEW_VS))
- calculate_vertex_layout( cell );
-
-#if 0
- if (cell->dirty & (CELL_NEW_SCISSOR |
- CELL_NEW_DEPTH_STENCIL_ALPHA |
- CELL_NEW_FRAMEBUFFER))
- compute_cliprect(cell);
-#endif
-
- cell_emit_state(cell);
-
- cell->dirty = 0;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
-#include "util/u_math.h"
-#include "util/u_format.h"
-#include "cell_context.h"
-#include "cell_gen_fragment.h"
-#include "cell_state.h"
-#include "cell_state_emit.h"
-#include "cell_batch.h"
-#include "cell_texture.h"
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-
-
-/**
- * Find/create a cell_command_fragment_ops object corresponding to the
- * current blend/stencil/z/colormask/etc. state.
- */
-static struct cell_command_fragment_ops *
-lookup_fragment_ops(struct cell_context *cell)
-{
- struct cell_fragment_ops_key key;
- struct cell_command_fragment_ops *ops;
-
- /*
- * Build key
- */
- memset(&key, 0, sizeof(key));
- key.blend = *cell->blend;
- key.blend_color = cell->blend_color;
- key.dsa = *cell->depth_stencil;
-
- if (cell->framebuffer.cbufs[0])
- key.color_format = cell->framebuffer.cbufs[0]->format;
- else
- key.color_format = PIPE_FORMAT_NONE;
-
- if (cell->framebuffer.zsbuf)
- key.zs_format = cell->framebuffer.zsbuf->format;
- else
- key.zs_format = PIPE_FORMAT_NONE;
-
- /*
- * Look up key in cache.
- */
- ops = (struct cell_command_fragment_ops *)
- util_keymap_lookup(cell->fragment_ops_cache, &key);
-
- /*
- * If not found, create/save new fragment ops command.
- */
- if (!ops) {
- struct spe_function spe_code_front, spe_code_back;
- unsigned int facing_dependent, total_code_size;
-
- if (0)
- debug_printf("**** Create New Fragment Ops\n");
-
- /* Prepare the buffers that will hold the generated code. The
- * "0" passed in for the size means a default buffer size
- * will be used.
- */
- spe_init_func(&spe_code_front, 0);
- spe_init_func(&spe_code_back, 0);
-
- /* Generate new code. Always generate new code for both front-facing
- * and back-facing fragments, even if it's the same code in both
- * cases.
- */
- cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front);
- cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back);
-
- /* Make sure the code is a multiple of 8 bytes long; this is
- * required to ensure that the dual pipe instruction alignment
- * is correct. It's also important for the SPU unpacking,
- * which assumes 8-byte boundaries.
- */
- unsigned int front_code_size = spe_code_size(&spe_code_front);
- while (front_code_size % 8 != 0) {
- spe_lnop(&spe_code_front);
- front_code_size = spe_code_size(&spe_code_front);
- }
- unsigned int back_code_size = spe_code_size(&spe_code_back);
- while (back_code_size % 8 != 0) {
- spe_lnop(&spe_code_back);
- back_code_size = spe_code_size(&spe_code_back);
- }
-
- /* Determine whether the code we generated is facing-dependent, by
- * determining whether the generated code is different for the front-
- * and back-facing fragments.
- */
- if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) {
- /* Code is identical; only need one copy. */
- facing_dependent = 0;
- total_code_size = front_code_size;
- }
- else {
- /* Code is different for front-facing and back-facing fragments.
- * Need to send both copies.
- */
- facing_dependent = 1;
- total_code_size = front_code_size + back_code_size;
- }
-
- /* alloc new fragment ops command. Note that this structure
- * has variant length based on the total code size required.
- */
- ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size);
- /* populate the new cell_command_fragment_ops object */
- ops->opcode[0] = CELL_CMD_STATE_FRAGMENT_OPS;
- ops->total_code_size = total_code_size;
- ops->front_code_index = 0;
- memcpy(ops->code, spe_code_front.store, front_code_size);
- if (facing_dependent) {
- /* We have separate front- and back-facing code. Append the
- * back-facing code to the buffer. Be careful because the code
- * size is in bytes, but the buffer is of unsigned elements.
- */
- ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]);
- memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size);
- }
- else {
- /* Use the same code for front- and back-facing fragments */
- ops->back_code_index = ops->front_code_index;
- }
-
- /* Set the fields for the fallback case. Note that these fields
- * (and the whole fallback case) will eventually go away.
- */
- ops->dsa = *cell->depth_stencil;
- ops->blend = *cell->blend;
- ops->blend_color = cell->blend_color;
-
- /* insert cell_command_fragment_ops object into keymap/cache */
- util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL);
-
- /* release rtasm buffer */
- spe_release_func(&spe_code_front);
- spe_release_func(&spe_code_back);
- }
- else {
- if (0)
- debug_printf("**** Re-use Fragment Ops\n");
- }
-
- return ops;
-}
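
The padding loops above exist because the dual-issue alignment and the SPU-side unpacking want code sizes that are multiples of 8 bytes. Since each SPE instruction is 4 bytes, at most one spe_lnop() is appended; the same result could be computed up front as a round-up (pad_to_8() is illustrative, not driver API):

    /* Next multiple of 8 at or above size; e.g. pad_to_8(20) == 24,
     * pad_to_8(24) == 24.  Each spe_lnop() in the loop above adds 4 bytes. */
    static unsigned
    pad_to_8(unsigned size_in_bytes)
    {
       return (size_in_bytes + 7) & ~7u;
    }
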
-
-
-
-static void
-emit_state_cmd(struct cell_context *cell, uint cmd,
- const void *state, uint state_size)
-{
- uint32_t *dst = (uint32_t *)
- cell_batch_alloc16(cell, ROUNDUP16(sizeof(opcode_t) + state_size));
- *dst = cmd;
- memcpy(dst + 4, state, state_size);
-}
-
-
-/**
- * For state marked as 'dirty', construct a state-update command block
- * and insert it into the current batch buffer.
- */
-void
-cell_emit_state(struct cell_context *cell)
-{
- if (cell->dirty & CELL_NEW_FRAMEBUFFER) {
- struct pipe_surface *cbuf = cell->framebuffer.cbufs[0];
- struct pipe_surface *zbuf = cell->framebuffer.zsbuf;
- STATIC_ASSERT(sizeof(struct cell_command_framebuffer) % 16 == 0);
- struct cell_command_framebuffer *fb
- = cell_batch_alloc16(cell, sizeof(*fb));
- fb->opcode[0] = CELL_CMD_STATE_FRAMEBUFFER;
- fb->color_start = cell->cbuf_map[0];
- fb->color_format = cbuf->format;
- fb->depth_start = cell->zsbuf_map;
- fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE;
- fb->width = cell->framebuffer.width;
- fb->height = cell->framebuffer.height;
-#if 0
- printf("EMIT color format %s\n", util_format_name(fb->color_format));
- printf("EMIT depth format %s\n", util_format_name(fb->depth_format));
-#endif
- }
-
- if (cell->dirty & (CELL_NEW_RASTERIZER)) {
- STATIC_ASSERT(sizeof(struct cell_command_rasterizer) % 16 == 0);
- struct cell_command_rasterizer *rast =
- cell_batch_alloc16(cell, sizeof(*rast));
- rast->opcode[0] = CELL_CMD_STATE_RASTERIZER;
- rast->rasterizer = *cell->rasterizer;
- }
-
- if (cell->dirty & (CELL_NEW_FS)) {
- /* Send new fragment program to SPUs */
- STATIC_ASSERT(sizeof(struct cell_command_fragment_program) % 16 == 0);
- struct cell_command_fragment_program *fp
- = cell_batch_alloc16(cell, sizeof(*fp));
- fp->opcode[0] = CELL_CMD_STATE_FRAGMENT_PROGRAM;
- fp->num_inst = cell->fs->code.num_inst;
- memcpy(&fp->code, cell->fs->code.store,
- SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
- if (0) {
- int i;
- printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n");
- for (i = 0; i < fp->num_inst; i++) {
- printf(" %3d: 0x%08x\n", i, fp->code[i]);
- }
- }
- }
-
- if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) {
- const uint shader = PIPE_SHADER_FRAGMENT;
- const uint num_const = cell->constants[shader]->width0 / sizeof(float);
- uint i, j;
- float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float)));
- uint32_t *ibuf = (uint32_t *) buf;
- const float *constants = cell->mapped_constants[shader];
- ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS;
- ibuf[4] = num_const;
- j = 8;
- for (i = 0; i < num_const; i++) {
- buf[j++] = constants[i];
- }
- }
-
- if (cell->dirty & (CELL_NEW_FRAMEBUFFER |
- CELL_NEW_DEPTH_STENCIL |
- CELL_NEW_BLEND)) {
- struct cell_command_fragment_ops *fops, *fops_cmd;
- /* Note that cell_command_fragment_ops is a variant-sized record */
- fops = lookup_fragment_ops(cell);
- fops_cmd = cell_batch_alloc16(cell, ROUNDUP16(sizeof(*fops_cmd) + fops->total_code_size));
- memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size);
- }
-
- if (cell->dirty & CELL_NEW_SAMPLER) {
- uint i;
- for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
- if (cell->dirty_samplers & (1 << i)) {
- if (cell->sampler[i]) {
- STATIC_ASSERT(sizeof(struct cell_command_sampler) % 16 == 0);
- struct cell_command_sampler *sampler
- = cell_batch_alloc16(cell, sizeof(*sampler));
- sampler->opcode[0] = CELL_CMD_STATE_SAMPLER;
- sampler->unit = i;
- sampler->state = *cell->sampler[i];
- }
- }
- }
- cell->dirty_samplers = 0x0;
- }
-
- if (cell->dirty & CELL_NEW_TEXTURE) {
- uint i;
- for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
- if (cell->dirty_textures & (1 << i)) {
- STATIC_ASSERT(sizeof(struct cell_command_texture) % 16 == 0);
- struct cell_command_texture *texture =
- (struct cell_command_texture *)
- cell_batch_alloc16(cell, sizeof(*texture));
-
- texture->opcode[0] = CELL_CMD_STATE_TEXTURE;
- texture->unit = i;
- if (cell->texture[i]) {
- struct cell_resource *ct = cell->texture[i];
- uint level;
- for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
- texture->start[level] = (ct->mapped +
- ct->level_offset[level]);
- texture->width[level] = u_minify(ct->base.width0, level);
- texture->height[level] = u_minify(ct->base.height0, level);
- texture->depth[level] = u_minify(ct->base.depth0, level);
- }
- texture->target = ct->base.target;
- }
- else {
- uint level;
- for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
- texture->start[level] = NULL;
- texture->width[level] = 0;
- texture->height[level] = 0;
- texture->depth[level] = 0;
- }
- texture->target = 0;
- }
- }
- }
- cell->dirty_textures = 0x0;
- }
-
- if (cell->dirty & CELL_NEW_VERTEX_INFO) {
- emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO,
- &cell->vertex_info, sizeof(struct vertex_info));
- }
-
-#if 0
- if (cell->dirty & CELL_NEW_VS) {
- const struct draw_context *const draw = cell->draw;
- struct cell_shader_info info;
-
- info.num_outputs = draw_num_shader_outputs(draw);
- info.declarations = (uintptr_t) draw->vs.machine.Declarations;
- info.num_declarations = draw->vs.machine.NumDeclarations;
- info.instructions = (uintptr_t) draw->vs.machine.Instructions;
- info.num_instructions = draw->vs.machine.NumInstructions;
- info.immediates = (uintptr_t) draw->vs.machine.Imms;
- info.num_immediates = draw->vs.machine.ImmLimit / 4;
-
- emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info));
- }
-#endif
-}
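
The CELL_NEW_FS_CONSTANTS block lays the constants out as a 32-byte header (command word in word 0, constant count in word 4) followed by the raw floats starting at word 8. Here is a small C sketch of that packing, with a made-up opcode value and a plain array standing in for the batch buffer.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DUMMY_CMD_FS_CONSTANTS 0x1234u   /* placeholder opcode value */

/* Pack fragment shader constants: 32-byte header, floats from word 8 on.
 * 'buf' must have room for 8 + num_const 32-bit words.
 */
static void pack_fs_constants(float *buf, const float *constants,
                              unsigned num_const)
{
   uint32_t *ibuf = (uint32_t *) buf;
   unsigned i, j;

   memset(buf, 0, 32);                  /* clear the 32-byte header */
   ibuf[0] = DUMMY_CMD_FS_CONSTANTS;    /* command word             */
   ibuf[4] = num_const;                 /* constant count, word 4   */
   j = 8;                               /* payload begins at word 8 */
   for (i = 0; i < num_const; i++)
      buf[j++] = constants[i];
}

int main(void)
{
   const float consts[4] = { 1.0f, 0.5f, 0.25f, 0.125f };
   float buf[8 + 4];

   pack_fs_constants(buf, consts, 4);
   printf("count=%u first=%f\n", (unsigned) ((uint32_t *) buf)[4], buf[8]);
   return 0;
}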
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef CELL_STATE_EMIT_H
-#define CELL_STATE_EMIT_H
-
-
-extern void
-cell_emit_state(struct cell_context *cell);
-
-
-#endif /* CELL_STATE_EMIT_H */
+++ /dev/null
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file
- * Generate code to perform all per-fragment operations.
- *
- * The code generated by these functions performs alpha, depth, and stencil
- * testing as well as alpha blending.
- *
- * \note
- * Occlusion query is not supported, but this is the right place to add that
- * support.
- *
- * \author Ian Romanick <idr@us.ibm.com>
- */
-
-#include "pipe/p_defines.h"
-#include "pipe/p_state.h"
-
-#include "cell_context.h"
-
-#include "rtasm/rtasm_ppc_spe.h"
-
-
-/**
- * Generate code to perform alpha testing.
- *
- * The code generated by this function uses the register specified by
- * \c mask as both an input and an output.
- *
- * \param dsa Current alpha-test state
- * \param f Function to which code should be appended
- * \param mask Index of register containing active fragment mask
- * \param alphas Index of register containing per-fragment alpha values
- *
- * \note Emits a maximum of 6 instructions.
- */
-static void
-emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
- struct spe_function *f, int mask, int alphas)
-{
- /* If the alpha function is either NEVER or ALWAYS, there is no need to
- * load the reference value into a register. ALWAYS is a fairly common
- * case, and this optimization saves 2 instructions.
- */
- if (dsa->alpha.enabled
- && (dsa->alpha.func != PIPE_FUNC_NEVER)
- && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
- int ref = spe_allocate_available_register(f);
- int tmp_a = spe_allocate_available_register(f);
- int tmp_b = spe_allocate_available_register(f);
- union {
- float f;
- unsigned u;
- } ref_val;
- boolean complement = FALSE;
-
- ref_val.f = dsa->alpha.ref;
-
- spe_il(f, ref, ref_val.u & 0x0000ffff);
- spe_ilh(f, ref, ref_val.u >> 16);
-
- switch (dsa->alpha.func) {
- case PIPE_FUNC_NOTEQUAL:
- complement = TRUE;
- /* FALLTHROUGH */
-
- case PIPE_FUNC_EQUAL:
- spe_fceq(f, tmp_a, ref, alphas);
- break;
-
- case PIPE_FUNC_LEQUAL:
- complement = TRUE;
- /* FALLTHROUGH */
-
- case PIPE_FUNC_GREATER:
- spe_fcgt(f, tmp_a, ref, alphas);
- break;
-
- case PIPE_FUNC_LESS:
- complement = TRUE;
- /* FALLTHROUGH */
-
- case PIPE_FUNC_GEQUAL:
- spe_fcgt(f, tmp_a, ref, alphas);
- spe_fceq(f, tmp_b, ref, alphas);
- spe_or(f, tmp_a, tmp_b, tmp_a);
- break;
-
- case PIPE_FUNC_ALWAYS:
- case PIPE_FUNC_NEVER:
- default:
- assert(0);
- break;
- }
-
- if (complement) {
- spe_andc(f, mask, mask, tmp_a);
- } else {
- spe_and(f, mask, mask, tmp_a);
- }
-
- spe_release_register(f, ref);
- spe_release_register(f, tmp_a);
- spe_release_register(f, tmp_b);
- } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) {
- spe_il(f, mask, 0);
- }
-}
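
The switch above gets by with only "equal" and "greater-than" float compares: NOTEQUAL, LEQUAL and LESS are produced by computing the opposite predicate (EQUAL, GREATER, GEQUAL) and folding its complement into the fragment mask. A scalar C sketch of that complement trick for two of the cases, using ordinary floats instead of SPE vector registers:

#include <stdbool.h>
#include <stdio.h>

typedef enum { FUNC_EQUAL, FUNC_NOTEQUAL, FUNC_GREATER, FUNC_LEQUAL } func_t;

/* Evaluate a compare using only "equal" and "greater-than" primitives plus
 * a complement flag, the way the generated SPE code folds the result into
 * the fragment mask.
 */
static bool compare(func_t func, float a, float b)
{
   bool result = false;
   bool complement = false;

   switch (func) {
   case FUNC_NOTEQUAL:
      complement = true;            /* NOTEQUAL = NOT(EQUAL)  */
      /* FALLTHROUGH */
   case FUNC_EQUAL:
      result = (a == b);
      break;
   case FUNC_LEQUAL:
      complement = true;            /* LEQUAL = NOT(GREATER)  */
      /* FALLTHROUGH */
   case FUNC_GREATER:
      result = (a > b);
      break;
   }
   /* and/andc into the mask corresponds to result / !result here */
   return complement ? !result : result;
}

int main(void)
{
   printf("LEQUAL(0.5, 0.5) = %d\n", compare(FUNC_LEQUAL, 0.5f, 0.5f));
   printf("NOTEQUAL(0.5, 0.25) = %d\n", compare(FUNC_NOTEQUAL, 0.5f, 0.25f));
   return 0;
}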
-
-
-/**
- * Generate code to perform Z testing. Four Z values are tested at once.
- * \param dsa Current depth-test state
- * \param f Function to which code should be appended
- * \param mask Index of register to contain depth-pass mask
- * \param stored Index of register containing values from depth buffer
- * \param calculated Index of register containing per-fragment depth values
- *
- * \return
- * If the calculated depth comparison mask is the actual mask, \c FALSE is
- * returned. If the calculated depth comparison mask is the complement of
- * the actual mask, \c TRUE is returned.
- *
- * \note Emits a maximum of 3 instructions.
- */
-static boolean
-emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
- struct spe_function *f, int mask, int stored, int calculated)
-{
- unsigned func = (dsa->depth.enabled)
- ? dsa->depth.func : PIPE_FUNC_ALWAYS;
- int tmp = spe_allocate_available_register(f);
- boolean complement = FALSE;
-
- switch (func) {
- case PIPE_FUNC_NEVER:
- spe_il(f, mask, 0);
- break;
-
- case PIPE_FUNC_NOTEQUAL:
- complement = TRUE;
- /* FALLTHROUGH */
- case PIPE_FUNC_EQUAL:
- spe_ceq(f, mask, calculated, stored);
- break;
-
- case PIPE_FUNC_LEQUAL:
- complement = TRUE;
- /* FALLTHROUGH */
- case PIPE_FUNC_GREATER:
- spe_clgt(f, mask, calculated, stored);
- break;
-
- case PIPE_FUNC_LESS:
- complement = TRUE;
- /* FALLTHROUGH */
- case PIPE_FUNC_GEQUAL:
- spe_clgt(f, mask, calculated, stored);
- spe_ceq(f, tmp, calculated, stored);
- spe_or(f, mask, mask, tmp);
- break;
-
- case PIPE_FUNC_ALWAYS:
- spe_il(f, mask, ~0);
- break;
-
- default:
- assert(0);
- break;
- }
-
- spe_release_register(f, tmp);
- return complement;
-}
-
-
-/**
- * Generate code to apply the stencil operation (after testing).
- * \note Emits a maximum of 5 instructions.
- *
- * \warning
- * Since \c out and \c in might be the same register, this routine cannot
- * generate code that uses \c out as a temporary.
- */
-static void
-emit_stencil_op(struct spe_function *f,
- int out, int in, int mask, unsigned op, unsigned ref)
-{
- const int clamp = spe_allocate_available_register(f);
- const int clamp_mask = spe_allocate_available_register(f);
- const int result = spe_allocate_available_register(f);
-
- switch(op) {
- case PIPE_STENCIL_OP_KEEP:
- assert(0);
- case PIPE_STENCIL_OP_ZERO:
- spe_il(f, result, 0);
- break;
- case PIPE_STENCIL_OP_REPLACE:
- spe_il(f, result, ref);
- break;
- case PIPE_STENCIL_OP_INCR:
- /* clamp = [0xff, 0xff, 0xff, 0xff] */
- spe_il(f, clamp, 0x0ff);
- /* result[i] = in[i] + 1 */
- spe_ai(f, result, in, 1);
- /* clamp_mask[i] = (result[i] > 0xff) */
- spe_clgti(f, clamp_mask, result, 0x0ff);
- /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
- spe_selb(f, result, result, clamp, clamp_mask);
- break;
- case PIPE_STENCIL_OP_DECR:
- spe_il(f, clamp, 0);
- spe_ai(f, result, in, -1);
-
- /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned
- * arithmetic.
- */
- spe_clgti(f, clamp_mask, result, 0x0ff);
- spe_selb(f, result, result, clamp, clamp_mask);
- break;
- case PIPE_STENCIL_OP_INCR_WRAP:
- spe_ai(f, result, in, 1);
- break;
- case PIPE_STENCIL_OP_DECR_WRAP:
- spe_ai(f, result, in, -1);
- break;
- case PIPE_STENCIL_OP_INVERT:
- spe_nor(f, result, in, in);
- break;
- default:
- assert(0);
- }
-
- spe_selb(f, out, in, result, mask);
-
- spe_release_register(f, result);
- spe_release_register(f, clamp_mask);
- spe_release_register(f, clamp);
-}
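
PIPE_STENCIL_OP_INCR and PIPE_STENCIL_OP_DECR clamp the 8-bit stencil value rather than wrapping: the add is performed in a wider register and any result outside 0..0xff is replaced via a select, as the comment about signed vs. unsigned arithmetic explains. A scalar C sketch of that clamp with plain unsigned integers:

#include <stdio.h>

/* Saturating stencil increment/decrement of an 8-bit value carried in a
 * wider integer, mirroring the clgti + selb clamp emitted above.
 */
static unsigned stencil_incr_clamp(unsigned s)
{
   unsigned result = s + 1;
   return (result > 0xff) ? 0xff : result;   /* clamp high */
}

static unsigned stencil_decr_clamp(unsigned s)
{
   /* 0 - 1 wraps to a huge unsigned value, which the same "> 0xff" test
    * catches and clamps to 0 (the "(s-1) > MAX" observation above).
    */
   unsigned result = s - 1;
   return (result > 0xff) ? 0 : result;      /* clamp low */
}

int main(void)
{
   printf("incr(0xff) = 0x%02x\n", stencil_incr_clamp(0xff));   /* 0xff */
   printf("decr(0x00) = 0x%02x\n", stencil_decr_clamp(0x00));   /* 0x00 */
   return 0;
}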
-
-
-/**
- * Generate code to do stencil test. Four pixels are tested at once.
- * \param dsa Depth / stencil test state
- * \param face 0 for front face, 1 for back face
- * \param f Function to append instructions to
- * \param mask Register containing mask of fragments passing the
- * alpha test
- * \param depth_mask Register containing mask of fragments passing the
- * depth test
- * \param depth_complement Is \c depth_mask the complement of the actual mask?
- * \param stencil Register containing values from stencil buffer
- * \param depth_pass Register to store mask of fragments passing stencil test
- * and depth test
- *
- * \note
- * Emits a maximum of 10 + (3 * 5) = 25 instructions.
- */
-static int
-emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
- struct pipe_stencil_ref *sr,
- unsigned face,
- struct spe_function *f,
- int mask,
- int depth_mask,
- boolean depth_complement,
- int stencil,
- int depth_pass)
-{
- int stencil_fail = spe_allocate_available_register(f);
- int depth_fail = spe_allocate_available_register(f);
- int stencil_mask = spe_allocate_available_register(f);
- int stencil_pass = spe_allocate_available_register(f);
- int face_stencil = spe_allocate_available_register(f);
- int stencil_src = stencil;
- const unsigned ref = (sr->ref_value[face]
- & dsa->stencil[face].valuemask);
- boolean complement = FALSE;
- int stored;
- int tmp = spe_allocate_available_register(f);
-
-
- if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
- && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
- && (dsa->stencil[face].valuemask != 0x0ff)) {
- stored = spe_allocate_available_register(f);
- spe_andi(f, stored, stencil, dsa->stencil[face].valuemask);
- } else {
- stored = stencil;
- }
-
-
- switch (dsa->stencil[face].func) {
- case PIPE_FUNC_NEVER:
- spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */
- break;
-
- case PIPE_FUNC_NOTEQUAL:
- complement = TRUE;
- /* FALLTHROUGH */
- case PIPE_FUNC_EQUAL:
- /* stencil_mask[i] = (stored[i] == ref) */
- spe_ceqi(f, stencil_mask, stored, ref);
- break;
-
- case PIPE_FUNC_LEQUAL:
- complement = TRUE;
- /* FALLTHROUGH */
- case PIPE_FUNC_GREATER:
- /* stencil_mask[i] = (stored[i] > ref) */
- spe_clgti(f, stencil_mask, stored, ref);
- break;
-
- case PIPE_FUNC_LESS:
- complement = TRUE;
- /* FALLTHROUGH */
- case PIPE_FUNC_GEQUAL:
- /* stencil_mask[i] = (stored[i] > ref) */
- spe_clgti(f, stencil_mask, stored, ref);
- /* tmp[i] = (stored[i] == ref) */
- spe_ceqi(f, tmp, stored, ref);
- /* stencil_mask[i] = stencil_mask[i] | tmp[i] */
- spe_or(f, stencil_mask, stencil_mask, tmp);
- break;
-
- case PIPE_FUNC_ALWAYS:
- /* See comment below. */
- break;
-
- default:
- assert(0);
- break;
- }
-
- if (stored != stencil) {
- spe_release_register(f, stored);
- }
- spe_release_register(f, tmp);
-
-
- /* ALWAYS is a very common stencil test, so some effort is applied to
- * optimize that case. The stencil-pass mask is the same as the input
- * fragment mask, which makes the stencil test (above) a no-op; the
- * input fragment mask can simply be reused as the stencil-pass mask.
- */
- if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) {
- spe_release_register(f, stencil_pass);
- stencil_pass = mask;
- } else {
- if (complement) {
- spe_andc(f, stencil_pass, mask, stencil_mask);
- } else {
- spe_and(f, stencil_pass, mask, stencil_mask);
- }
- }
-
- if (depth_complement) {
- spe_andc(f, depth_pass, stencil_pass, depth_mask);
- } else {
- spe_and(f, depth_pass, stencil_pass, depth_mask);
- }
-
-
- /* Conditionally emit code to update the stencil value under various
- * conditions. Note that there is no need to generate code under the
- * following circumstances:
- *
- * - Stencil write mask is zero.
- * - For stencil-fail if the stencil test is ALWAYS
- * - For depth-fail if the stencil test is NEVER
- * - For depth-pass if the stencil test is NEVER
- * - Any of the 3 conditions if the operation is KEEP
- */
- if (dsa->stencil[face].writemask != 0) {
- if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
- && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
- if (complement) {
- spe_and(f, stencil_fail, mask, stencil_mask);
- } else {
- spe_andc(f, stencil_fail, mask, stencil_mask);
- }
-
- emit_stencil_op(f, face_stencil, stencil_src, stencil_fail,
- dsa->stencil[face].fail_op,
- sr->ref_value[face]);
-
- stencil_src = face_stencil;
- }
-
- if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
- && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) {
- if (depth_complement) {
- spe_and(f, depth_fail, stencil_pass, depth_mask);
- } else {
- spe_andc(f, depth_fail, stencil_pass, depth_mask);
- }
-
- emit_stencil_op(f, face_stencil, stencil_src, depth_fail,
- dsa->stencil[face].zfail_op,
- sr->ref_value[face]);
- stencil_src = face_stencil;
- }
-
- if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
- && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) {
- emit_stencil_op(f, face_stencil, stencil_src, depth_pass,
- dsa->stencil[face].zpass_op,
- sr->ref_value[face]);
- stencil_src = face_stencil;
- }
- }
-
- spe_release_register(f, stencil_fail);
- spe_release_register(f, depth_fail);
- spe_release_register(f, stencil_mask);
- if (stencil_pass != mask) {
- spe_release_register(f, stencil_pass);
- }
-
- /* If all of the stencil operations were KEEP or the stencil write mask was
- * zero, "stencil_src" will still be set to "stencil". In this case
- * release the "face_stencil" register. Otherwise apply the stencil write
- * mask to select bits from the calculated stencil value and the previous
- * stencil value.
- */
- if (stencil_src == stencil) {
- spe_release_register(f, face_stencil);
- } else if (dsa->stencil[face].writemask != 0x0ff) {
- int tmp = spe_allocate_available_register(f);
-
- spe_il(f, tmp, dsa->stencil[face].writemask);
- spe_selb(f, stencil_src, stencil, stencil_src, tmp);
-
- spe_release_register(f, tmp);
- }
-
- return stencil_src;
-}
-
-
-void
-cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa,
- struct pipe_stencil_ref *sr)
-{
- struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base;
- struct spe_function *const f = &cdsa->code;
-
- /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
- * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
- * up to 64 to make it a happy power-of-two.
- */
- spe_init_func(f, SPE_INST_SIZE * 64);
-
-
- /* Allocate registers for the function's input parameters. Cleverly (and
- * clever code is usually dangerous, but I couldn't resist) the generated
- * function returns a structure. Returned structures start with register
- * 3, and the structure fields are ordered to match up exactly with the
- * input parameters.
- */
- int mask = spe_allocate_register(f, 3);
- int depth = spe_allocate_register(f, 4);
- int stencil = spe_allocate_register(f, 5);
- int zvals = spe_allocate_register(f, 6);
- int frag_a = spe_allocate_register(f, 7);
- int facing = spe_allocate_register(f, 8);
-
- int depth_mask = spe_allocate_available_register(f);
-
- boolean depth_complement;
-
-
- emit_alpha_test(dsa, f, mask, frag_a);
-
- depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals);
-
- if (dsa->stencil[0].enabled) {
- const int front_depth_pass = spe_allocate_available_register(f);
- int front_stencil = emit_stencil_test(dsa, sr, 0, f, mask,
- depth_mask, depth_complement,
- stencil, front_depth_pass);
-
- if (dsa->stencil[1].enabled) {
- const int back_depth_pass = spe_allocate_available_register(f);
- int back_stencil = emit_stencil_test(dsa, sr, 1, f, mask,
- depth_mask, depth_complement,
- stencil, back_depth_pass);
-
- /* If the front facing stencil value and the back facing stencil
- * value are stored in the same register, there is no need to select
- * a value based on the facing. This can happen if the stencil value
- * was not modified due to the write masks being zero, the stencil
- * operations being KEEP, etc.
- */
- if (front_stencil != back_stencil) {
- spe_selb(f, stencil, back_stencil, front_stencil, facing);
- }
-
- if (back_stencil != stencil) {
- spe_release_register(f, back_stencil);
- }
-
- if (front_stencil != stencil) {
- spe_release_register(f, front_stencil);
- }
-
- spe_selb(f, mask, back_depth_pass, front_depth_pass, facing);
-
- spe_release_register(f, back_depth_pass);
- } else {
- if (front_stencil != stencil) {
- spe_or(f, stencil, front_stencil, front_stencil);
- spe_release_register(f, front_stencil);
- }
- spe_or(f, mask, front_depth_pass, front_depth_pass);
- }
-
- spe_release_register(f, front_depth_pass);
- } else if (dsa->depth.enabled) {
- if (depth_complement) {
- spe_andc(f, mask, mask, depth_mask);
- } else {
- spe_and(f, mask, mask, depth_mask);
- }
- }
-
- if (dsa->depth.writemask) {
- spe_selb(f, depth, depth, zvals, mask);
- }
-
- spe_bi(f, 0, 0, 0); /* return from function call */
-
-
-#if 0
- {
- const uint32_t *p = f->store;
- unsigned i;
-
- printf("# alpha (%sabled)\n",
- (dsa->alpha.enabled) ? "en" : "dis");
- printf("# func: %u\n", dsa->alpha.func);
- printf("# ref: %.2f\n", dsa->alpha.ref);
-
- printf("# depth (%sabled)\n",
- (dsa->depth.enabled) ? "en" : "dis");
- printf("# func: %u\n", dsa->depth.func);
-
- for (i = 0; i < 2; i++) {
- printf("# %s stencil (%sabled)\n",
- (i == 0) ? "front" : "back",
- (dsa->stencil[i].enabled) ? "en" : "dis");
-
- printf("# func: %u\n", dsa->stencil[i].func);
- printf("# op (sf, zf, zp): %u %u %u\n",
- dsa->stencil[i].fail_op,
- dsa->stencil[i].zfail_op,
- dsa->stencil[i].zpass_op);
- printf("# ref value / value mask / write mask: %02x %02x %02x\n",
- sr->ref_value[i],
- dsa->stencil[i].valuemask,
- dsa->stencil[i].writemask);
- }
-
- printf("\t.text\n");
- for (/* empty */; p < f->csr; p++) {
- printf("\t.long\t0x%04x\n", *p);
- }
- fflush(stdout);
- }
-#endif
-}
-
-
-/**
- * \note Emits a maximum of 3 instructions
- */
-static int
-emit_alpha_factor_calculation(struct spe_function *f,
- unsigned factor,
- int src_alpha, int dst_alpha, int const_alpha)
-{
- int factor_reg;
- int tmp;
-
-
- switch (factor) {
- case PIPE_BLENDFACTOR_ONE:
- factor_reg = -1;
- break;
-
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- factor_reg = spe_allocate_available_register(f);
-
- spe_or(f, factor_reg, src_alpha, src_alpha);
- break;
-
- case PIPE_BLENDFACTOR_DST_ALPHA:
- factor_reg = dst_alpha;
- break;
-
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- factor_reg = -1;
- break;
-
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- factor_reg = spe_allocate_available_register(f);
-
- tmp = spe_allocate_available_register(f);
- spe_il(f, tmp, 1);
- spe_cuflt(f, tmp, tmp, 0);
- spe_fs(f, factor_reg, tmp, const_alpha);
- spe_release_register(f, tmp);
- break;
-
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- factor_reg = const_alpha;
- break;
-
- case PIPE_BLENDFACTOR_ZERO:
- factor_reg = -1;
- break;
-
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- tmp = spe_allocate_available_register(f);
- factor_reg = spe_allocate_available_register(f);
-
- spe_il(f, tmp, 1);
- spe_cuflt(f, tmp, tmp, 0);
- spe_fs(f, factor_reg, tmp, src_alpha);
-
- spe_release_register(f, tmp);
- break;
-
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- tmp = spe_allocate_available_register(f);
- factor_reg = spe_allocate_available_register(f);
-
- spe_il(f, tmp, 1);
- spe_cuflt(f, tmp, tmp, 0);
- spe_fs(f, factor_reg, tmp, dst_alpha);
-
- spe_release_register(f, tmp);
- break;
-
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- default:
- assert(0);
- factor_reg = -1;
- break;
- }
-
- return factor_reg;
-}
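
Every INV_* factor above is built the same way: load the integer 1, convert it to float with cuflt, and subtract the alpha with fs, giving 1.0 - a per lane. The scalar equivalent is a one-liner, shown only to make the three-instruction sequence explicit.

#include <stdio.h>

/* 1.0 - a, the value produced by the il / cuflt / fs sequence above. */
static float inv_factor(float a)
{
   return 1.0f - a;
}

int main(void)
{
   /* e.g. PIPE_BLENDFACTOR_INV_SRC_ALPHA with a source alpha of 0.25 */
   printf("inv_factor(0.25) = %f\n", inv_factor(0.25f));
   return 0;
}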
-
-
-/**
- * \note Emits a maximum of 6 instructions
- */
-static void
-emit_color_factor_calculation(struct spe_function *f,
- unsigned sF, unsigned mask,
- const int *src,
- const int *dst,
- const int *const_color,
- int *factor)
-{
- int tmp;
- unsigned i;
-
-
- factor[0] = -1;
- factor[1] = -1;
- factor[2] = -1;
- factor[3] = -1;
-
- switch (sF) {
- case PIPE_BLENDFACTOR_ONE:
- break;
-
- case PIPE_BLENDFACTOR_SRC_COLOR:
- for (i = 0; i < 3; ++i) {
- if ((mask & (1U << i)) != 0) {
- factor[i] = spe_allocate_available_register(f);
- spe_or(f, factor[i], src[i], src[i]);
- }
- }
- break;
-
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- factor[0] = spe_allocate_available_register(f);
- factor[1] = factor[0];
- factor[2] = factor[0];
-
- spe_or(f, factor[0], src[3], src[3]);
- break;
-
- case PIPE_BLENDFACTOR_DST_ALPHA:
- factor[0] = dst[3];
- factor[1] = dst[3];
- factor[2] = dst[3];
- break;
-
- case PIPE_BLENDFACTOR_DST_COLOR:
- factor[0] = dst[0];
- factor[1] = dst[1];
- factor[2] = dst[2];
- break;
-
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- tmp = spe_allocate_available_register(f);
- factor[0] = spe_allocate_available_register(f);
- factor[1] = factor[0];
- factor[2] = factor[0];
-
- /* Alpha saturate means min(As, 1-Ad).
- */
- spe_il(f, tmp, 1);
- spe_cuflt(f, tmp, tmp, 0);
- spe_fs(f, tmp, tmp, dst[3]);
- spe_fcgt(f, factor[0], tmp, src[3]);
- spe_selb(f, factor[0], src[3], tmp, factor[0]);
-
- spe_release_register(f, tmp);
- break;
-
- case PIPE_BLENDFACTOR_INV_CONST_COLOR:
- tmp = spe_allocate_available_register(f);
- spe_il(f, tmp, 1);
- spe_cuflt(f, tmp, tmp, 0);
-
- for (i = 0; i < 3; i++) {
- factor[i] = spe_allocate_available_register(f);
-
- spe_fs(f, factor[i], tmp, const_color[i]);
- }
- spe_release_register(f, tmp);
- break;
-
- case PIPE_BLENDFACTOR_CONST_COLOR:
- for (i = 0; i < 3; i++) {
- factor[i] = const_color[i];
- }
- break;
-
- case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
- factor[0] = spe_allocate_available_register(f);
- factor[1] = factor[0];
- factor[2] = factor[0];
-
- tmp = spe_allocate_available_register(f);
- spe_il(f, tmp, 1);
- spe_cuflt(f, tmp, tmp, 0);
- spe_fs(f, factor[0], tmp, const_color[3]);
- spe_release_register(f, tmp);
- break;
-
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- factor[0] = const_color[3];
- factor[1] = factor[0];
- factor[2] = factor[0];
- break;
-
- case PIPE_BLENDFACTOR_ZERO:
- break;
-
- case PIPE_BLENDFACTOR_INV_SRC_COLOR:
- tmp = spe_allocate_available_register(f);
-
- spe_il(f, tmp, 1);
- spe_cuflt(f, tmp, tmp, 0);
-
- for (i = 0; i < 3; ++i) {
- if ((mask & (1U << i)) != 0) {
- factor[i] = spe_allocate_available_register(f);
- spe_fs(f, factor[i], tmp, src[i]);
- }
- }
-
- spe_release_register(f, tmp);
- break;
-
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- tmp = spe_allocate_available_register(f);
- factor[0] = spe_allocate_available_register(f);
- factor[1] = factor[0];
- factor[2] = factor[0];
-
- spe_il(f, tmp, 1);
- spe_cuflt(f, tmp, tmp, 0);
- spe_fs(f, factor[0], tmp, src[3]);
-
- spe_release_register(f, tmp);
- break;
-
- case PIPE_BLENDFACTOR_INV_DST_ALPHA:
- tmp = spe_allocate_available_register(f);
- factor[0] = spe_allocate_available_register(f);
- factor[1] = factor[0];
- factor[2] = factor[0];
-
- spe_il(f, tmp, 1);
- spe_cuflt(f, tmp, tmp, 0);
- spe_fs(f, factor[0], tmp, dst[3]);
-
- spe_release_register(f, tmp);
- break;
-
- case PIPE_BLENDFACTOR_INV_DST_COLOR:
- tmp = spe_allocate_available_register(f);
-
- spe_il(f, tmp, 1);
- spe_cuflt(f, tmp, tmp, 0);
-
- for (i = 0; i < 3; ++i) {
- if ((mask & (1U << i)) != 0) {
- factor[i] = spe_allocate_available_register(f);
- spe_fs(f, factor[i], tmp, dst[i]);
- }
- }
-
- spe_release_register(f, tmp);
- break;
-
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- case PIPE_BLENDFACTOR_SRC1_ALPHA:
- case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
- case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
- default:
- assert(0);
- }
-}
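
The SRC_ALPHA_SATURATE case computes the factor min(As, 1 - Ad) noted in the comment, using a subtract, a compare and a select. A scalar sketch of that computation with plain floats and made-up values:

#include <stdio.h>

/* min(src_alpha, 1 - dst_alpha), the value the fs / fcgt / selb sequence
 * in the SRC_ALPHA_SATURATE case is meant to produce.
 */
static float alpha_saturate_factor(float src_alpha, float dst_alpha)
{
   float tmp = 1.0f - dst_alpha;     /* fs:   1 - Ad            */
   int gt = (tmp > src_alpha);       /* fcgt: is 1 - Ad larger? */
   return gt ? src_alpha : tmp;      /* selb: keep the smaller  */
}

int main(void)
{
   printf("factor(0.8, 0.5) = %f\n", alpha_saturate_factor(0.8f, 0.5f));  /* 0.5 */
   printf("factor(0.2, 0.5) = %f\n", alpha_saturate_factor(0.2f, 0.5f));  /* 0.2 */
   return 0;
}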
-
-
-static void
-emit_blend_calculation(struct spe_function *f,
- unsigned func, unsigned sF, unsigned dF,
- int src, int src_factor, int dst, int dst_factor)
-{
- int tmp = spe_allocate_available_register(f);
-
- switch (func) {
- case PIPE_BLEND_ADD:
- if (sF == PIPE_BLENDFACTOR_ONE) {
- if (dF == PIPE_BLENDFACTOR_ZERO) {
- /* Do nothing. */
- } else if (dF == PIPE_BLENDFACTOR_ONE) {
- spe_fa(f, src, src, dst);
- }
- } else if (sF == PIPE_BLENDFACTOR_ZERO) {
- if (dF == PIPE_BLENDFACTOR_ZERO) {
- spe_il(f, src, 0);
- } else if (dF == PIPE_BLENDFACTOR_ONE) {
- spe_or(f, src, dst, dst);
- } else {
- spe_fm(f, src, dst, dst_factor);
- }
- } else if (dF == PIPE_BLENDFACTOR_ZERO) {
- spe_fm(f, src, src, src_factor);
- } else {
- spe_fm(f, tmp, dst, dst_factor);
- spe_fma(f, src, src, src_factor, tmp);
- }
- break;
-
- case PIPE_BLEND_SUBTRACT:
- if (sF == PIPE_BLENDFACTOR_ONE) {
- if (dF == PIPE_BLENDFACTOR_ZERO) {
- /* Do nothing. */
- } else if (dF == PIPE_BLENDFACTOR_ONE) {
- spe_fs(f, src, src, dst);
- }
- } else if (sF == PIPE_BLENDFACTOR_ZERO) {
- if (dF == PIPE_BLENDFACTOR_ZERO) {
- spe_il(f, src, 0);
- } else if (dF == PIPE_BLENDFACTOR_ONE) {
- spe_il(f, tmp, 0);
- spe_fs(f, src, tmp, dst);
- } else {
- spe_fm(f, src, dst, dst_factor);
- }
- } else if (dF == PIPE_BLENDFACTOR_ZERO) {
- spe_fm(f, src, src, src_factor);
- } else {
- spe_fm(f, tmp, dst, dst_factor);
- spe_fms(f, src, src, src_factor, tmp);
- }
- break;
-
- case PIPE_BLEND_REVERSE_SUBTRACT:
- if (sF == PIPE_BLENDFACTOR_ONE) {
- if (dF == PIPE_BLENDFACTOR_ZERO) {
- spe_il(f, tmp, 0);
- spe_fs(f, src, tmp, src);
- } else if (dF == PIPE_BLENDFACTOR_ONE) {
- spe_fs(f, src, dst, src);
- }
- } else if (sF == PIPE_BLENDFACTOR_ZERO) {
- if (dF == PIPE_BLENDFACTOR_ZERO) {
- spe_il(f, src, 0);
- } else if (dF == PIPE_BLENDFACTOR_ONE) {
- spe_or(f, src, dst, dst);
- } else {
- spe_fm(f, src, dst, dst_factor);
- }
- } else if (dF == PIPE_BLENDFACTOR_ZERO) {
- spe_fm(f, src, src, src_factor);
- } else {
- spe_fm(f, tmp, src, src_factor);
- spe_fms(f, src, src, dst_factor, tmp);
- }
- break;
-
- case PIPE_BLEND_MIN:
- spe_cgt(f, tmp, src, dst);
- spe_selb(f, src, src, dst, tmp);
- break;
-
- case PIPE_BLEND_MAX:
- spe_cgt(f, tmp, src, dst);
- spe_selb(f, src, dst, src, tmp);
- break;
-
- default:
- assert(0);
- }
-
- spe_release_register(f, tmp);
-}
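
emit_blend_calculation() is the standard blend equation, result = src * sF (op) dst * dF, with the ONE/ZERO factor combinations peeled off so no multiply is emitted for constant factors, plus compare-and-select for MIN and MAX. A per-channel scalar C sketch of the general paths:

#include <stdio.h>

typedef enum { BLEND_ADD, BLEND_SUBTRACT, BLEND_REV_SUBTRACT,
               BLEND_MIN, BLEND_MAX } blend_func_t;

/* One color channel of the blend equation the generated code computes;
 * the factors are passed in as already-evaluated floats.
 */
static float blend_channel(blend_func_t func,
                           float src, float src_factor,
                           float dst, float dst_factor)
{
   switch (func) {
   case BLEND_ADD:          return src * src_factor + dst * dst_factor;
   case BLEND_SUBTRACT:     return src * src_factor - dst * dst_factor;
   case BLEND_REV_SUBTRACT: return dst * dst_factor - src * src_factor;
   case BLEND_MIN:          return (src > dst) ? dst : src;   /* cgt + selb */
   case BLEND_MAX:          return (src > dst) ? src : dst;   /* cgt + selb */
   }
   return src;
}

int main(void)
{
   /* classic "source over" on one channel: sF = As, dF = 1 - As */
   float as = 0.25f;
   printf("%f\n", blend_channel(BLEND_ADD, 1.0f, as, 0.5f, 1.0f - as));
   return 0;
}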
-
-
-/**
- * Generate code to perform alpha blending on the SPE
- */
-void
-cell_generate_alpha_blend(struct cell_blend_state *cb)
-{
- struct pipe_blend_state *const b = &cb->base;
- struct spe_function *const f = &cb->code;
-
- /* This code generates a maximum of 3 (source alpha factor)
- * + 3 (destination alpha factor) + (3 * 6) (source color factor)
- * + (3 * 6) (destination color factor) + (4 * 2) (blend equation)
- * + 4 (fragment mask) + 1 (return) = 55 instructions. Round up to 64 to
- * make it a happy power-of-two.
- */
- spe_init_func(f, SPE_INST_SIZE * 64);
-
-
- const int frag[4] = {
- spe_allocate_register(f, 3),
- spe_allocate_register(f, 4),
- spe_allocate_register(f, 5),
- spe_allocate_register(f, 6),
- };
- const int pixel[4] = {
- spe_allocate_register(f, 7),
- spe_allocate_register(f, 8),
- spe_allocate_register(f, 9),
- spe_allocate_register(f, 10),
- };
- const int const_color[4] = {
- spe_allocate_register(f, 11),
- spe_allocate_register(f, 12),
- spe_allocate_register(f, 13),
- spe_allocate_register(f, 14),
- };
- unsigned func[4];
- unsigned sF[4];
- unsigned dF[4];
- unsigned i;
- int src_factor[4];
- int dst_factor[4];
-
-
- /* Does the selected blend mode make use of the source / destination
- * color (RGB) blend factors?
- */
- boolean need_color_factor = b->rt[0].blend_enable
- && (b->rt[0].rgb_func != PIPE_BLEND_MIN)
- && (b->rt[0].rgb_func != PIPE_BLEND_MAX);
-
- /* Does the selected blend mode make use of the source / destination
- * alpha blend factors?
- */
- boolean need_alpha_factor = b->rt[0].blend_enable
- && (b->rt[0].alpha_func != PIPE_BLEND_MIN)
- && (b->rt[0].alpha_func != PIPE_BLEND_MAX);
-
-
- if (b->rt[0].blend_enable) {
- sF[0] = b->rt[0].rgb_src_factor;
- sF[1] = sF[0];
- sF[2] = sF[0];
- switch (b->rt[0].alpha_src_factor & 0x0f) {
- case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
- sF[3] = PIPE_BLENDFACTOR_ONE;
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- case PIPE_BLENDFACTOR_DST_COLOR:
- case PIPE_BLENDFACTOR_CONST_COLOR:
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- sF[3] = b->rt[0].alpha_src_factor + 1;
- break;
- default:
- sF[3] = b->rt[0].alpha_src_factor;
- }
-
- dF[0] = b->rt[0].rgb_dst_factor;
- dF[1] = dF[0];
- dF[2] = dF[0];
- switch (b->rt[0].alpha_dst_factor & 0x0f) {
- case PIPE_BLENDFACTOR_SRC_COLOR:
- case PIPE_BLENDFACTOR_DST_COLOR:
- case PIPE_BLENDFACTOR_CONST_COLOR:
- case PIPE_BLENDFACTOR_SRC1_COLOR:
- dF[3] = b->rt[0].alpha_dst_factor + 1;
- break;
- default:
- dF[3] = b->rt[0].alpha_dst_factor;
- }
-
- func[0] = b->rt[0].rgb_func;
- func[1] = func[0];
- func[2] = func[0];
- func[3] = b->rt[0].alpha_func;
- } else {
- sF[0] = PIPE_BLENDFACTOR_ONE;
- sF[1] = PIPE_BLENDFACTOR_ONE;
- sF[2] = PIPE_BLENDFACTOR_ONE;
- sF[3] = PIPE_BLENDFACTOR_ONE;
- dF[0] = PIPE_BLENDFACTOR_ZERO;
- dF[1] = PIPE_BLENDFACTOR_ZERO;
- dF[2] = PIPE_BLENDFACTOR_ZERO;
- dF[3] = PIPE_BLENDFACTOR_ZERO;
-
- func[0] = PIPE_BLEND_ADD;
- func[1] = PIPE_BLEND_ADD;
- func[2] = PIPE_BLEND_ADD;
- func[3] = PIPE_BLEND_ADD;
- }
-
-
- /* If alpha writing is enabled and the alpha blend mode requires use of
- * the alpha factor, calculate the alpha factor.
- */
- if (((b->rt[0].colormask & 8) != 0) && need_alpha_factor) {
- src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3],
- frag[3], pixel[3]);
-
- /* If the alpha destination blend factor is the same as the alpha source
- * blend factor, re-use the previously calculated value.
- */
- dst_factor[3] = (dF[3] == sF[3])
- ? src_factor[3]
- : emit_alpha_factor_calculation(f, dF[3], const_color[3],
- frag[3], pixel[3]);
- }
-
-
- if (sF[0] == sF[3]) {
- src_factor[0] = src_factor[3];
- src_factor[1] = src_factor[3];
- src_factor[2] = src_factor[3];
- } else if (sF[0] == dF[3]) {
- src_factor[0] = dst_factor[3];
- src_factor[1] = dst_factor[3];
- src_factor[2] = dst_factor[3];
- } else if (need_color_factor) {
- emit_color_factor_calculation(f,
- b->rt[0].rgb_src_factor,
- b->rt[0].colormask,
- frag, pixel, const_color, src_factor);
- }
-
-
- if (dF[0] == sF[3]) {
- dst_factor[0] = src_factor[3];
- dst_factor[1] = src_factor[3];
- dst_factor[2] = src_factor[3];
- } else if (dF[0] == dF[3]) {
- dst_factor[0] = dst_factor[3];
- dst_factor[1] = dst_factor[3];
- dst_factor[2] = dst_factor[3];
- } else if (dF[0] == sF[0]) {
- dst_factor[0] = src_factor[0];
- dst_factor[1] = src_factor[1];
- dst_factor[2] = src_factor[2];
- } else if (need_color_factor) {
- emit_color_factor_calculation(f,
- b->rt[0].rgb_dst_factor,
- b->rt[0].colormask,
- frag, pixel, const_color, dst_factor);
- }
-
-
-
- for (i = 0; i < 4; ++i) {
- if ((b->rt[0].colormask & (1U << i)) != 0) {
- emit_blend_calculation(f,
- func[i], sF[i], dF[i],
- frag[i], src_factor[i],
- pixel[i], dst_factor[i]);
- }
- }
-
- spe_bi(f, 0, 0, 0);
-
-#if 0
- {
- const uint32_t *p = f->store;
-
- printf("# %u instructions\n", f->csr - f->store);
- printf("# blend (%sabled)\n",
- (cb->base.blend_enable) ? "en" : "dis");
- printf("# RGB func / sf / df: %u %u %u\n",
- cb->base.rgb_func,
- cb->base.rgb_src_factor,
- cb->base.rgb_dst_factor);
- printf("# ALP func / sf / df: %u %u %u\n",
- cb->base.alpha_func,
- cb->base.alpha_src_factor,
- cb->base.alpha_dst_factor);
-
- printf("\t.text\n");
- for (/* empty */; p < f->csr; p++) {
- printf("\t.long\t0x%04x\n", *p);
- }
- fflush(stdout);
- }
-#endif
-}
-
-
-static int
-PC_OFFSET(const struct spe_function *f, const void *d)
-{
- const intptr_t pc = (intptr_t) &f->store[f->num_inst];
- const intptr_t ea = ~0x0f & (intptr_t) d;
-
- return (ea - pc) >> 2;
-}
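
PC_OFFSET() converts a pointer into the function's constant data area into a PC-relative offset for lqr: the data address is rounded down to 16 bytes, the address of the current instruction slot is subtracted, and the byte difference is shifted down to 32-bit words. The same arithmetic on plain integers:

#include <stdint.h>
#include <stdio.h>

/* PC-relative offset, in 32-bit words, from the instruction at 'pc' to the
 * 16-byte-aligned data at 'data'; the same arithmetic as PC_OFFSET() above.
 */
static int pc_offset(uintptr_t pc, uintptr_t data)
{
   uintptr_t ea = data & ~(uintptr_t) 0x0f;             /* round down to 16 */
   return (int) (((intptr_t) ea - (intptr_t) pc) >> 2); /* bytes -> words   */
}

int main(void)
{
   /* data placed 224 bytes (56 words) after the current instruction */
   printf("offset = %d words\n", pc_offset(0x1000, 0x10e0));
   return 0;
}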
-
-
-/**
- * Generate code to perform color conversion and logic op
- *
- * \bug
- * The code generated by this function should also perform dithering.
- *
- * \bug
- * The code generated by this function should also perform color-write
- * masking.
- *
- * \bug
- * Only two framebuffer formats are supported at this time.
- */
-void
-cell_generate_logic_op(struct spe_function *f,
- const struct pipe_blend_state *blend,
- struct pipe_surface *surf)
-{
- const unsigned logic_op = (blend->logicop_enable)
- ? blend->logicop_func : PIPE_LOGICOP_COPY;
-
- /* This code generates a maximum of 37 instructions. An additional 32
- * bytes (equiv. to 8 instructions) are needed for data storage. Round up
- * to 64 to make it a happy power-of-two.
- */
- spe_init_func(f, SPE_INST_SIZE * 64);
-
-
- /* Pixel colors in framebuffer format in AoS layout.
- */
- const int pixel[4] = {
- spe_allocate_register(f, 3),
- spe_allocate_register(f, 4),
- spe_allocate_register(f, 5),
- spe_allocate_register(f, 6),
- };
-
- /* Fragment colors stored as floats in SoA layout.
- */
- const int frag[4] = {
- spe_allocate_register(f, 7),
- spe_allocate_register(f, 8),
- spe_allocate_register(f, 9),
- spe_allocate_register(f, 10),
- };
-
- const int mask = spe_allocate_register(f, 11);
-
-
- /* Short-circuit the noop and invert cases.
- */
- if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->rt[0].colormask == 0)) {
- spe_bi(f, 0, 0, 0);
- return;
- } else if (logic_op == PIPE_LOGICOP_INVERT) {
- spe_nor(f, pixel[0], pixel[0], pixel[0]);
- spe_nor(f, pixel[1], pixel[1], pixel[1]);
- spe_nor(f, pixel[2], pixel[2], pixel[2]);
- spe_nor(f, pixel[3], pixel[3], pixel[3]);
- spe_bi(f, 0, 0, 0);
- return;
- }
-
-
- const int tmp[4] = {
- spe_allocate_available_register(f),
- spe_allocate_available_register(f),
- spe_allocate_available_register(f),
- spe_allocate_available_register(f),
- };
-
- const int shuf_xpose_hi = spe_allocate_available_register(f);
- const int shuf_xpose_lo = spe_allocate_available_register(f);
- const int shuf_color = spe_allocate_available_register(f);
-
-
- /* Pointer to the beginning of the function's private data area.
- */
- uint32_t *const data = ((uint32_t *) f->store) + (64 - 8);
-
-
- /* Convert fragment colors to framebuffer format in AoS layout.
- */
- switch (surf->format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- data[0] = 0x00010203;
- data[1] = 0x10111213;
- data[2] = 0x04050607;
- data[3] = 0x14151617;
- data[4] = 0x0c000408;
- data[5] = 0x80808080;
- data[6] = 0x80808080;
- data[7] = 0x80808080;
- break;
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- data[0] = 0x03020100;
- data[1] = 0x13121110;
- data[2] = 0x07060504;
- data[3] = 0x17161514;
- data[4] = 0x0804000c;
- data[5] = 0x80808080;
- data[6] = 0x80808080;
- data[7] = 0x80808080;
- break;
- default:
- fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()");
- ASSERT(0);
- }
-
- spe_ilh(f, tmp[0], 0x0808);
- spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0));
- spe_lqr(f, shuf_color, PC_OFFSET(f, data+4));
- spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]);
-
- spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi);
- spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo);
- spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi);
- spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo);
-
- spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi);
- spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo);
- spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi);
- spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo);
-
- spe_cfltu(f, frag[0], frag[0], 32);
- spe_cfltu(f, frag[1], frag[1], 32);
- spe_cfltu(f, frag[2], frag[2], 32);
- spe_cfltu(f, frag[3], frag[3], 32);
-
- spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color);
- spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color);
- spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color);
- spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color);
-
-
- /* If logic op is enabled, perform the requested logical operation on the
- * converted fragment colors and the pixel colors.
- */
- switch (logic_op) {
- case PIPE_LOGICOP_CLEAR:
- spe_il(f, frag[0], 0);
- spe_il(f, frag[1], 0);
- spe_il(f, frag[2], 0);
- spe_il(f, frag[3], 0);
- break;
- case PIPE_LOGICOP_NOR:
- spe_nor(f, frag[0], frag[0], pixel[0]);
- spe_nor(f, frag[1], frag[1], pixel[1]);
- spe_nor(f, frag[2], frag[2], pixel[2]);
- spe_nor(f, frag[3], frag[3], pixel[3]);
- break;
- case PIPE_LOGICOP_AND_INVERTED:
- spe_andc(f, frag[0], pixel[0], frag[0]);
- spe_andc(f, frag[1], pixel[1], frag[1]);
- spe_andc(f, frag[2], pixel[2], frag[2]);
- spe_andc(f, frag[3], pixel[3], frag[3]);
- break;
- case PIPE_LOGICOP_COPY_INVERTED:
- spe_nor(f, frag[0], frag[0], frag[0]);
- spe_nor(f, frag[1], frag[1], frag[1]);
- spe_nor(f, frag[2], frag[2], frag[2]);
- spe_nor(f, frag[3], frag[3], frag[3]);
- break;
- case PIPE_LOGICOP_AND_REVERSE:
- spe_andc(f, frag[0], frag[0], pixel[0]);
- spe_andc(f, frag[1], frag[1], pixel[1]);
- spe_andc(f, frag[2], frag[2], pixel[2]);
- spe_andc(f, frag[3], frag[3], pixel[3]);
- break;
- case PIPE_LOGICOP_XOR:
- spe_xor(f, frag[0], frag[0], pixel[0]);
- spe_xor(f, frag[1], frag[1], pixel[1]);
- spe_xor(f, frag[2], frag[2], pixel[2]);
- spe_xor(f, frag[3], frag[3], pixel[3]);
- break;
- case PIPE_LOGICOP_NAND:
- spe_nand(f, frag[0], frag[0], pixel[0]);
- spe_nand(f, frag[1], frag[1], pixel[1]);
- spe_nand(f, frag[2], frag[2], pixel[2]);
- spe_nand(f, frag[3], frag[3], pixel[3]);
- break;
- case PIPE_LOGICOP_AND:
- spe_and(f, frag[0], frag[0], pixel[0]);
- spe_and(f, frag[1], frag[1], pixel[1]);
- spe_and(f, frag[2], frag[2], pixel[2]);
- spe_and(f, frag[3], frag[3], pixel[3]);
- break;
- case PIPE_LOGICOP_EQUIV:
- spe_eqv(f, frag[0], frag[0], pixel[0]);
- spe_eqv(f, frag[1], frag[1], pixel[1]);
- spe_eqv(f, frag[2], frag[2], pixel[2]);
- spe_eqv(f, frag[3], frag[3], pixel[3]);
- break;
- case PIPE_LOGICOP_OR_INVERTED:
- spe_orc(f, frag[0], pixel[0], frag[0]);
- spe_orc(f, frag[1], pixel[1], frag[1]);
- spe_orc(f, frag[2], pixel[2], frag[2]);
- spe_orc(f, frag[3], pixel[3], frag[3]);
- break;
- case PIPE_LOGICOP_COPY:
- break;
- case PIPE_LOGICOP_OR_REVERSE:
- spe_orc(f, frag[0], frag[0], pixel[0]);
- spe_orc(f, frag[1], frag[1], pixel[1]);
- spe_orc(f, frag[2], frag[2], pixel[2]);
- spe_orc(f, frag[3], frag[3], pixel[3]);
- break;
- case PIPE_LOGICOP_OR:
- spe_or(f, frag[0], frag[0], pixel[0]);
- spe_or(f, frag[1], frag[1], pixel[1]);
- spe_or(f, frag[2], frag[2], pixel[2]);
- spe_or(f, frag[3], frag[3], pixel[3]);
- break;
- case PIPE_LOGICOP_SET:
- spe_il(f, frag[0], ~0);
- spe_il(f, frag[1], ~0);
- spe_il(f, frag[2], ~0);
- spe_il(f, frag[3], ~0);
- break;
-
- /* These two cases are short-circuited above.
- */
- case PIPE_LOGICOP_INVERT:
- case PIPE_LOGICOP_NOOP:
- default:
- assert(0);
- }
-
-
- /* Apply fragment mask.
- */
- spe_ilh(f, tmp[0], 0x0000);
- spe_ilh(f, tmp[1], 0x0404);
- spe_ilh(f, tmp[2], 0x0808);
- spe_ilh(f, tmp[3], 0x0c0c);
-
- spe_shufb(f, tmp[0], mask, mask, tmp[0]);
- spe_shufb(f, tmp[1], mask, mask, tmp[1]);
- spe_shufb(f, tmp[2], mask, mask, tmp[2]);
- spe_shufb(f, tmp[3], mask, mask, tmp[3]);
-
- spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]);
- spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]);
- spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]);
- spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]);
-
- spe_bi(f, 0, 0, 0);
-
-#if 0
- {
- const uint32_t *p = f->store;
- unsigned i;
-
- printf("# %u instructions\n", f->csr - f->store);
-
- printf("\t.text\n");
- for (i = 0; i < 64; i++) {
- printf("\t.long\t0x%04x\n", p[i]);
- }
- fflush(stdout);
- }
-#endif
-}
+++ /dev/null
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef CELL_STATE_PER_FRAGMENT_H
-#define CELL_STATE_PER_FRAGMENT_H
-
-extern void
-cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa,
-                                 struct pipe_stencil_ref *sr);
-
-extern void
-cell_generate_alpha_blend(struct cell_blend_state *cb);
-
-extern void
-cell_generate_logic_op(struct spe_function *f,
- const struct pipe_blend_state *blend,
- struct pipe_surface *surf);
-
-#endif /* CELL_STATE_PER_FRAGMENT_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "draw/draw_context.h"
-#include "tgsi/tgsi_parse.h"
-
-#include "cell_context.h"
-#include "cell_state.h"
-#include "cell_gen_fp.h"
-#include "cell_texture.h"
-
-
-/** cast wrapper */
-static INLINE struct cell_fragment_shader_state *
-cell_fragment_shader_state(void *shader)
-{
- return (struct cell_fragment_shader_state *) shader;
-}
-
-
-/** cast wrapper */
-static INLINE struct cell_vertex_shader_state *
-cell_vertex_shader_state(void *shader)
-{
- return (struct cell_vertex_shader_state *) shader;
-}
-
-
-/**
- * Create fragment shader state.
- * Called via pipe->create_fs_state()
- */
-static void *
-cell_create_fs_state(struct pipe_context *pipe,
- const struct pipe_shader_state *templ)
-{
- struct cell_context *cell = cell_context(pipe);
- struct cell_fragment_shader_state *cfs;
-
- cfs = CALLOC_STRUCT(cell_fragment_shader_state);
- if (!cfs)
- return NULL;
-
- cfs->shader.tokens = tgsi_dup_tokens(templ->tokens);
- if (!cfs->shader.tokens) {
- FREE(cfs);
- return NULL;
- }
-
- tgsi_scan_shader(templ->tokens, &cfs->info);
-
- cell_gen_fragment_program(cell, cfs->shader.tokens, &cfs->code);
-
- return cfs;
-}
-
-
-/**
- * Called via pipe->bind_fs_state()
- */
-static void
-cell_bind_fs_state(struct pipe_context *pipe, void *fs)
-{
- struct cell_context *cell = cell_context(pipe);
-
- cell->fs = cell_fragment_shader_state(fs);
-
- cell->dirty |= CELL_NEW_FS;
-}
-
-
-/**
- * Called via pipe->delete_fs_state()
- */
-static void
-cell_delete_fs_state(struct pipe_context *pipe, void *fs)
-{
- struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs);
-
- spe_release_func(&cfs->code);
-
- FREE((void *) cfs->shader.tokens);
- FREE(cfs);
-}
-
-
-/**
- * Create vertex shader state.
- * Called via pipe->create_vs_state()
- */
-static void *
-cell_create_vs_state(struct pipe_context *pipe,
- const struct pipe_shader_state *templ)
-{
- struct cell_context *cell = cell_context(pipe);
- struct cell_vertex_shader_state *cvs;
-
- cvs = CALLOC_STRUCT(cell_vertex_shader_state);
- if (!cvs)
- return NULL;
-
- cvs->shader.tokens = tgsi_dup_tokens(templ->tokens);
- if (!cvs->shader.tokens) {
- FREE(cvs);
- return NULL;
- }
-
- tgsi_scan_shader(templ->tokens, &cvs->info);
-
- cvs->draw_data = draw_create_vertex_shader(cell->draw, &cvs->shader);
- if (cvs->draw_data == NULL) {
- FREE( (void *) cvs->shader.tokens );
- FREE( cvs );
- return NULL;
- }
-
- return cvs;
-}
-
-
-/**
- * Called via pipe->bind_vs_state()
- */
-static void
-cell_bind_vs_state(struct pipe_context *pipe, void *vs)
-{
- struct cell_context *cell = cell_context(pipe);
-
- cell->vs = cell_vertex_shader_state(vs);
-
- draw_bind_vertex_shader(cell->draw,
- (cell->vs ? cell->vs->draw_data : NULL));
-
- cell->dirty |= CELL_NEW_VS;
-}
-
-
-/**
- * Called via pipe->delete_vs_state()
- */
-static void
-cell_delete_vs_state(struct pipe_context *pipe, void *vs)
-{
- struct cell_context *cell = cell_context(pipe);
- struct cell_vertex_shader_state *cvs = cell_vertex_shader_state(vs);
-
- draw_delete_vertex_shader(cell->draw, cvs->draw_data);
- FREE( (void *) cvs->shader.tokens );
- FREE( cvs );
-}
-
-
-/**
- * Called via pipe->set_constant_buffer()
- */
-static void
-cell_set_constant_buffer(struct pipe_context *pipe,
- uint shader, uint index,
- struct pipe_resource *constants)
-{
- struct cell_context *cell = cell_context(pipe);
- unsigned size = constants ? constants->width0 : 0;
- const void *data = constants ? cell_resource(constants)->data : NULL;
-
- assert(shader < PIPE_SHADER_TYPES);
- assert(index == 0);
-
- if (cell->constants[shader] == constants)
- return;
-
- draw_flush(cell->draw);
-
- /* note: reference counting */
- pipe_resource_reference(&cell->constants[shader], constants);
-
- if(shader == PIPE_SHADER_VERTEX) {
- draw_set_mapped_constant_buffer(cell->draw, PIPE_SHADER_VERTEX, 0,
- data, size);
- }
-
- cell->mapped_constants[shader] = data;
-
- if (shader == PIPE_SHADER_VERTEX)
- cell->dirty |= CELL_NEW_VS_CONSTANTS;
- else if (shader == PIPE_SHADER_FRAGMENT)
- cell->dirty |= CELL_NEW_FS_CONSTANTS;
-}
-
-
-void
-cell_init_shader_functions(struct cell_context *cell)
-{
- cell->pipe.create_fs_state = cell_create_fs_state;
- cell->pipe.bind_fs_state = cell_bind_fs_state;
- cell->pipe.delete_fs_state = cell_delete_fs_state;
-
- cell->pipe.create_vs_state = cell_create_vs_state;
- cell->pipe.bind_vs_state = cell_bind_vs_state;
- cell->pipe.delete_vs_state = cell_delete_vs_state;
-
- cell->pipe.set_constant_buffer = cell_set_constant_buffer;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* Authors: Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
-#include "cell_context.h"
-#include "cell_state.h"
-
-#include "util/u_memory.h"
-#include "util/u_transfer.h"
-#include "draw/draw_context.h"
-
-
-static void *
-cell_create_vertex_elements_state(struct pipe_context *pipe,
- unsigned count,
- const struct pipe_vertex_element *attribs)
-{
- struct cell_velems_state *velems;
- assert(count <= PIPE_MAX_ATTRIBS);
- velems = (struct cell_velems_state *) MALLOC(sizeof(struct cell_velems_state));
- if (velems) {
- velems->count = count;
- memcpy(velems->velem, attribs, sizeof(*attribs) * count);
- }
- return velems;
-}
-
-static void
-cell_bind_vertex_elements_state(struct pipe_context *pipe,
- void *velems)
-{
- struct cell_context *cell = cell_context(pipe);
- struct cell_velems_state *cell_velems = (struct cell_velems_state *) velems;
-
- cell->velems = cell_velems;
-
- cell->dirty |= CELL_NEW_VERTEX;
-
- if (cell_velems)
- draw_set_vertex_elements(cell->draw, cell_velems->count, cell_velems->velem);
-}
-
-static void
-cell_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
-{
- FREE( velems );
-}
-
-
-static void
-cell_set_vertex_buffers(struct pipe_context *pipe,
- unsigned count,
- const struct pipe_vertex_buffer *buffers)
-{
- struct cell_context *cell = cell_context(pipe);
-
- assert(count <= PIPE_MAX_ATTRIBS);
-
- util_copy_vertex_buffers(cell->vertex_buffer,
- &cell->num_vertex_buffers,
- buffers, count);
-
- cell->dirty |= CELL_NEW_VERTEX;
-
- draw_set_vertex_buffers(cell->draw, count, buffers);
-}
-
-
-static void
-cell_set_index_buffer(struct pipe_context *pipe,
- const struct pipe_index_buffer *ib)
-{
- struct cell_context *cell = cell_context(pipe);
-
- if (ib)
- memcpy(&cell->index_buffer, ib, sizeof(cell->index_buffer));
- else
- memset(&cell->index_buffer, 0, sizeof(cell->index_buffer));
-
- draw_set_index_buffer(cell->draw, ib);
-}
-
-
-void
-cell_init_vertex_functions(struct cell_context *cell)
-{
- cell->pipe.set_vertex_buffers = cell_set_vertex_buffers;
- cell->pipe.set_index_buffer = cell_set_index_buffer;
- cell->pipe.create_vertex_elements_state = cell_create_vertex_elements_state;
- cell->pipe.bind_vertex_elements_state = cell_bind_vertex_elements_state;
- cell->pipe.delete_vertex_elements_state = cell_delete_vertex_elements_state;
- cell->pipe.redefine_user_buffer = u_default_redefine_user_buffer;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "util/u_surface.h"
-#include "cell_context.h"
-#include "cell_surface.h"
-
-
-void
-cell_init_surface_functions(struct cell_context *cell)
-{
- cell->pipe.resource_copy_region = util_resource_copy_region;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* Authors: Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#ifndef CELL_SURFACE_H
-#define CELL_SURFACE_H
-
-
-struct cell_context;
-
-
-extern void
-cell_init_surface_functions(struct cell_context *cell);
-
-
-#endif /* CELL_SURFACE_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- * Michel Dänzer <michel@tungstengraphics.com>
- * Brian Paul
- */
-
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
-#include "util/u_transfer.h"
-#include "util/u_format.h"
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-#include "cell_context.h"
-#include "cell_screen.h"
-#include "cell_state.h"
-#include "cell_texture.h"
-
-#include "state_tracker/sw_winsys.h"
-
-
-
-static boolean
-cell_resource_layout(struct pipe_screen *screen,
- struct cell_resource *ct)
-{
- struct pipe_resource *pt = &ct->base;
- unsigned level;
- unsigned width = pt->width0;
- unsigned height = pt->height0;
- unsigned depth = pt->depth0;
-
- ct->buffer_size = 0;
-
- for (level = 0; level <= pt->last_level; level++) {
- unsigned size;
- unsigned w_tile, h_tile;
-
- assert(level < CELL_MAX_TEXTURE_LEVELS);
-
- /* width, height, rounded up to tile size */
- w_tile = align(width, TILE_SIZE);
- h_tile = align(height, TILE_SIZE);
-
- ct->stride[level] = util_format_get_stride(pt->format, w_tile);
-
- ct->level_offset[level] = ct->buffer_size;
-
- size = ct->stride[level] * util_format_get_nblocksy(pt->format, h_tile);
- if (pt->target == PIPE_TEXTURE_CUBE)
- size *= 6;
- else
- size *= depth;
-
- ct->buffer_size += size;
-
- width = u_minify(width, 1);
- height = u_minify(height, 1);
- depth = u_minify(depth, 1);
- }
-
- ct->data = align_malloc(ct->buffer_size, 16);
-
- return ct->data != NULL;
-}
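
The per-level arithmetic above is easier to follow with the tile rounding written out. Below is a minimal sketch for a plain 4-byte-per-pixel 2D texture; the function name and the explicit tile_size parameter are illustrative, not part of the driver, and compressed formats (where util_format_get_nblocksy differs from the height) are ignored.

/* Illustrative sketch: mipmap level offsets and total buffer size for a
 * simple 4-byte-per-pixel 2D texture, rounding each level up to whole
 * tiles the same way cell_resource_layout() does. */
static unsigned
sketch_layout_4bpp(unsigned width, unsigned height, unsigned last_level,
                   unsigned tile_size, unsigned level_offset[])
{
   unsigned total = 0, level;
   for (level = 0; level <= last_level; level++) {
      const unsigned w_tile = (width  + tile_size - 1) / tile_size * tile_size;
      const unsigned h_tile = (height + tile_size - 1) / tile_size * tile_size;
      const unsigned stride = w_tile * 4;      /* bytes per padded row */
      level_offset[level] = total;
      total += stride * h_tile;                /* one padded 2D image */
      width  = width  > 1 ? width  >> 1 : 1;   /* same as u_minify(width, 1) */
      height = height > 1 ? height >> 1 : 1;
   }
   return total;   /* corresponds to ct->buffer_size */
}

For example, a 100x60 level with a hypothetical 32-pixel tile pads to 128x64 and occupies 128 * 4 * 64 = 32768 bytes.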
-
-
-/**
- * Texture layout for simple color buffers.
- */
-static boolean
-cell_displaytarget_layout(struct pipe_screen *screen,
- struct cell_resource * ct)
-{
- struct sw_winsys *winsys = cell_screen(screen)->winsys;
-
- /* Round up the surface size to a multiple of the tile size?
- */
- ct->dt = winsys->displaytarget_create(winsys,
- ct->base.bind,
- ct->base.format,
- ct->base.width0,
- ct->base.height0,
- 16,
- &ct->dt_stride );
-
- return ct->dt != NULL;
-}
-
-static struct pipe_resource *
-cell_resource_create(struct pipe_screen *screen,
- const struct pipe_resource *templat)
-{
- struct cell_resource *ct = CALLOC_STRUCT(cell_resource);
- if (!ct)
- return NULL;
-
- ct->base = *templat;
- pipe_reference_init(&ct->base.reference, 1);
- ct->base.screen = screen;
-
- /* Create both a displaytarget (linear) and regular texture
- * (twiddled). Convert twiddled->linear at flush_frontbuffer time.
- */
- if (ct->base.bind & (PIPE_BIND_DISPLAY_TARGET |
- PIPE_BIND_SCANOUT |
- PIPE_BIND_SHARED)) {
- if (!cell_displaytarget_layout(screen, ct))
- goto fail;
- }
-
- if (!cell_resource_layout(screen, ct))
- goto fail;
-
- return &ct->base;
-
-fail:
- if (ct->dt) {
- struct sw_winsys *winsys = cell_screen(screen)->winsys;
- winsys->displaytarget_destroy(winsys, ct->dt);
- }
-
- FREE(ct);
-
- return NULL;
-}
-
-
-static void
-cell_resource_destroy(struct pipe_screen *scrn, struct pipe_resource *pt)
-{
- struct cell_screen *screen = cell_screen(scrn);
- struct sw_winsys *winsys = screen->winsys;
- struct cell_resource *ct = cell_resource(pt);
-
- if (ct->dt) {
- /* display target */
- winsys->displaytarget_destroy(winsys, ct->dt);
- }
- else if (!ct->userBuffer) {
- align_free(ct->data);
- }
-
- FREE(ct);
-}
-
-
-
-/**
- * Convert image from linear layout to tiled layout. 4-byte pixels.
- */
-static void
-twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst,
- uint src_stride, const uint *src)
-{
- const uint tile_size2 = tile_size * tile_size;
- const uint h_t = (h + tile_size - 1) / tile_size;
- const uint w_t = (w + tile_size - 1) / tile_size;
-
- uint it, jt; /* tile counters */
- uint i, j; /* intra-tile counters */
-
- src_stride /= 4; /* convert from bytes to pixels */
-
- /* loop over dest tiles */
- for (it = 0; it < h_t; it++) {
- for (jt = 0; jt < w_t; jt++) {
- /* start of dest tile: */
- uint *tdst = dst + (it * w_t + jt) * tile_size2;
-
- /* compute size of this tile (may be smaller than tile_size) */
- /* XXX note: a compiler bug was found here. That's why the code
- * looks as it does.
- */
- uint tile_width = w - jt * tile_size;
- tile_width = MIN2(tile_width, tile_size);
- uint tile_height = h - it * tile_size;
- tile_height = MIN2(tile_height, tile_size);
-
- /* loop over texels in the tile */
- for (i = 0; i < tile_height; i++) {
- for (j = 0; j < tile_width; j++) {
- const uint srci = it * tile_size + i;
- const uint srcj = jt * tile_size + j;
- ASSERT(srci < h);
- ASSERT(srcj < w);
- tdst[i * tile_size + j] = src[srci * src_stride + srcj];
- }
- }
- }
- }
-}
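
The addressing used above reduces to one offset computation per texel. Here is a minimal sketch of it, assuming the row-major-within-tile layout that twiddle_image_uint() writes (the 2x2-quad ordering handled by twiddle_tile() below is a further, separate permutation); the helper name is illustrative only.

/* Illustrative sketch: offset, in 4-byte texels, of pixel (x, y) in the
 * tiled layout produced above: tiles stored consecutively in row-major
 * tile order, texels row-major inside each tile. */
static unsigned
sketch_tiled_offset(unsigned x, unsigned y, unsigned img_width,
                    unsigned tile_size)
{
   const unsigned w_t = (img_width + tile_size - 1) / tile_size; /* tiles per row */
   const unsigned jt = x / tile_size, it = y / tile_size;  /* which tile */
   const unsigned j  = x % tile_size, i  = y % tile_size;  /* position in tile */
   return (it * w_t + jt) * tile_size * tile_size          /* start of the tile */
        + i * tile_size + j;                               /* texel inside the tile */
}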
-
-
-/**
- * For Cell. Basically, rearrange the pixels/quads from this layout:
- * +--+--+--+--+
- * |p0|p1|p2|p3|....
- * +--+--+--+--+
- *
- * to this layout:
- * +--+--+
- * |p0|p1|....
- * +--+--+
- * |p2|p3|
- * +--+--+
- */
-static void
-twiddle_tile(const uint *tileIn, uint *tileOut)
-{
- int y, x;
-
- for (y = 0; y < TILE_SIZE; y+=2) {
- for (x = 0; x < TILE_SIZE; x+=2) {
- int k = 4 * (y/2 * TILE_SIZE/2 + x/2);
- tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k];
- tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1];
- tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2];
- tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3];
- }
- }
-}
-
-
-/**
- * Convert image from tiled layout to linear layout. 4-byte pixels.
- */
-static void
-untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst,
- uint dst_stride, const uint *src)
-{
- const uint tile_size2 = tile_size * tile_size;
- const uint h_t = (h + tile_size - 1) / tile_size;
- const uint w_t = (w + tile_size - 1) / tile_size;
- uint *tile_buf;
- uint it, jt; /* tile counters */
- uint i, j; /* intra-tile counters */
-
- dst_stride /= 4; /* convert from bytes to pixels */
-
- tile_buf = align_malloc(tile_size * tile_size * 4, 16);
-
- /* loop over src tiles */
- for (it = 0; it < h_t; it++) {
- for (jt = 0; jt < w_t; jt++) {
- /* start of src tile: */
- const uint *tsrc = src + (it * w_t + jt) * tile_size2;
-
- twiddle_tile(tsrc, tile_buf);
- tsrc = tile_buf;
-
- /* compute size of this tile (may be smaller than tile_size) */
- /* XXX note: a compiler bug was found here. That's why the code
- * looks as it does.
- */
- uint tile_width = w - jt * tile_size;
- tile_width = MIN2(tile_width, tile_size);
- uint tile_height = h - it * tile_size;
- tile_height = MIN2(tile_height, tile_size);
-
- /* loop over texels in the tile */
- for (i = 0; i < tile_height; i++) {
- for (j = 0; j < tile_width; j++) {
- uint dsti = it * tile_size + i;
- uint dstj = jt * tile_size + j;
- ASSERT(dsti < h);
- ASSERT(dstj < w);
- dst[dsti * dst_stride + dstj] = tsrc[i * tile_size + j];
- }
- }
- }
- }
-
- align_free(tile_buf);
-}
-
-
-static struct pipe_surface *
-cell_create_surface(struct pipe_context *ctx,
- struct pipe_resource *pt,
- const struct pipe_surface *surf_tmpl)
-{
- struct cell_resource *ct = cell_resource(pt);
- struct pipe_surface *ps;
-
- assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
- ps = CALLOC_STRUCT(pipe_surface);
- if (ps) {
- pipe_reference_init(&ps->reference, 1);
- pipe_resource_reference(&ps->texture, pt);
- ps->format = surf_tmpl->format;
- ps->context = ctx;
- ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level);
- ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level);
- /* XXX may need to override usage flags (see sp_texture.c) */
- ps->usage = surf_tmpl->usage;
- ps->u.tex.level = surf_tmpl->u.tex.level;
- ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
- ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
- }
- return ps;
-}
-
-
-static void
-cell_surface_destroy(struct pipe_context *ctx, struct pipe_surface *surf)
-{
- pipe_resource_reference(&surf->texture, NULL);
- FREE(surf);
-}
-
-
-/**
- * Create a new pipe_transfer object.
- * This is used by the user to put texture data into a texture (and get it
- * back out for glGetTexImage).
- */
-static struct pipe_transfer *
-cell_get_transfer(struct pipe_context *ctx,
- struct pipe_resource *resource,
- unsigned level,
- unsigned usage,
- const struct pipe_box *box)
-{
- struct cell_resource *ct = cell_resource(resource);
- struct cell_transfer *ctrans;
- enum pipe_format format = resource->format;
-
- assert(resource);
- assert(level <= resource->last_level);
-
- /* make sure the requested region is in the image bounds */
- assert(box->x + box->width <= u_minify(resource->width0, level));
- assert(box->y + box->height <= u_minify(resource->height0, level));
- assert(box->z + box->depth <= (u_minify(resource->depth0, level) + resource->array_size - 1));
-
- ctrans = CALLOC_STRUCT(cell_transfer);
- if (ctrans) {
- struct pipe_transfer *pt = &ctrans->base;
- pipe_resource_reference(&pt->resource, resource);
- pt->level = level;
- pt->usage = usage;
- pt->box = *box;
- pt->stride = ct->stride[level];
-
- ctrans->offset = ct->level_offset[level];
-
- if (resource->target == PIPE_TEXTURE_CUBE || resource->target == PIPE_TEXTURE_3D) {
- unsigned h_tile = align(u_minify(resource->height0, level), TILE_SIZE);
- ctrans->offset += box->z * util_format_get_nblocksy(format, h_tile) * pt->stride;
- }
- else {
- assert(box->z == 0);
- }
-
- return pt;
- }
- return NULL;
-}
-
-
-static void
-cell_transfer_destroy(struct pipe_context *ctx, struct pipe_transfer *t)
-{
- struct cell_transfer *transfer = cell_transfer(t);
- /* Effectively do the texture_update work here - if texture images
- * needed post-processing to put them into hardware layout, this is
- * where it would happen. For cell, nothing to do.
- */
- assert (transfer->base.resource);
- pipe_resource_reference(&transfer->base.resource, NULL);
- FREE(transfer);
-}
-
-
-/**
- * Return pointer to texture image data in linear layout.
- */
-static void *
-cell_transfer_map(struct pipe_context *ctx, struct pipe_transfer *transfer)
-{
- struct cell_transfer *ctrans = cell_transfer(transfer);
- struct pipe_resource *pt = transfer->resource;
- struct cell_resource *ct = cell_resource(pt);
-
- assert(transfer->resource);
-
- if (ct->mapped == NULL) {
- ct->mapped = ct->data;
- }
-
-
- /* Better test would be resource->is_linear
- */
- if (transfer->resource->target != PIPE_BUFFER) {
- const uint level = ctrans->base.level;
- const uint texWidth = u_minify(pt->width0, level);
- const uint texHeight = u_minify(pt->height0, level);
- unsigned size;
-
-
- /*
- * Create a buffer of ordinary memory for the linear texture.
- * This is the memory that the user will read/write.
- */
- size = (util_format_get_stride(pt->format, align(texWidth, TILE_SIZE)) *
- util_format_get_nblocksy(pt->format, align(texHeight, TILE_SIZE)));
-
- ctrans->map = align_malloc(size, 16);
- if (!ctrans->map)
- return NULL; /* out of memory */
-
- if (transfer->usage & PIPE_TRANSFER_READ) {
- /* Textures always stored twiddled, need to untwiddle the
- * texture to make a linear version.
- */
- const uint bpp = util_format_get_blocksize(ct->base.format);
- if (bpp == 4) {
- const uint *src = (uint *) (ct->mapped + ctrans->offset);
- uint *dst = ctrans->map;
- untwiddle_image_uint(texWidth, texHeight, TILE_SIZE,
- dst, transfer->stride, src);
- }
- else {
- // xxx fix
- }
- }
- }
- else {
- unsigned stride = transfer->stride;
- enum pipe_format format = pt->format;
- unsigned blocksize = util_format_get_blocksize(format);
-
- ctrans->map = (ct->mapped +
- ctrans->offset +
- ctrans->base.box.y / util_format_get_blockheight(format) * stride +
- ctrans->base.box.x / util_format_get_blockwidth(format) * blocksize);
- }
-
-
- return ctrans->map;
-}
-
-
-/**
- * Called when user is done reading/writing texture data.
- * If new data was written, this is where we convert the linear data
- * to tiled data.
- */
-static void
-cell_transfer_unmap(struct pipe_context *ctx,
- struct pipe_transfer *transfer)
-{
- struct cell_transfer *ctrans = cell_transfer(transfer);
- struct pipe_resource *pt = transfer->resource;
- struct cell_resource *ct = cell_resource(pt);
- const uint level = ctrans->base.level;
- const uint texWidth = u_minify(pt->width0, level);
- const uint texHeight = u_minify(pt->height0, level);
- const uint stride = ct->stride[level];
-
- if (!ct->mapped) {
- assert(0);
- return;
- }
-
- if (pt->target != PIPE_BUFFER) {
- if (transfer->usage & PIPE_TRANSFER_WRITE) {
- /* The user wrote new texture data into the mapped buffer.
- * We need to convert the new linear data into the twiddled/tiled format.
- */
- const uint bpp = util_format_get_blocksize(ct->base.format);
- if (bpp == 4) {
- const uint *src = ctrans->map;
- uint *dst = (uint *) (ct->mapped + ctrans->offset);
- twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, stride, src);
- }
- else {
- // xxx fix
- }
- }
-
- align_free(ctrans->map);
- }
- else {
- /* nothing to do */
- }
-
- ctrans->map = NULL;
-}
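
Taken together, the transfer hooks above follow the get/map/write/unmap pattern of this Gallium interface revision. The following is a hedged sketch of how a caller might upload a rectangle of pixels through them; the helper name and parameters are illustrative, the usual pipe headers are assumed, and error checking is omitted.

/* Illustrative sketch: uploading width x height pixels (src_stride bytes
 * per source row) into level 0 of a texture via the transfer hooks. */
static void
sketch_upload_rect(struct pipe_context *pipe, struct pipe_resource *tex,
                   const void *pixels, unsigned width, unsigned height,
                   unsigned src_stride)
{
   struct pipe_box box;
   struct pipe_transfer *xfer;
   ubyte *map;
   unsigned y;

   memset(&box, 0, sizeof(box));
   box.width = width;
   box.height = height;
   box.depth = 1;

   xfer = pipe->get_transfer(pipe, tex, 0, PIPE_TRANSFER_WRITE, &box);
   map = pipe->transfer_map(pipe, xfer);
   for (y = 0; y < height; y++)   /* copy one row at a time */
      memcpy(map + y * xfer->stride,
             (const ubyte *) pixels + y * src_stride,
             src_stride);
   pipe->transfer_unmap(pipe, xfer);
   pipe->transfer_destroy(pipe, xfer);
}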
-
-
-
-/* This used to be overridden by the co-state tracker, but really needs
- * to be active with sw_winsys.
- *
- * Contrasting with llvmpipe and softpipe, this is the only place
- * where we use the ct->dt display target in any real sense.
- *
- * Basically just untwiddle our local data into the linear
- * displaytarget.
- */
-static void
-cell_flush_frontbuffer(struct pipe_screen *_screen,
- struct pipe_resource *resource,
- unsigned level, unsigned layer,
- void *context_private)
-{
- struct cell_screen *screen = cell_screen(_screen);
- struct sw_winsys *winsys = screen->winsys;
- struct cell_resource *ct = cell_resource(resource);
-
- if (!ct->dt)
- return;
-
- /* Need to untwiddle from our internal representation here:
- */
- {
- unsigned *map = winsys->displaytarget_map(winsys, ct->dt,
- (PIPE_TRANSFER_READ |
- PIPE_TRANSFER_WRITE));
- unsigned *src = (unsigned *)(ct->data + ct->level_offset[level]);
-
- untwiddle_image_uint(u_minify(resource->width0, level),
- u_minify(resource->height0, level),
- TILE_SIZE,
- map,
- ct->dt_stride,
- src);
-
- winsys->displaytarget_unmap(winsys, ct->dt);
- }
-
- winsys->displaytarget_display(winsys, ct->dt, context_private);
-}
-
-
-
-/**
- * Create a buffer which wraps user-space data.
- */
-static struct pipe_resource *
-cell_user_buffer_create(struct pipe_screen *screen,
- void *ptr,
- unsigned bytes,
- unsigned bind_flags)
-{
- struct cell_resource *buffer;
-
- buffer = CALLOC_STRUCT(cell_resource);
- if(!buffer)
- return NULL;
-
- pipe_reference_init(&buffer->base.reference, 1);
- buffer->base.screen = screen;
- buffer->base.format = PIPE_FORMAT_R8_UNORM; /* ?? */
- buffer->base.bind = PIPE_BIND_TRANSFER_READ | bind_flags;
- buffer->base.usage = PIPE_USAGE_IMMUTABLE;
- buffer->base.flags = 0;
- buffer->base.width0 = bytes;
- buffer->base.height0 = 1;
- buffer->base.depth0 = 1;
- buffer->base.array_size = 1;
- buffer->userBuffer = TRUE;
- buffer->data = ptr;
-
- return &buffer->base;
-}
-
-
-static struct pipe_resource *
-cell_resource_from_handle(struct pipe_screen *screen,
- const struct pipe_resource *templat,
- struct winsys_handle *handle)
-{
- /* XXX todo */
- return NULL;
-}
-
-
-static boolean
-cell_resource_get_handle(struct pipe_screen *screen,
- struct pipe_resource *tex,
- struct winsys_handle *handle)
-{
- /* XXX todo */
- return FALSE;
-}
-
-
-void
-cell_init_screen_texture_funcs(struct pipe_screen *screen)
-{
- screen->resource_create = cell_resource_create;
- screen->resource_destroy = cell_resource_destroy;
- screen->resource_from_handle = cell_resource_from_handle;
- screen->resource_get_handle = cell_resource_get_handle;
- screen->user_buffer_create = cell_user_buffer_create;
-
- screen->flush_frontbuffer = cell_flush_frontbuffer;
-}
-
-void
-cell_init_texture_transfer_funcs(struct cell_context *cell)
-{
- cell->pipe.get_transfer = cell_get_transfer;
- cell->pipe.transfer_destroy = cell_transfer_destroy;
- cell->pipe.transfer_map = cell_transfer_map;
- cell->pipe.transfer_unmap = cell_transfer_unmap;
-
- cell->pipe.transfer_flush_region = u_default_transfer_flush_region;
- cell->pipe.transfer_inline_write = u_default_transfer_inline_write;
-
- cell->pipe.create_surface = cell_create_surface;
- cell->pipe.surface_destroy = cell_surface_destroy;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef CELL_TEXTURE_H
-#define CELL_TEXTURE_H
-
-#include "cell/common.h"
-
-struct cell_context;
-struct pipe_resource;
-
-
-/**
- * Subclass of pipe_resource
- */
-struct cell_resource
-{
- struct pipe_resource base;
-
- unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS];
- unsigned long stride[CELL_MAX_TEXTURE_LEVELS];
-
- /**
- * Display target, for textures with the PIPE_BIND_DISPLAY_TARGET
- * usage.
- */
- struct sw_displaytarget *dt;
- unsigned dt_stride;
-
- /**
- * Malloc'ed data for regular textures, or a mapping to dt above.
- */
- void *data;
- boolean userBuffer;
-
- /* Total size in bytes of the texture data buffer (all mipmap levels)
- */
- unsigned long buffer_size;
-
- /** The buffer above, mapped. This is the memory from which the
- * SPUs will fetch texels. This texture data is in the tiled layout.
- */
- ubyte *mapped;
-};
-
-
-struct cell_transfer
-{
- struct pipe_transfer base;
-
- unsigned long offset;
- void *map;
-};
-
-
-/** cast wrapper */
-static INLINE struct cell_resource *
-cell_resource(struct pipe_resource *pt)
-{
- return (struct cell_resource *) pt;
-}
-
-
-/** cast wrapper */
-static INLINE struct cell_transfer *
-cell_transfer(struct pipe_transfer *pt)
-{
- return (struct cell_transfer *) pt;
-}
-
-
-extern void
-cell_init_screen_texture_funcs(struct pipe_screen *screen);
-
-extern void
-cell_init_texture_transfer_funcs(struct cell_context *cell);
-
-#endif /* CELL_TEXTURE_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Vertex buffer code. The draw module transforms vertices to window
- * coords, etc. and emits the vertices into a buffer supplied by this module.
- * When a vertex buffer is full, or we flush, we'll send the vertex data
- * to the SPUs.
- *
- * Authors
- * Brian Paul
- */
-
-
-#include "cell_batch.h"
-#include "cell_context.h"
-#include "cell_fence.h"
-#include "cell_flush.h"
-#include "cell_spu.h"
-#include "cell_vbuf.h"
-#include "draw/draw_vbuf.h"
-#include "util/u_memory.h"
-
-
-/** Allow vertex data to be inlined after RENDER command */
-#define ALLOW_INLINE_VERTS 1
-
-
-/**
- * Subclass of vbuf_render because we need a cell_context pointer in
- * a few places.
- */
-struct cell_vbuf_render
-{
- struct vbuf_render base;
- struct cell_context *cell;
- uint prim; /**< PIPE_PRIM_x */
- uint vertex_size; /**< in bytes */
- void *vertex_buffer; /**< just for debug, really */
- uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */
- uint vertex_buffer_size; /**< size in bytes */
-};
-
-
-/** cast wrapper */
-static struct cell_vbuf_render *
-cell_vbuf_render(struct vbuf_render *vbr)
-{
- return (struct cell_vbuf_render *) vbr;
-}
-
-
-
-static const struct vertex_info *
-cell_vbuf_get_vertex_info(struct vbuf_render *vbr)
-{
- struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
- return &cvbr->cell->vertex_info;
-}
-
-
-static boolean
-cell_vbuf_allocate_vertices(struct vbuf_render *vbr,
- ushort vertex_size, ushort nr_vertices)
-{
- struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
- unsigned size = vertex_size * nr_vertices;
- /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/
-
- assert(cvbr->vertex_buf == ~0);
- cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell);
- cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf];
- cvbr->vertex_buffer_size = size;
- cvbr->vertex_size = vertex_size;
-
- return cvbr->vertex_buffer != NULL;
-}
-
-
-static void
-cell_vbuf_release_vertices(struct vbuf_render *vbr)
-{
- struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
- struct cell_context *cell = cvbr->cell;
-
- /*
- printf("%s vertex_buf = %u count = %u\n",
- __FUNCTION__, cvbr->vertex_buf, vertices_used);
- */
-
- /* Make sure texture buffers aren't released until we're done rendering
- * with them.
- */
- cell_add_fenced_textures(cell);
-
- /* Tell SPUs they can release the vert buf */
- if (cvbr->vertex_buf != ~0U) {
- STATIC_ASSERT(sizeof(struct cell_command_release_verts) % 16 == 0);
- struct cell_command_release_verts *release
- = (struct cell_command_release_verts *)
- cell_batch_alloc16(cell, sizeof(struct cell_command_release_verts));
- release->opcode[0] = CELL_CMD_RELEASE_VERTS;
- release->vertex_buf = cvbr->vertex_buf;
- }
-
- cvbr->vertex_buf = ~0;
- cell_flush_int(cell, 0x0);
-
- cvbr->vertex_buffer = NULL;
-}
-
-
-static void *
-cell_vbuf_map_vertices(struct vbuf_render *vbr)
-{
- struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
- return cvbr->vertex_buffer;
-}
-
-
-static void
-cell_vbuf_unmap_vertices(struct vbuf_render *vbr,
- ushort min_index,
- ushort max_index )
-{
- struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
- assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size );
- /* do nothing */
-}
-
-
-
-static boolean
-cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
-{
- struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
- cvbr->prim = prim;
- /*printf("cell_set_prim %u\n", prim);*/
- return TRUE;
-}
-
-
-static void
-cell_vbuf_draw_elements(struct vbuf_render *vbr,
- const ushort *indices,
- uint nr_indices)
-{
- struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
- struct cell_context *cell = cvbr->cell;
- float xmin, ymin, xmax, ymax;
- uint i;
- uint nr_vertices = 0, min_index = ~0;
- const void *vertices = cvbr->vertex_buffer;
- const uint vertex_size = cvbr->vertex_size;
-
- for (i = 0; i < nr_indices; i++) {
- if (indices[i] > nr_vertices)
- nr_vertices = indices[i];
- if (indices[i] < min_index)
- min_index = indices[i];
- }
- nr_vertices++;
-
-#if 0
- /*if (min_index > 0)*/
- printf("%s min_index = %u\n", __FUNCTION__, min_index);
-#endif
-
-#if 0
- printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n",
- nr_indices, nr_vertices);
- printf(" ");
- for (i = 0; i < nr_indices; i += 3) {
- printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]);
- }
- printf("\n");
-#elif 0
- printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n",
- nr_indices, nr_vertices,
- indices[0], indices[1], indices[2]);
- printf("ind space = %u, vert space = %u, space = %u\n",
- nr_indices * 2,
- nr_vertices * 4 * cell->vertex_info.size,
- cell_batch_free_space(cell));
-#endif
-
- /* compute x/y bounding box */
- xmin = ymin = 1e50;
- xmax = ymax = -1e50;
- for (i = min_index; i < nr_vertices; i++) {
- const float *v = (float *) ((ubyte *) vertices + i * vertex_size);
- if (v[0] < xmin)
- xmin = v[0];
- if (v[0] > xmax)
- xmax = v[0];
- if (v[1] < ymin)
- ymin = v[1];
- if (v[1] > ymax)
- ymax = v[1];
- }
-#if 0
- printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax);
- fflush(stdout);
-#endif
-
- if (cvbr->prim != PIPE_PRIM_TRIANGLES)
- return; /* only render tris for now */
-
- /* build/insert batch RENDER command */
- {
- const uint index_bytes = ROUNDUP16(nr_indices * 2);
- const uint vertex_bytes = ROUNDUP16(nr_vertices * 4 * cell->vertex_info.size);
- STATIC_ASSERT(sizeof(struct cell_command_render) % 16 == 0);
- const uint batch_size = sizeof(struct cell_command_render) + index_bytes;
-
- struct cell_command_render *render
- = (struct cell_command_render *)
- cell_batch_alloc16(cell, batch_size);
-
- render->opcode[0] = CELL_CMD_RENDER;
- render->prim_type = cvbr->prim;
-
- render->num_indexes = nr_indices;
- render->min_index = min_index;
-
- /* append indices after render command */
- memcpy(render + 1, indices, nr_indices * 2);
-
- /* if there's room, append vertices after the indices, else leave
- * vertices in the original/separate buffer.
- */
- render->vertex_size = 4 * cell->vertex_info.size;
- render->num_verts = nr_vertices;
- if (ALLOW_INLINE_VERTS &&
- min_index == 0 &&
- vertex_bytes + 16 <= cell_batch_free_space(cell)) {
- /* vertex data inlined, after indices, at 16-byte boundary */
- void *dst = cell_batch_alloc16(cell, vertex_bytes);
- memcpy(dst, vertices, vertex_bytes);
- render->inline_verts = TRUE;
- render->vertex_buf = ~0;
- }
- else {
- /* vertex data in separate buffer */
- render->inline_verts = FALSE;
- ASSERT(cvbr->vertex_buf >= 0);
- render->vertex_buf = cvbr->vertex_buf;
- }
-
- render->xmin = xmin;
- render->ymin = ymin;
- render->xmax = xmax;
- render->ymax = ymax;
- }
-
-#if 0
- /* helpful for debug */
- cell_flush_int(cell, CELL_FLUSH_WAIT);
-#endif
-}
-
-
-static void
-cell_vbuf_destroy(struct vbuf_render *vbr)
-{
- struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
- cvbr->cell->vbuf_render = NULL;
- FREE(cvbr);
-}
-
-
-/**
- * Initialize the post-transform vertex buffer information for the given
- * context.
- */
-void
-cell_init_vbuf(struct cell_context *cell)
-{
- assert(cell->draw);
-
- cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render);
-
- /* The max number of indexes is what can fit into a batch buffer,
- * minus the render and release-verts commands.
- */
- cell->vbuf_render->base.max_indices
- = (CELL_BUFFER_SIZE
- - sizeof(struct cell_command_render)
- - sizeof(struct cell_command_release_verts))
- / sizeof(ushort);
- cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE;
-
- cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info;
- cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices;
- cell->vbuf_render->base.map_vertices = cell_vbuf_map_vertices;
- cell->vbuf_render->base.unmap_vertices = cell_vbuf_unmap_vertices;
- cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive;
- cell->vbuf_render->base.draw_elements = cell_vbuf_draw_elements;
- cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices;
- cell->vbuf_render->base.destroy = cell_vbuf_destroy;
-
- cell->vbuf_render->cell = cell;
-#if 1
- cell->vbuf_render->vertex_buf = ~0;
-#endif
-
- cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base);
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef CELL_VBUF_H
-#define CELL_VBUF_H
-
-
-struct cell_context;
-
-extern void
-cell_init_vbuf(struct cell_context *cell);
-
-
-#endif /* CELL_VBUF_H */
+++ /dev/null
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <inttypes.h>
-#include "pipe/p_defines.h"
-#include "pipe/p_context.h"
-#include "pipe/p_format.h"
-
-#include "../auxiliary/draw/draw_context.h"
-#include "../auxiliary/draw/draw_private.h"
-
-#include "cell_context.h"
-#include "rtasm/rtasm_ppc_spe.h"
-
-
-/**
- * Emit a 4x4 matrix transpose operation
- *
- * \param p Function that the transpose operation is to be appended to
- * \param row0 Register containing row 0 of the source matrix
- * \param row1 Register containing row 1 of the source matrix
- * \param row2 Register containing row 2 of the source matrix
- * \param row3 Register containing row 3 of the source matrix
- * \param dest_ptr Register containing the address of the destination matrix
- * \param shuf_ptr Register containing the address of the shuffled data
- * \param count Number of columns actually written to the destination
- *
- * \note
- * This function assumes that the registers named by \c row0, \c row1,
- * \c row2, and \c row3 are scratch and can be modified by the generated code.
- * Furthermore, these registers will be released, via calls to
- * \c release_register, by this function.
- *
- * \note
- * This function requires that four temporary registers are available on entry.
- */
-static void
-emit_matrix_transpose(struct spe_function *p,
- unsigned row0, unsigned row1, unsigned row2,
- unsigned row3, unsigned dest_ptr,
- unsigned shuf_ptr, unsigned count)
-{
- int shuf_hi = spe_allocate_available_register(p);
- int shuf_lo = spe_allocate_available_register(p);
- int t1 = spe_allocate_available_register(p);
- int t2 = spe_allocate_available_register(p);
- int t3;
- int t4;
- int col0;
- int col1;
- int col2;
- int col3;
-
-
- spe_lqd(p, shuf_hi, shuf_ptr, 3*16);
- spe_lqd(p, shuf_lo, shuf_ptr, 4*16);
- spe_shufb(p, t1, row0, row2, shuf_hi);
- spe_shufb(p, t2, row0, row2, shuf_lo);
-
-
- /* row0 and row2 are now no longer needed. Re-use those registers as
- * temporaries.
- */
- t3 = row0;
- t4 = row2;
-
- spe_shufb(p, t3, row1, row3, shuf_hi);
- spe_shufb(p, t4, row1, row3, shuf_lo);
-
-
- /* row1 and row3 are now no longer needed. Re-use those registers as
- * temporaries.
- */
- col0 = row1;
- col1 = row3;
-
- spe_shufb(p, col0, t1, t3, shuf_hi);
- if (count > 1) {
- spe_shufb(p, col1, t1, t3, shuf_lo);
- }
-
- /* t1 and t3 are now no longer needed. Re-use those registers as
- * temporaries.
- */
- col2 = t1;
- col3 = t3;
-
- if (count > 2) {
- spe_shufb(p, col2, t2, t4, shuf_hi);
- }
-
- if (count > 3) {
- spe_shufb(p, col3, t2, t4, shuf_lo);
- }
-
-
- /* Store the results. Remember that the stqd instruction is encoded using
- * the qword offset (stand-alone assemblers do the byte-offset to
- * qword-offset conversion for you), so the byte offset needs to be divided by
- * 16.
- */
- switch (count) {
- case 4:
- spe_stqd(p, col3, dest_ptr, 3 * 16);
- case 3:
- spe_stqd(p, col2, dest_ptr, 2 * 16);
- case 2:
- spe_stqd(p, col1, dest_ptr, 1 * 16);
- case 1:
- spe_stqd(p, col0, dest_ptr, 0 * 16);
- }
-
-
- /* Release all of the temporary registers used.
- */
- spe_release_register(p, col0);
- spe_release_register(p, col1);
- spe_release_register(p, col2);
- spe_release_register(p, col3);
- spe_release_register(p, shuf_hi);
- spe_release_register(p, shuf_lo);
- spe_release_register(p, t2);
- spe_release_register(p, t4);
-}
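
The shuffle sequence above is just a 4x4 transpose with the column stores truncated to count. For reference, here is a minimal scalar sketch of the same data movement, with illustrative names; it shows what the generated SPE code computes, not how it schedules registers.

/* Illustrative sketch: scalar equivalent of the transpose emitted above.
 * rows[i][j] is element j of source row i; only the first 'count'
 * columns of the source are written out, as in the emitter. */
static void
sketch_transpose_4x4(const float rows[4][4], float dest[4][4], unsigned count)
{
   unsigned i, j;
   for (j = 0; j < count; j++)      /* source column j ...     */
      for (i = 0; i < 4; i++)
         dest[j][i] = rows[i][j];   /* ... becomes dest row j  */
}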
-
-
-#if 0
-/* This appears to not be used currently */
-static void
-emit_fetch(struct spe_function *p,
- unsigned in_ptr, unsigned *offset,
- unsigned out_ptr, unsigned shuf_ptr,
- enum pipe_format format)
-{
- const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0)
- + (pf_size_z(format) != 0) + (pf_size_w(format) != 0);
- const unsigned type = pf_type(format);
- const unsigned bytes = pf_size_x(format);
-
- int v0 = spe_allocate_available_register(p);
- int v1 = spe_allocate_available_register(p);
- int v2 = spe_allocate_available_register(p);
- int v3 = spe_allocate_available_register(p);
- int tmp = spe_allocate_available_register(p);
- int float_zero = -1;
- int float_one = -1;
- float scale_signed = 0.0;
- float scale_unsigned = 0.0;
-
- spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16);
- spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16);
- spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16);
- spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16);
- offset[0] += 4;
-
- switch (bytes) {
- case 1:
- scale_signed = 1.0f / 127.0f;
- scale_unsigned = 1.0f / 255.0f;
- spe_lqd(p, tmp, shuf_ptr, 1 * 16);
- spe_shufb(p, v0, v0, v0, tmp);
- spe_shufb(p, v1, v1, v1, tmp);
- spe_shufb(p, v2, v2, v2, tmp);
- spe_shufb(p, v3, v3, v3, tmp);
- break;
- case 2:
- scale_signed = 1.0f / 32767.0f;
- scale_unsigned = 1.0f / 65535.0f;
- spe_lqd(p, tmp, shuf_ptr, 2 * 16);
- spe_shufb(p, v0, v0, v0, tmp);
- spe_shufb(p, v1, v1, v1, tmp);
- spe_shufb(p, v2, v2, v2, tmp);
- spe_shufb(p, v3, v3, v3, tmp);
- break;
- case 4:
- scale_signed = 1.0f / 2147483647.0f;
- scale_unsigned = 1.0f / 4294967295.0f;
- break;
- default:
- assert(0);
- break;
- }
-
- switch (type) {
- case PIPE_FORMAT_TYPE_FLOAT:
- break;
- case PIPE_FORMAT_TYPE_UNORM:
- spe_ilhu(p, tmp, ((unsigned) scale_unsigned) >> 16);
- spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff);
- spe_cuflt(p, v0, v0, 0);
- spe_fm(p, v0, v0, tmp);
- break;
- case PIPE_FORMAT_TYPE_SNORM:
- spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16);
- spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff);
- spe_csflt(p, v0, v0, 0);
- spe_fm(p, v0, v0, tmp);
- break;
- case PIPE_FORMAT_TYPE_USCALED:
- spe_cuflt(p, v0, v0, 0);
- break;
- case PIPE_FORMAT_TYPE_SSCALED:
- spe_csflt(p, v0, v0, 0);
- break;
- }
-
-
- if (count < 4) {
- float_one = spe_allocate_available_register(p);
- spe_il(p, float_one, 1);
- spe_cuflt(p, float_one, float_one, 0);
-
- if (count < 3) {
- float_zero = spe_allocate_available_register(p);
- spe_il(p, float_zero, 0);
- }
- }
-
- spe_release_register(p, tmp);
-
- emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count);
-
- switch (count) {
- case 1:
- spe_stqd(p, float_zero, out_ptr, 1 * 16);
- case 2:
- spe_stqd(p, float_zero, out_ptr, 2 * 16);
- case 3:
- spe_stqd(p, float_one, out_ptr, 3 * 16);
- }
-
- if (float_zero != -1) {
- spe_release_register(p, float_zero);
- }
-
- if (float_one != -1) {
- spe_release_register(p, float_one);
- }
-}
-#endif
-
-
-void cell_update_vertex_fetch(struct draw_context *draw)
-{
-#if 0
- struct cell_context *const cell =
- (struct cell_context *) draw->driver_private;
- struct spe_function *p = &cell->attrib_fetch;
- unsigned function_index[PIPE_MAX_ATTRIBS];
- unsigned unique_attr_formats;
- int out_ptr;
- int in_ptr;
- int shuf_ptr;
- unsigned i;
- unsigned j;
-
-
- /* Determine how many unique input attribute formats there are. At the
- * same time, store for each attribute the index of the first attribute
- * that uses the same format, so the generated fetch code can be shared.
- */
- unique_attr_formats = 1;
- function_index[0] = 0;
- for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) {
- const enum pipe_format curr_fmt = draw->vertex_element[i].src_format;
-
- for (j = 0; j < i; j++) {
- if (curr_fmt == draw->vertex_element[j].src_format) {
- break;
- }
- }
-
- if (j == i) {
- unique_attr_formats++;
- }
-
- function_index[i] = j;
- }
-
-
- /* Each fetch function can be a maximum of 34 instructions (note: this is
- * actually a slight over-estimate).
- */
- spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats);
-
-
- /* Allocate registers for the function's input parameters.
- */
- out_ptr = spe_allocate_register(p, 3);
- in_ptr = spe_allocate_register(p, 4);
- shuf_ptr = spe_allocate_register(p, 5);
-
-
- /* Generate code for the individual attribute fetch functions.
- */
- for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
- unsigned offset;
-
- if (function_index[i] == i) {
- cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr
- - (void *) p->store);
-
- offset = 0;
- emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr,
- draw->vertex_element[i].src_format);
- spe_bi(p, 0, 0, 0);
-
- /* Round up to the next 16-byte boundary.
- */
- if ((((unsigned) p->store) & 0x0f) != 0) {
- const unsigned align = ((unsigned) p->store) & 0x0f;
- p->store = (uint32_t *) (((void *) p->store) + align);
- }
- } else {
- /* Use the same function entry-point as a previously seen attribute
- * with the same format.
- */
- cell->attrib_fetch_offsets[i] =
- cell->attrib_fetch_offsets[function_index[i]];
- }
- }
-#else
- assert(0);
-#endif
-}
+++ /dev/null
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file cell_vertex_shader.c
- * Vertex shader interface routines for Cell.
- *
- * \author Ian Romanick <idr@us.ibm.com>
- */
-
-#include "pipe/p_defines.h"
-#include "pipe/p_context.h"
-#include "util/u_math.h"
-
-#include "cell_context.h"
-#include "cell_draw_arrays.h"
-#include "cell_flush.h"
-#include "cell_spu.h"
-#include "cell_batch.h"
-
-#include "cell/common.h"
-#include "draw/draw_context.h"
-#include "draw/draw_private.h"
-
-/**
- * Run the vertex shader on all vertices in the vertex queue.
- * Called by the draw module when the vertex cache needs to be flushed.
- */
-void
-cell_vertex_shader_queue_flush(struct draw_context *draw)
-{
-#if 0
- struct cell_context *const cell =
- (struct cell_context *) draw->driver_private;
- struct cell_command_vs *const vs = &cell_global.command[0].vs;
- uint64_t *batch;
- struct cell_array_info *array_info;
- unsigned i, j;
- struct cell_attribute_fetch_code *cf;
-
- assert(draw->vs.queue_nr != 0);
-
- /* XXX: do this on statechange:
- */
- draw_update_vertex_fetch(draw);
- cell_update_vertex_fetch(draw);
-
-
- batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf));
- batch[0] = CELL_CMD_STATE_ATTRIB_FETCH;
- cf = (struct cell_attribute_fetch_code *) (&batch[1]);
- cf->base = (uint64_t) cell->attrib_fetch.store;
- cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr
- - (void *) cell->attrib_fetch.store));
-
-
- for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
- const enum pipe_format format = draw->vertex_element[i].src_format;
- const unsigned count = ((pf_size_x(format) != 0)
- + (pf_size_y(format) != 0)
- + (pf_size_z(format) != 0)
- + (pf_size_w(format) != 0));
- const unsigned size = pf_size_x(format) * count;
-
- batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info));
-
- batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO;
-
- array_info = (struct cell_array_info *) &batch[1];
- assert(draw->vertex_fetch.src_ptr[i] != NULL);
- array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i];
- array_info->attr = i;
- array_info->pitch = draw->vertex_fetch.pitch[i];
- array_info->size = size;
- array_info->function_offset = cell->attrib_fetch_offsets[i];
- }
-
- batch = cell_batch_alloc(cell, sizeof(batch[0])
- + sizeof(struct pipe_viewport_state));
- batch[0] = CELL_CMD_STATE_VIEWPORT;
- (void) memcpy(&batch[1], &draw->viewport,
- sizeof(struct pipe_viewport_state));
-
- {
- uint64_t uniforms = (uintptr_t) draw->user.constants;
-
- batch = cell_batch_alloc(cell, 2 *sizeof(batch[0]));
- batch[0] = CELL_CMD_STATE_UNIFORMS;
- batch[1] = uniforms;
- }
-
- cell_batch_flush(cell);
-
- vs->opcode = CELL_CMD_VS_EXECUTE;
- vs->nr_attrs = draw->vertex_fetch.nr_attrs;
-
- (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane));
- vs->nr_planes = draw->nr_planes;
-
- for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) {
- const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i);
-
- for (j = 0; j < n; j++) {
- vs->elts[j] = draw->vs.queue[i + j].elt;
- vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex;
- }
-
- for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) {
- vs->elts[j] = vs->elts[0];
- vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex;
- }
-
- vs->num_elts = n;
- send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE);
-
- cell_flush_int(cell, CELL_FLUSH_WAIT);
- }
-
- draw->vs.post_nr = draw->vs.queue_nr;
- draw->vs.queue_nr = 0;
-#else
- assert(0);
-#endif
-}
+++ /dev/null
-# Gallium3D Cell driver: SPU code
-
-# This makefile builds the g3d_spu.a file that's linked into the
-# PPU code/library.
-
-
-TOP = ../../../../..
-include $(TOP)/configs/current
-
-
-PROG = g3d
-
-PROG_SPU = $(PROG)_spu
-PROG_SPU_A = $(PROG)_spu.a
-PROG_SPU_EMBED_O = $(PROG)_spu-embed.o
-
-
-SOURCES = \
- spu_command.c \
- spu_dcache.c \
- spu_funcs.c \
- spu_main.c \
- spu_per_fragment_op.c \
- spu_render.c \
- spu_texture.c \
- spu_tile.c \
- spu_tri.c
-
-OLD_SOURCES = \
- spu_exec.c \
- spu_util.c \
- spu_vertex_fetch.c \
- spu_vertex_shader.c
-
-
-SPU_OBJECTS = $(SOURCES:.c=.o)
-
-SPU_ASM_OUT = $(SOURCES:.c=.s)
-
-
-INCLUDE_DIRS = \
- -I$(TOP)/src/mesa \
- -I$(TOP)/src/gallium/include \
- -I$(TOP)/src/gallium/auxiliary \
- -I$(TOP)/src/gallium/drivers
-
-
-.c.o:
- $(SPU_CC) $(SPU_CFLAGS) -c $<
-
-.c.s:
- $(SPU_CC) $(SPU_CFLAGS) -O3 -S $<
-
-
-# The .a file will be linked into the main/PPU executable
-default: $(PROG_SPU_A)
-
-$(PROG_SPU_A): $(PROG_SPU_EMBED_O)
- $(SPU_AR) $(SPU_AR_FLAGS) $(PROG_SPU_A) $(PROG_SPU_EMBED_O)
-
-$(PROG_SPU_EMBED_O): $(PROG_SPU)
- $(SPU_EMBED) $(SPU_EMBED_FLAGS) $(PROG_SPU) $(PROG_SPU) $(PROG_SPU_EMBED_O)
-
-$(PROG_SPU): $(SPU_OBJECTS)
- $(SPU_CC) -o $(PROG_SPU) $(SPU_OBJECTS) $(SPU_LFLAGS)
-
-
-
-asmfiles: $(SPU_ASM_OUT)
-
-
-clean:
- rm -f *~ *.o *.a *.d *.s $(PROG_SPU)
-
-
-
-depend: $(SOURCES)
- rm -f depend
- touch depend
- $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null
-
-include depend
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-
-#ifndef SPU_COLORPACK_H
-#define SPU_COLORPACK_H
-
-
-#include <transpose_matrix4x4.h>
-#include <spu_intrinsics.h>
-
-
-static INLINE unsigned int
-spu_pack_R8G8B8A8(vector float rgba)
-{
- vector unsigned int out = spu_convtu(rgba, 32);
-
- out = spu_shuffle(out, out, ((vector unsigned char) {
- 0, 4, 8, 12, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0 }) );
-
- return spu_extract(out, 0);
-}
-
-
-static INLINE unsigned int
-spu_pack_A8R8G8B8(vector float rgba)
-{
- vector unsigned int out = spu_convtu(rgba, 32);
- out = spu_shuffle(out, out, ((vector unsigned char) {
- 12, 0, 4, 8, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0}) );
- return spu_extract(out, 0);
-}
-
-
-static INLINE unsigned int
-spu_pack_B8G8R8A8(vector float rgba)
-{
- vector unsigned int out = spu_convtu(rgba, 32);
- out = spu_shuffle(out, out, ((vector unsigned char) {
- 8, 4, 0, 12, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0}) );
- return spu_extract(out, 0);
-}
-
-
-static INLINE unsigned int
-spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle)
-{
- vector unsigned int out = spu_convtu(rgba, 32);
- out = spu_shuffle(out, out, shuffle);
- return spu_extract(out, 0);
-}
-
-
-static INLINE vector float
-spu_unpack_B8G8R8A8(uint color)
-{
- vector unsigned int color_u4 = spu_splats(color);
- color_u4 = spu_shuffle(color_u4, color_u4,
- ((vector unsigned char) {
- 2, 2, 2, 2,
- 1, 1, 1, 1,
- 0, 0, 0, 0,
- 3, 3, 3, 3}) );
- return spu_convtf(color_u4, 32);
-}
-
-
-static INLINE vector float
-spu_unpack_A8R8G8B8(uint color)
-{
- vector unsigned int color_u4 = spu_splats(color);
- color_u4 = spu_shuffle(color_u4, color_u4,
- ((vector unsigned char) {
- 1, 1, 1, 1,
- 2, 2, 2, 2,
- 3, 3, 3, 3,
- 0, 0, 0, 0}) );
- return spu_convtf(color_u4, 32);
-}
-
-
-/**
- * \param color_in - array of 32-bit packed ARGB colors
- * \param color_out - returns float colors in RRRR, GGGG, BBBB, AAAA order
- */
-static INLINE void
-spu_unpack_A8R8G8B8_transpose4(const vector unsigned int color_in[4],
- vector float color_out[4])
-{
- vector unsigned int c0;
-
- c0 = spu_shuffle(color_in[0], color_in[0],
- ((vector unsigned char) {
- 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
- color_out[0] = spu_convtf(c0, 32);
-
- c0 = spu_shuffle(color_in[1], color_in[1],
- ((vector unsigned char) {
- 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
- color_out[1] = spu_convtf(c0, 32);
-
- c0 = spu_shuffle(color_in[2], color_in[2],
- ((vector unsigned char) {
- 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
- color_out[2] = spu_convtf(c0, 32);
-
- c0 = spu_shuffle(color_in[3], color_in[3],
- ((vector unsigned char) {
- 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) );
- color_out[3] = spu_convtf(c0, 32);
-
- _transpose_matrix4x4(color_out, color_out);
-}
-
-
-
-#endif /* SPU_COLORPACK_H */
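
As a rough scalar reference for the packers above: spu_convtu(rgba, 32) turns each float channel in [0, 1] into a 32-bit unsigned fixed-point value, and the shuffle keeps only the most significant byte of each element, so a channel ends up as roughly its top 8 fixed-point bits. The following is a hedged sketch of that math, not a bit-exact replacement for the intrinsics.

#include <stdint.h>

/* Illustrative sketch: approximate scalar equivalent of spu_pack_R8G8B8A8().
 * Channels are clamped to [0, 1], reduced to their top 8 fixed-point bits,
 * and packed MSB-first (R in bits 31..24, A in bits 7..0). */
static uint32_t
sketch_pack_R8G8B8A8(float r, float g, float b, float a)
{
   const float c[4] = { r, g, b, a };
   uint32_t out = 0;
   int i;
   for (i = 0; i < 4; i++) {
      const float x = c[i] < 0.0f ? 0.0f : (c[i] > 1.0f ? 1.0f : c[i]);
      uint32_t v = (uint32_t) (x * 256.0f);   /* top byte of the 0.32 fixed-point value */
      if (v > 255)
         v = 255;
      out = (out << 8) | v;
   }
   return out;
}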
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * SPU command processing code
- */
-
-
-#include <stdio.h>
-#include <libmisc.h>
-
-#include "pipe/p_defines.h"
-
-#include "spu_command.h"
-#include "spu_main.h"
-#include "spu_render.h"
-#include "spu_per_fragment_op.h"
-#include "spu_texture.h"
-#include "spu_tile.h"
-#include "spu_vertex_shader.h"
-#include "spu_dcache.h"
-#include "cell/common.h"
-
-
-struct spu_vs_context draw;
-
-
-/**
- * Buffers containing dynamically generated SPU code:
- */
-PIPE_ALIGN_VAR(16) static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS];
-
-
-
-static INLINE int
-align(int value, int alignment)
-{
- return (value + alignment - 1) & ~(alignment - 1);
-}
-
-
-
-/**
- * Tell the PPU that this SPU has finished copying a buffer to
- * local store and that it may be reused by the PPU.
- * This is done by writing a 16-byte batch-buffer-status block back into
- * main memory (in cell_context->buffer_status[]).
- */
-static void
-release_buffer(uint buffer)
-{
- /* Evidently, using less than a 16-byte status doesn't work reliably */
- static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE,
- CELL_BUFFER_STATUS_FREE,
- CELL_BUFFER_STATUS_FREE,
- CELL_BUFFER_STATUS_FREE};
- const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
- uint *dst = spu.init.buffer_status + index;
-
- ASSERT(buffer < CELL_NUM_BUFFERS);
-
- mfc_put((void *) &status, /* src in local memory */
- (unsigned int) dst, /* dst in main memory */
- sizeof(status), /* size */
- TAG_MISC, /* tag is unimportant */
- 0, /* tid */
- 0 /* rid */);
-}
-
-
-/**
- * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory.
- * There's a qword of status per SPU.
- */
-static void
-cmd_fence(struct cell_command_fence *fence_cmd)
-{
- static const vector unsigned int status = {CELL_FENCE_SIGNALLED,
- CELL_FENCE_SIGNALLED,
- CELL_FENCE_SIGNALLED,
- CELL_FENCE_SIGNALLED};
- uint *dst = (uint *) fence_cmd->fence;
- dst += 4 * spu.init.id; /* main store/memory address, not local store */
- ASSERT_ALIGN16(dst);
- mfc_put((void *) &status, /* src in local memory */
- (unsigned int) dst, /* dst in main memory */
- sizeof(status), /* size */
- TAG_FENCE, /* tag */
- 0, /* tid */
- 0 /* rid */);
-}
-
-
-static void
-cmd_clear_surface(const struct cell_command_clear_surface *clear)
-{
- D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);
-
- if (clear->surface == 0) {
- spu.fb.color_clear_value = clear->value;
- if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
- uint x = (spu.init.id << 4) | (spu.init.id << 12) |
- (spu.init.id << 20) | (spu.init.id << 28);
- spu.fb.color_clear_value ^= x;
- }
- }
- else {
- spu.fb.depth_clear_value = clear->value;
- }
-
-#define CLEAR_OPT 1
-#if CLEAR_OPT
-
- /* Simply set all tiles' status to CLEAR.
- * When we actually begin rendering into a tile, we'll initialize it to
- * the clear value. If any tiles go untouched during the frame,
- * really_clear_tiles() will set them to the clear value.
- */
- if (clear->surface == 0) {
- memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
- }
- else {
- memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
- }
-
-#else
-
- /*
- * This path clears the whole framebuffer to the clear color right now.
- */
-
- /*
- printf("SPU: %s num=%d w=%d h=%d\n",
- __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
- */
-
- /* init a single tile to the clear value */
- if (clear->surface == 0) {
- clear_c_tile(&spu.ctile);
- }
- else {
- clear_z_tile(&spu.ztile);
- }
-
- /* walk over my tiles, writing the 'clear' tile's data */
- {
- const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
- uint i;
- for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
- uint tx = i % spu.fb.width_tiles;
- uint ty = i / spu.fb.width_tiles;
- if (clear->surface == 0)
- put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
- else
- put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
- }
- }
-
- if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
- wait_on_mask(1 << TAG_SURFACE_CLEAR);
- }
-
-#endif /* CLEAR_OPT */
-
- D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n");
-}
-
-
-static void
-cmd_release_verts(const struct cell_command_release_verts *release)
-{
- D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf);
- ASSERT(release->vertex_buf != ~0U);
- release_buffer(release->vertex_buf);
-}
-
-
-/**
- * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
- * This involves installing new fragment ops SPU code.
- * If this function is never called, we'll use a regular C fallback function
- * for fragment processing.
- */
-static void
-cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
-{
- D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n");
-
- /* Copy state info (for fallback case only - this will eventually
- * go away when the fallback case goes away)
- */
- memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
- memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
- memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color));
-
- /* Make sure the SPU knows which buffers it's expected to read when
- * it's told to pull tiles.
- */
- spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled);
-
- /* If we're forcing the fallback code to be used (for debug purposes),
- * install that. Otherwise install the incoming SPU code.
- */
- if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) {
- static unsigned int warned = 0;
- if (!warned) {
- fprintf(stderr, "Cell Warning: using fallback per-fragment code\n");
- warned = 1;
- }
- /* The following two lines aren't really necessary if you
- * know the debug flags won't change during a run, and if you
- * know that the function pointers are initialized correctly.
- * We set them here to allow a person to change the debug
- * flags during a run (from inside a debugger).
- */
- spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
- spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
- return;
- }
-
- /* Make sure the SPU code buffer is large enough to hold the incoming code.
- * Note that we *don't* use align_malloc() and align_free(), because
- * those utility functions are *not* available in SPU code.
- */
- if (spu.fragment_ops_code_size < fops->total_code_size) {
- if (spu.fragment_ops_code != NULL) {
- free(spu.fragment_ops_code);
- }
- spu.fragment_ops_code_size = fops->total_code_size;
- spu.fragment_ops_code = malloc(fops->total_code_size);
- if (spu.fragment_ops_code == NULL) {
- /* Whoops. */
- fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size);
- spu.fragment_ops_code = NULL;
- spu.fragment_ops_code_size = 0;
- spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
- spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
- return;
- }
- }
-
- /* Copy the SPU code from the command buffer to the spu buffer */
- memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size);
-
- /* Set the pointers for the front-facing and back-facing fragments
- * to the specified offsets within the code. Note that if the
- * front-facing and back-facing code are the same, they'll have
- * the same offset.
- */
- spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index];
- spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index];
-}
-
-static void
-cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
-{
- D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n");
- /* Copy SPU code from batch buffer to spu buffer */
- memcpy(spu.fragment_program_code, fp->code,
- SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
-#if 01
- /* Point function pointer at new code */
- spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
-#endif
-}
-
-
-static uint
-cmd_state_fs_constants(const qword *buffer, uint pos)
-{
- const uint num_const = spu_extract((vector unsigned int)buffer[pos+1], 0);
- const float *constants = (const float *) &buffer[pos+2];
- uint i;
-
- D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const);
-
- /* Expand each float to float[4] for SOA execution */
- for (i = 0; i < num_const; i++) {
- D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]);
- spu.constants[i] = spu_splats(constants[i]);
- }
-
- /* return new buffer pos (in 16-byte words) */
- return pos + 2 + (ROUNDUP16(num_const * sizeof(float)) / 16);
-}
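The return value above advances the batch-buffer position in 16-byte words: one word for the opcode, one for the count, then the constant payload rounded up to whole words. A minimal sketch of that arithmetic, with a ROUNDUP16 that mirrors the driver's macro and made-up values:

#include <assert.h>
#include <stdio.h>

/* Mirrors the driver's ROUNDUP16(): round a byte count up to a multiple of 16. */
#define ROUNDUP16(n)  (((n) + 15) & ~15u)

int
main(void)
{
   unsigned pos = 4;         /* hypothetical current position, in 16-byte words */
   unsigned num_const = 5;   /* five floats = 20 bytes of payload */

   /* one word for the opcode, one for the count, then the payload rounded
    * up to whole 16-byte words: 20 bytes -> 32 bytes -> 2 words
    */
   unsigned new_pos = pos + 2 + ROUNDUP16(num_const * sizeof(float)) / 16;

   assert(new_pos == pos + 4);
   printf("new pos = %u words\n", new_pos);
   return 0;
}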
-
-
-static void
-cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
-{
- D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
- cmd->width,
- cmd->height,
- cmd->color_start,
- cmd->color_format,
- cmd->depth_format);
-
- ASSERT_ALIGN16(cmd->color_start);
- ASSERT_ALIGN16(cmd->depth_start);
-
- spu.fb.color_start = cmd->color_start;
- spu.fb.depth_start = cmd->depth_start;
- spu.fb.color_format = cmd->color_format;
- spu.fb.depth_format = cmd->depth_format;
- spu.fb.width = cmd->width;
- spu.fb.height = cmd->height;
- spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
- spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;
-
- switch (spu.fb.depth_format) {
- case PIPE_FORMAT_Z32_UNORM:
- spu.fb.zsize = 4;
- spu.fb.zscale = (float) 0xffffffffu;
- break;
- case PIPE_FORMAT_S8_UINT_Z24_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- case PIPE_FORMAT_X8Z24_UNORM:
- case PIPE_FORMAT_Z24X8_UNORM:
- spu.fb.zsize = 4;
- spu.fb.zscale = (float) 0x00ffffffu;
- break;
- case PIPE_FORMAT_Z16_UNORM:
- spu.fb.zsize = 2;
- spu.fb.zscale = (float) 0xffffu;
- break;
- default:
- spu.fb.zsize = 0;
- break;
- }
-}
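The tile bookkeeping above is a ceiling division of the framebuffer size by the tile dimension, and zscale is just the largest integer value of the chosen depth format expressed as a float. A self-contained sketch of both calculations; a TILE_SIZE of 32 is assumed here for illustration:

#include <assert.h>

enum { TILE_SIZE = 32 };   /* assumed tile dimension; the driver defines its own */

/* Number of whole-or-partial tiles needed to cover 'pixels' pixels. */
static unsigned
tiles_for(unsigned pixels)
{
   return (pixels + TILE_SIZE - 1) / TILE_SIZE;
}

int
main(void)
{
   /* A 1024x768 framebuffer needs 32x24 tiles; a size that is not a
    * multiple of the tile dimension gets one extra, partially used tile.
    */
   assert(tiles_for(1024) == 32);
   assert(tiles_for(768)  == 24);
   assert(tiles_for(800)  == 25);

   /* Depth scale: Z16 maps [0,1] onto [0,0xffff], Z24 onto [0,0xffffff]. */
   float zscale16 = (float) 0xffffu;
   float zscale24 = (float) 0x00ffffffu;
   assert(zscale16 == 65535.0f);
   (void) zscale24;
   return 0;
}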
-
-
-/**
- * The texture mask_s/t and scale_s/t fields depend on the texture size and
- * sampler wrap modes.
- */
-static void
-update_tex_masks(struct spu_texture *texture,
- const struct pipe_sampler_state *sampler)
-{
- uint i;
-
- for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
- int width = texture->level[i].width;
- int height = texture->level[i].height;
-
- if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT)
- texture->level[i].mask_s = spu_splats(width - 1);
- else
- texture->level[i].mask_s = spu_splats(~0);
-
- if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT)
- texture->level[i].mask_t = spu_splats(height - 1);
- else
- texture->level[i].mask_t = spu_splats(~0);
-
- if (sampler->normalized_coords) {
- texture->level[i].scale_s = spu_splats((float) width);
- texture->level[i].scale_t = spu_splats((float) height);
- }
- else {
- texture->level[i].scale_s = spu_splats(1.0f);
- texture->level[i].scale_t = spu_splats(1.0f);
- }
- }
-}
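For power-of-two textures, REPEAT wrapping reduces to scaling the normalized coordinate by the texture size and masking with size - 1; that is what the mask_s/t and scale_s/t values computed above feed into. A small standalone sketch of the idea with made-up values (this is not the driver's actual sampling path):

#include <assert.h>

/* Wrap a normalized coordinate into texel space for a power-of-two size:
 * scale by the size, truncate, then AND with (size - 1) to repeat.
 */
static int
wrap_repeat(float coord, int size)
{
   int texel = (int) (coord * (float) size);
   return texel & (size - 1);
}

int
main(void)
{
   /* 256-texel texture: 1.25 wraps to the same texel as 0.25 */
   assert(wrap_repeat(0.25f, 256) == 64);
   assert(wrap_repeat(1.25f, 256) == 64);
   return 0;
}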
-
-
-static void
-cmd_state_sampler(const struct cell_command_sampler *sampler)
-{
- uint unit = sampler->unit;
-
- D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit);
-
- spu.sampler[unit] = sampler->state;
-
- switch (spu.sampler[unit].min_img_filter) {
- case PIPE_TEX_FILTER_LINEAR:
- spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear;
- break;
- case PIPE_TEX_FILTER_NEAREST:
- spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest;
- break;
- default:
- ASSERT(0);
- }
-
- switch (spu.sampler[sampler->unit].mag_img_filter) {
- case PIPE_TEX_FILTER_LINEAR:
- spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear;
- break;
- case PIPE_TEX_FILTER_NEAREST:
- spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest;
- break;
- default:
- ASSERT(0);
- }
-
- switch (spu.sampler[sampler->unit].min_mip_filter) {
- case PIPE_TEX_MIPFILTER_NEAREST:
- case PIPE_TEX_MIPFILTER_LINEAR:
- spu.sample_texture_2d[unit] = sample_texture_2d_lod;
- break;
- case PIPE_TEX_MIPFILTER_NONE:
- spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit];
- break;
- default:
- ASSERT(0);
- }
-
- update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
-}
-
-
-static void
-cmd_state_texture(const struct cell_command_texture *texture)
-{
- const uint unit = texture->unit;
- uint i;
-
- D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit);
-
- spu.texture[unit].max_level = 0;
- spu.texture[unit].target = texture->target;
-
- for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
- uint width = texture->width[i];
- uint height = texture->height[i];
- uint depth = texture->depth[i];
-
- D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i,
- texture->start[i], texture->width[i], texture->height[i]);
-
- spu.texture[unit].level[i].start = texture->start[i];
- spu.texture[unit].level[i].width = width;
- spu.texture[unit].level[i].height = height;
- spu.texture[unit].level[i].depth = depth;
-
- spu.texture[unit].level[i].tiles_per_row =
- (width + TILE_SIZE - 1) / TILE_SIZE;
-
- spu.texture[unit].level[i].bytes_per_image =
- 4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth;
-
- spu.texture[unit].level[i].max_s = spu_splats((int) width - 1);
- spu.texture[unit].level[i].max_t = spu_splats((int) height - 1);
-
- if (texture->start[i])
- spu.texture[unit].max_level = i;
- }
-
- update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
-}
-
-
-static void
-cmd_state_vertex_info(const struct vertex_info *vinfo)
-{
- D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
- ASSERT(vinfo->num_attribs >= 1);
- ASSERT(vinfo->num_attribs <= 8);
- memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
-}
-
-
-static void
-cmd_state_vs_array_info(const struct cell_array_info *vs_info)
-{
- const unsigned attr = vs_info->attr;
-
- ASSERT(attr < PIPE_MAX_ATTRIBS);
- draw.vertex_fetch.src_ptr[attr] = vs_info->base;
- draw.vertex_fetch.pitch[attr] = vs_info->pitch;
- draw.vertex_fetch.size[attr] = vs_info->size;
- draw.vertex_fetch.code_offset[attr] = vs_info->function_offset;
- draw.vertex_fetch.dirty = 1;
-}
-
-
-static void
-cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
-{
- mfc_get(attribute_fetch_code_buffer,
- (unsigned int) code->base, /* src */
- code->size,
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
-
- draw.vertex_fetch.code = attribute_fetch_code_buffer;
-}
-
-
-static void
-cmd_finish(void)
-{
- D_PRINTF(CELL_DEBUG_CMD, "FINISH\n");
- really_clear_tiles(0);
- /* wait for all outstanding DMAs to finish */
- mfc_write_tag_mask(~0);
- mfc_read_tag_status_all();
- /* send mbox message to PPU */
- spu_write_out_mbox(CELL_CMD_FINISH);
-}
-
-
-/**
- * Execute a batch of commands which was sent to us by the PPU.
- * See the cell_emit_state.c code to see where the commands come from.
- *
- * The opcode param encodes the location of the buffer and its size.
- */
-static void
-cmd_batch(uint opcode)
-{
- const uint buf = (opcode >> 8) & 0xff;
- uint size = (opcode >> 16);
- PIPE_ALIGN_VAR(16) qword buffer[CELL_BUFFER_SIZE / 16];
- const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]);
- uint pos;
-
- D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n",
- buf, size, spu.init.buffers[buf]);
-
- ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
-
- ASSERT_ALIGN16(spu.init.buffers[buf]);
-
- size = ROUNDUP16(size);
-
- ASSERT_ALIGN16(spu.init.buffers[buf]);
-
- mfc_get(buffer, /* dest */
- (unsigned int) spu.init.buffers[buf], /* src */
- size,
- TAG_BATCH_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << TAG_BATCH_BUFFER);
-
- /* Tell PPU we're done copying the buffer to local store */
- D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf);
- release_buffer(buf);
-
- /*
- * Loop over commands in the batch buffer
- */
- for (pos = 0; pos < usize; /* no incr */) {
- switch (si_to_uint(buffer[pos])) {
- /*
- * rendering commands
- */
- case CELL_CMD_CLEAR_SURFACE:
- {
- struct cell_command_clear_surface *clr
- = (struct cell_command_clear_surface *) &buffer[pos];
- cmd_clear_surface(clr);
- pos += sizeof(*clr) / 16;
- }
- break;
- case CELL_CMD_RENDER:
- {
- struct cell_command_render *render
- = (struct cell_command_render *) &buffer[pos];
- uint pos_incr;
- cmd_render(render, &pos_incr);
- pos += ((pos_incr+1)&~1) / 2; // should 'fix' cmd_render return
- }
- break;
- /*
- * state-update commands
- */
- case CELL_CMD_STATE_FRAMEBUFFER:
- {
- struct cell_command_framebuffer *fb
- = (struct cell_command_framebuffer *) &buffer[pos];
- cmd_state_framebuffer(fb);
- pos += sizeof(*fb) / 16;
- }
- break;
- case CELL_CMD_STATE_FRAGMENT_OPS:
- {
- struct cell_command_fragment_ops *fops
- = (struct cell_command_fragment_ops *) &buffer[pos];
- cmd_state_fragment_ops(fops);
- /* This is a variant-sized command */
- pos += ROUNDUP16(sizeof(*fops) + fops->total_code_size) / 16;
- }
- break;
- case CELL_CMD_STATE_FRAGMENT_PROGRAM:
- {
- struct cell_command_fragment_program *fp
- = (struct cell_command_fragment_program *) &buffer[pos];
- cmd_state_fragment_program(fp);
- pos += sizeof(*fp) / 16;
- }
- break;
- case CELL_CMD_STATE_FS_CONSTANTS:
- pos = cmd_state_fs_constants(buffer, pos);
- break;
- case CELL_CMD_STATE_RASTERIZER:
- {
- struct cell_command_rasterizer *rast =
- (struct cell_command_rasterizer *) &buffer[pos];
- spu.rasterizer = rast->rasterizer;
- pos += sizeof(*rast) / 16;
- }
- break;
- case CELL_CMD_STATE_SAMPLER:
- {
- struct cell_command_sampler *sampler
- = (struct cell_command_sampler *) &buffer[pos];
- cmd_state_sampler(sampler);
- pos += sizeof(*sampler) / 16;
- }
- break;
- case CELL_CMD_STATE_TEXTURE:
- {
- struct cell_command_texture *texture
- = (struct cell_command_texture *) &buffer[pos];
- cmd_state_texture(texture);
- pos += sizeof(*texture) / 16;
- }
- break;
- case CELL_CMD_STATE_VERTEX_INFO:
- cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
- pos += 1 + ROUNDUP16(sizeof(struct vertex_info)) / 16;
- break;
- case CELL_CMD_STATE_VIEWPORT:
- (void) memcpy(& draw.viewport, &buffer[pos+1],
- sizeof(struct pipe_viewport_state));
- pos += 1 + ROUNDUP16(sizeof(struct pipe_viewport_state)) / 16;
- break;
- case CELL_CMD_STATE_UNIFORMS:
- draw.constants = (const float (*)[4]) (uintptr_t)spu_extract((vector unsigned int)buffer[pos+1],0);
- pos += 2;
- break;
- case CELL_CMD_STATE_VS_ARRAY_INFO:
- cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
- pos += 1 + ROUNDUP16(sizeof(struct cell_array_info)) / 16;
- break;
- case CELL_CMD_STATE_BIND_VS:
-#if 0
- spu_bind_vertex_shader(&draw,
- (struct cell_shader_info *) &buffer[pos+1]);
-#endif
- pos += 1 + ROUNDUP16(sizeof(struct cell_shader_info)) / 16;
- break;
- case CELL_CMD_STATE_ATTRIB_FETCH:
- cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
- &buffer[pos+1]);
- pos += 1 + ROUNDUP16(sizeof(struct cell_attribute_fetch_code)) / 16;
- break;
- /*
- * misc commands
- */
- case CELL_CMD_FINISH:
- cmd_finish();
- pos += 1;
- break;
- case CELL_CMD_FENCE:
- {
- struct cell_command_fence *fence_cmd =
- (struct cell_command_fence *) &buffer[pos];
- cmd_fence(fence_cmd);
- pos += sizeof(*fence_cmd) / 16;
- }
- break;
- case CELL_CMD_RELEASE_VERTS:
- {
- struct cell_command_release_verts *release
- = (struct cell_command_release_verts *) &buffer[pos];
- cmd_release_verts(release);
- pos += sizeof(*release) / 16;
- }
- break;
- case CELL_CMD_FLUSH_BUFFER_RANGE: {
- struct cell_buffer_range *br = (struct cell_buffer_range *)
- &buffer[pos+1];
-
- spu_dcache_mark_dirty((unsigned) br->base, br->size);
- pos += 1 + ROUNDUP16(sizeof(struct cell_buffer_range)) / 16;
- break;
- }
- default:
- printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, si_to_uint(buffer[pos]));
- ASSERT(0);
- break;
- }
- }
-
- D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n");
-}
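The dispatch loop above is the usual pattern for a packed command stream: read the opcode in the current 16-byte slot, hand the slot to a handler, then advance by that command's size in slots (variant-sized commands advance by their rounded-up payload). A stripped-down host-side sketch of the same pattern, using hypothetical opcodes and structs and no DMA:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SLOT 16u                              /* one batch-buffer word */
#define SLOTS(T) ((sizeof(T) + SLOT - 1) / SLOT)

enum { CMD_CLEAR = 1, CMD_FINISH = 2 };       /* hypothetical opcodes */

struct cmd_clear { uint32_t opcode; uint32_t value; uint8_t pad[8]; };

static void
run_batch(const uint8_t *buf, unsigned nslots)
{
   unsigned pos = 0;
   while (pos < nslots) {
      uint32_t opcode;
      memcpy(&opcode, buf + pos * SLOT, sizeof(opcode));
      switch (opcode) {
      case CMD_CLEAR: {
         struct cmd_clear c;
         memcpy(&c, buf + pos * SLOT, sizeof(c));
         printf("clear to 0x%08x\n", c.value);
         pos += SLOTS(struct cmd_clear);      /* fixed-size command */
         break;
      }
      case CMD_FINISH:
         printf("finish\n");
         pos += 1;                            /* single-slot command */
         break;
      default:
         printf("bad opcode 0x%x\n", opcode);
         return;
      }
   }
}

int
main(void)
{
   uint8_t buf[2 * SLOT] = {0};
   struct cmd_clear c = { CMD_CLEAR, 0xff00ff00u, {0} };
   uint32_t fin = CMD_FINISH;
   memcpy(buf, &c, sizeof(c));
   memcpy(buf + SLOT, &fin, sizeof(fin));
   run_batch(buf, 2);
   return 0;
}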
-
-
-#define PERF 0
-
-
-/**
- * Main loop for SPEs: Get a command, execute it, repeat.
- */
-void
-command_loop(void)
-{
- int exitFlag = 0;
- uint t0, t1;
-
- D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n");
-
- while (!exitFlag) {
- unsigned opcode;
-
- D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n");
-
- if (PERF)
- spu_write_decrementer(~0);
-
- /* read/wait from mailbox */
- opcode = (unsigned int) spu_read_in_mbox();
- D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode);
-
- if (PERF)
- t0 = spu_read_decrementer();
-
- switch (opcode & CELL_CMD_OPCODE_MASK) {
- case CELL_CMD_EXIT:
- D_PRINTF(CELL_DEBUG_CMD, "EXIT\n");
- exitFlag = 1;
- break;
- case CELL_CMD_VS_EXECUTE:
-#if 0
- spu_execute_vertex_shader(&draw, &cmd.vs);
-#endif
- break;
- case CELL_CMD_BATCH:
- cmd_batch(opcode);
- break;
- default:
- printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
- }
-
- if (PERF) {
- t1 = spu_read_decrementer();
- printf("wait mbox time: %gms batch time: %gms\n",
- (~0u - t0) * spu.init.inv_timebase,
- (t0 - t1) * spu.init.inv_timebase);
- }
- }
-
- D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n");
-
- if (spu.init.debug_flags & CELL_DEBUG_CACHE)
- spu_dcache_report();
-}
-
-/* Initialize this module; we manage the fragment ops buffer here. */
-void
-spu_command_init(void)
-{
- /* Install default/fallback fragment processing function.
- * This will normally be overridden by a code-gen'd function
- * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set.
- */
- spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
- spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
-
- /* Set up the basic empty buffer for code-gen'ed fragment ops */
- spu.fragment_ops_code = NULL;
- spu.fragment_ops_code_size = 0;
-}
-
-void
-spu_command_close(void)
-{
- /* Deallocate the code-gen buffer for fragment ops, and reset the
- * fragment ops functions to their initial setting (just to leave
- * things in a good state).
- */
- if (spu.fragment_ops_code != NULL) {
- free(spu.fragment_ops_code);
- }
- spu_command_init();
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-extern void
-command_loop(void);
-
-extern void
-spu_command_init(void);
-
-extern void
-spu_command_close(void);
+++ /dev/null
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include "cell/common.h"
-#include "spu_main.h"
-#include "spu_dcache.h"
-
-#define CACHELINE_LOG2SIZE 7
-#define LINE_SIZE (1U << 7)
-#define ALIGN_MASK (~(LINE_SIZE - 1))
-
-#define CACHE_NAME data
-#define CACHED_TYPE qword
-#define CACHE_TYPE CACHE_TYPE_RO
-#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0)
-#define CACHE_LOG2NNWAY 2
-#define CACHE_LOG2NSETS 6
-#ifdef DEBUG
-#define CACHE_STATS 1
-#endif
-#include <cache-api.h>
-
-/* Yes folks, this is ugly.
- */
-#undef CACHE_NWAY
-#undef CACHE_NSETS
-#define CACHE_NAME data
-#define CACHE_NWAY 4
-#define CACHE_NSETS (1U << 6)
-
-
-/**
- * Fetch an arbitrary number of bytes from an unaligned address
- *
- * \param dst Destination data buffer
- * \param ea Main memory effective address of source data
- * \param size Number of bytes to read
- *
- * \warning
- * As is hinted by the type of the \c dst pointer, this function writes
- * multiples of 16 bytes.
- */
-void
-spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size)
-{
- const int shift = ea & 0x0f;
- const unsigned read_size = ROUNDUP16(size + shift);
- const unsigned last_read = ROUNDUP16(ea + size);
- const qword *const last_write = dst + (ROUNDUP16(size) / 16);
- unsigned i;
-
-
- if (shift == 0) {
- /* Data is already aligned. Fetch directly into the destination buffer.
- */
- for (i = 0; i < size; i += 16) {
- *(dst++) = cache_rd(data, ea + i);
- }
- } else {
- qword hi;
-
-
- /* Please exercise extreme caution when modifying this code. This code
- * must not read past the end of the page containing the source data,
- * and it must not write more than ((size + 15) / 16) qwords to the
- * destination buffer.
- */
- ea &= ~0x0f;
- hi = cache_rd(data, ea);
- for (i = 16; i < read_size; i += 16) {
- qword lo = cache_rd(data, ea + i);
-
- *(dst++) = si_or((qword) spu_slqwbyte(hi, shift),
- (qword) spu_rlmaskqwbyte(lo, shift - 16));
- hi = lo;
- }
-
- if (dst != last_write) {
- *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0));
- }
- }
-
- ASSERT((ea + i) == last_read);
- ASSERT(dst == last_write);
-}
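The byte shifting above exists because the software cache only hands back whole 16-byte-aligned qwords, so an unaligned read is assembled from the qwords that cover it. A portable sketch of the same idea using plain byte copies; cache_rd() is stood in for by an aligned memcpy and nothing here is SPU-specific:

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define LINE 16u

/* Stand-in for cache_rd(): may only be called with a LINE-aligned address. */
static void
read_line(uint8_t out[LINE], const uint8_t *mem, size_t ea)
{
   assert(ea % LINE == 0);
   memcpy(out, mem + ea, LINE);
}

/* Fetch 'size' bytes starting at unaligned offset 'ea' using only aligned
 * line reads, merging neighbouring lines as needed.
 */
static void
fetch_unaligned(uint8_t *dst, const uint8_t *mem, size_t ea, size_t size)
{
   const size_t shift = ea & (LINE - 1);
   size_t base = ea - shift;
   size_t copied = 0;
   uint8_t line[LINE];

   while (copied < size) {
      read_line(line, mem, base);
      size_t avail = LINE - (copied == 0 ? shift : 0);
      size_t n = (size - copied < avail) ? size - copied : avail;
      memcpy(dst + copied, line + (copied == 0 ? shift : 0), n);
      copied += n;
      base += LINE;
   }
}

int
main(void)
{
   uint8_t mem[64], out[10];
   for (unsigned i = 0; i < sizeof(mem); i++)
      mem[i] = (uint8_t) i;

   fetch_unaligned(out, mem, 13, sizeof(out));   /* spans two 16-byte lines */
   for (unsigned i = 0; i < sizeof(out); i++)
      assert(out[i] == 13 + i);
   return 0;
}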
-
-
-/**
- * Notify the cache that a range of main memory may have been modified
- */
-void
-spu_dcache_mark_dirty(unsigned ea, unsigned size)
-{
- unsigned i;
- const unsigned aligned_start = (ea & ALIGN_MASK);
- const unsigned aligned_end = (ea + size + (LINE_SIZE - 1))
- & ALIGN_MASK;
-
-
- for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) {
- const unsigned entry = __cache_dir[i];
- const unsigned addr = entry & ~0x0f;
-
- __cache_dir[i] = ((addr >= aligned_start) && (addr < aligned_end))
- ? (entry & ~CACHELINE_VALID) : entry;
- }
-}
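Marking a range dirty amounts to rounding it outward to whole cache lines and clearing the valid flag of every directory entry whose line falls inside the rounded range. A standalone sketch of the address test, keeping the 128-byte lines used above but with a hypothetical one-bit valid flag instead of the real directory layout:

#include <assert.h>

#define LINE_SIZE   128u
#define ALIGN_MASK  (~(LINE_SIZE - 1))
#define VALID_BIT   0x1u      /* hypothetical: low bit of an entry marks it valid */

/* Does the line starting at 'addr' overlap the byte range [ea, ea+size)? */
static int
line_in_dirty_range(unsigned addr, unsigned ea, unsigned size)
{
   const unsigned aligned_start = ea & ALIGN_MASK;
   const unsigned aligned_end   = (ea + size + (LINE_SIZE - 1)) & ALIGN_MASK;
   return addr >= aligned_start && addr < aligned_end;
}

int
main(void)
{
   /* Dirtying 4 bytes at 0x1050 must invalidate the whole 0x1000..0x107f line. */
   assert( line_in_dirty_range(0x1000, 0x1050, 4));
   assert(!line_in_dirty_range(0x1080, 0x1050, 4));

   unsigned entry = 0x1000 | VALID_BIT;
   if (line_in_dirty_range(entry & ~VALID_BIT, 0x1050, 4))
      entry &= ~VALID_BIT;    /* drop the valid bit, forcing a re-fetch */
   assert(entry == 0x1000);
   return 0;
}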
-
-
-/**
- * Print cache utilization report
- */
-void
-spu_dcache_report(void)
-{
-#ifdef CACHE_STATS
- if (spu.init.id == 0) {
- printf("SPU 0: Texture cache report:\n");
- cache_pr_stats(data);
- }
-#endif
-}
-
-
+++ /dev/null
-/*
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SPU_DCACHE_H
-#define SPU_DCACHE_H
-
-extern void
-spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size);
-
-extern void
-spu_dcache_mark_dirty(unsigned ea, unsigned size);
-
-extern void
-spu_dcache_report(void);
-
-#endif /* SPU_DCACHE_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * TGSI interpreter/executor.
- *
- * Flow control information:
- *
- * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
- * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
- * care since a condition may be true for some quad components but false
- * for other components.
- *
- * We basically execute all statements (even if they're in the part of
- * an IF/ELSE clause that's "not taken") and use a special mask to
- * control writing to destination registers. This is the ExecMask.
- * See store_dest().
- *
- * The ExecMask is computed from three other masks (CondMask, LoopMask and
- * ContMask) which are controlled by the flow control instructions (namely
- * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
- *
- *
- * Authors:
- * Michal Krol
- * Brian Paul
- */
-
-#include <transpose_matrix4x4.h>
-#include <simdmath/ceilf4.h>
-#include <simdmath/cosf4.h>
-#include <simdmath/divf4.h>
-#include <simdmath/floorf4.h>
-#include <simdmath/log2f4.h>
-#include <simdmath/powf4.h>
-#include <simdmath/sinf4.h>
-#include <simdmath/sqrtf4.h>
-#include <simdmath/truncf4.h>
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_state.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_util.h"
-#include "spu_exec.h"
-#include "spu_main.h"
-#include "spu_vertex_shader.h"
-#include "spu_dcache.h"
-#include "cell/common.h"
-
-#define TILE_TOP_LEFT 0
-#define TILE_TOP_RIGHT 1
-#define TILE_BOTTOM_LEFT 2
-#define TILE_BOTTOM_RIGHT 3
-
-/*
- * Shorthand locations of various utility registers (_I = Index, _C = Channel)
- */
-#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
-#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
-#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
-#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
-#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
-#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
-#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
-#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
-#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
-#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
-#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
-#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
-#define TEMP_128_I TGSI_EXEC_TEMP_128_I
-#define TEMP_128_C TGSI_EXEC_TEMP_128_C
-#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
-#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
-#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
-#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
-#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
-#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
-#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
-#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
-#define TEMP_R0 TGSI_EXEC_TEMP_R0
-
-#define FOR_EACH_CHANNEL(CHAN)\
- for (CHAN = 0; CHAN < 4; CHAN++)
-
-#define IS_CHANNEL_ENABLED(INST, CHAN)\
- ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))
-
-#define IS_CHANNEL_ENABLED2(INST, CHAN)\
- ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))
-
-#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
- FOR_EACH_CHANNEL( CHAN )\
- if (IS_CHANNEL_ENABLED( INST, CHAN ))
-
-#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
- FOR_EACH_CHANNEL( CHAN )\
- if (IS_CHANNEL_ENABLED2( INST, CHAN ))
-
-
-/** The execution mask depends on the conditional mask and the loop mask */
-#define UPDATE_EXEC_MASK(MACH) \
- MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
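Concretely, each of these masks carries one bit per pixel of the 2x2 quad, and a destination component is written only when every mask agrees. A tiny worked example with made-up mask values:

#include <assert.h>

int
main(void)
{
   /* one bit per pixel of the 2x2 quad (bit 0 = pixel 0, ... bit 3 = pixel 3) */
   unsigned CondMask = 0xF;   /* IF condition true for all four pixels        */
   unsigned LoopMask = 0x7;   /* pixel 3 already executed BRK out of the loop */
   unsigned ContMask = 0xD;   /* pixel 1 hit a CONT this iteration            */
   unsigned FuncMask = 0xF;   /* no early RET                                 */

   unsigned ExecMask = CondMask & LoopMask & ContMask & FuncMask;

   /* only pixels 0 and 2 have their destination writes enabled */
   assert(ExecMask == 0x5);
   return 0;
}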
-
-
-#define CHAN_X 0
-#define CHAN_Y 1
-#define CHAN_Z 2
-#define CHAN_W 3
-
-
-
-/**
- * Initialize machine state by expanding tokens to full instructions,
- * allocating temporary storage, setting up constants, etc.
- * After this, we can call spu_exec_machine_run() many times.
- */
-void
-spu_exec_machine_init(struct spu_exec_machine *mach,
- uint numSamplers,
- struct spu_sampler *samplers,
- unsigned processor)
-{
- const qword zero = si_il(0);
- const qword not_zero = si_il(~0);
-
- (void) numSamplers;
- mach->Samplers = samplers;
- mach->Processor = processor;
- mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
-
- /* Setup constants. */
- mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero;
- mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero;
- mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1);
- mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31);
-
- mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f);
- mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f);
- mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f);
- mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f);
-}
-
-
-static INLINE qword
-micro_abs(qword src)
-{
- return si_rotmi(si_shli(src, 1), -1);
-}
-
-static INLINE qword
-micro_ceil(qword src)
-{
- return (qword) _ceilf4((vec_float4) src);
-}
-
-static INLINE qword
-micro_cos(qword src)
-{
- return (qword) _cosf4((vec_float4) src);
-}
-
-static const qword br_shuf = {
- TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
- TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
- TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
- TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
- TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
- TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
- TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1,
- TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3,
-};
-
-static const qword bl_shuf = {
- TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
- TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
- TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
- TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
- TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
- TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
- TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1,
- TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3,
-};
-
-static const qword tl_shuf = {
- TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
- TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
- TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
- TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
- TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
- TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
- TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1,
- TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3,
-};
-
-static qword
-micro_ddx(qword src)
-{
- qword bottom_right = si_shufb(src, src, br_shuf);
- qword bottom_left = si_shufb(src, src, bl_shuf);
-
- return si_fs(bottom_right, bottom_left);
-}
-
-static qword
-micro_ddy(qword src)
-{
- qword top_left = si_shufb(src, src, tl_shuf);
- qword bottom_left = si_shufb(src, src, bl_shuf);
-
- return si_fs(top_left, bottom_left);
-}
-
-static INLINE qword
-micro_div(qword src0, qword src1)
-{
- return (qword) _divf4((vec_float4) src0, (vec_float4) src1);
-}
-
-static qword
-micro_flr(qword src)
-{
- return (qword) _floorf4((vec_float4) src);
-}
-
-static qword
-micro_frc(qword src)
-{
- return si_fs(src, (qword) _floorf4((vec_float4) src));
-}
-
-static INLINE qword
-micro_ge(qword src0, qword src1)
-{
- return si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
-}
-
-static qword
-micro_lg2(qword src)
-{
- return (qword) _log2f4((vec_float4) src);
-}
-
-static INLINE qword
-micro_lt(qword src0, qword src1)
-{
- const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1));
-
- return si_xori(tmp, 0xff);
-}
-
-static INLINE qword
-micro_max(qword src0, qword src1)
-{
- return si_selb(src1, src0, si_fcgt(src0, src1));
-}
-
-static INLINE qword
-micro_min(qword src0, qword src1)
-{
- return si_selb(src0, src1, si_fcgt(src0, src1));
-}
-
-static qword
-micro_neg(qword src)
-{
- return si_xor(src, (qword) spu_splats(0x80000000));
-}
-
-static qword
-micro_set_sign(qword src)
-{
- return si_or(src, (qword) spu_splats(0x80000000));
-}
-
-static qword
-micro_pow(qword src0, qword src1)
-{
- return (qword) _powf4((vec_float4) src0, (vec_float4) src1);
-}
-
-static qword
-micro_rnd(qword src)
-{
- const qword half = (qword) spu_splats(0.5f);
-
- /* May be able to use _roundf4. There may be some difference, though.
- */
- return (qword) _floorf4((vec_float4) si_fa(src, half));
-}
-
-static INLINE qword
-micro_ishr(qword src0, qword src1)
-{
- return si_rotma(src0, si_sfi(src1, 0));
-}
-
-static qword
-micro_trunc(qword src)
-{
- return (qword) _truncf4((vec_float4) src);
-}
-
-static qword
-micro_sin(qword src)
-{
- return (qword) _sinf4((vec_float4) src);
-}
-
-static INLINE qword
-micro_sqrt(qword src)
-{
- return (qword) _sqrtf4((vec_float4) src);
-}
-
-static void
-fetch_src_file_channel(
- const struct spu_exec_machine *mach,
- const uint file,
- const uint swizzle,
- const union spu_exec_channel *index,
- union spu_exec_channel *chan )
-{
- switch( swizzle ) {
- case TGSI_SWIZZLE_X:
- case TGSI_SWIZZLE_Y:
- case TGSI_SWIZZLE_Z:
- case TGSI_SWIZZLE_W:
- switch( file ) {
- case TGSI_FILE_CONSTANT: {
- unsigned i;
-
- for (i = 0; i < 4; i++) {
- const float *ptr = mach->Consts[index->i[i]];
- float tmp[4];
-
- spu_dcache_fetch_unaligned((qword *) tmp,
- (uintptr_t)(ptr + swizzle),
- sizeof(float));
-
- chan->f[i] = tmp[0];
- }
- break;
- }
-
- case TGSI_FILE_INPUT:
- chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
- break;
-
- case TGSI_FILE_TEMPORARY:
- chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
- break;
-
- case TGSI_FILE_IMMEDIATE:
- ASSERT( index->i[0] < (int) mach->ImmLimit );
- ASSERT( index->i[1] < (int) mach->ImmLimit );
- ASSERT( index->i[2] < (int) mach->ImmLimit );
- ASSERT( index->i[3] < (int) mach->ImmLimit );
-
- chan->f[0] = mach->Imms[index->i[0]][swizzle];
- chan->f[1] = mach->Imms[index->i[1]][swizzle];
- chan->f[2] = mach->Imms[index->i[2]][swizzle];
- chan->f[3] = mach->Imms[index->i[3]][swizzle];
- break;
-
- case TGSI_FILE_ADDRESS:
- chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
- break;
-
- case TGSI_FILE_OUTPUT:
- /* vertex/fragment output vars can be read too */
- chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
- break;
-
- default:
- ASSERT( 0 );
- }
- break;
-
- default:
- ASSERT( 0 );
- }
-}
-
-static void
-fetch_source(
- const struct spu_exec_machine *mach,
- union spu_exec_channel *chan,
- const struct tgsi_full_src_register *reg,
- const uint chan_index )
-{
- union spu_exec_channel index;
- uint swizzle;
-
- index.i[0] =
- index.i[1] =
- index.i[2] =
- index.i[3] = reg->Register.Index;
-
- if (reg->Register.Indirect) {
- union spu_exec_channel index2;
- union spu_exec_channel indir_index;
-
- index2.i[0] =
- index2.i[1] =
- index2.i[2] =
- index2.i[3] = reg->Indirect.Index;
-
- swizzle = tgsi_util_get_src_register_swizzle(&reg->Indirect,
- CHAN_X);
- fetch_src_file_channel(
- mach,
- reg->Indirect.File,
- swizzle,
- &index2,
- &indir_index );
-
- index.q = si_a(index.q, indir_index.q);
- }
-
- if( reg->Register.Dimension ) {
- switch( reg->Register.File ) {
- case TGSI_FILE_INPUT:
- index.q = si_mpyi(index.q, 17);
- break;
- case TGSI_FILE_CONSTANT:
- index.q = si_shli(index.q, 12);
- break;
- default:
- ASSERT( 0 );
- }
-
- index.i[0] += reg->Dimension.Index;
- index.i[1] += reg->Dimension.Index;
- index.i[2] += reg->Dimension.Index;
- index.i[3] += reg->Dimension.Index;
-
- if (reg->Dimension.Indirect) {
- union spu_exec_channel index2;
- union spu_exec_channel indir_index;
-
- index2.i[0] =
- index2.i[1] =
- index2.i[2] =
- index2.i[3] = reg->DimIndirect.Index;
-
- swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
- fetch_src_file_channel(
- mach,
- reg->DimIndirect.File,
- swizzle,
- &index2,
- &indir_index );
-
- index.q = si_a(index.q, indir_index.q);
- }
- }
-
- swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
- fetch_src_file_channel(
- mach,
- reg->Register.File,
- swizzle,
- &index,
- chan );
-
- switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
- case TGSI_UTIL_SIGN_CLEAR:
- chan->q = micro_abs(chan->q);
- break;
-
- case TGSI_UTIL_SIGN_SET:
- chan->q = micro_set_sign(chan->q);
- break;
-
- case TGSI_UTIL_SIGN_TOGGLE:
- chan->q = micro_neg(chan->q);
- break;
-
- case TGSI_UTIL_SIGN_KEEP:
- break;
- }
-
- if (reg->RegisterExtMod.Complement) {
- chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q);
- }
-}
-
-static void
-store_dest(
- struct spu_exec_machine *mach,
- const union spu_exec_channel *chan,
- const struct tgsi_full_dst_register *reg,
- const struct tgsi_full_instruction *inst,
- uint chan_index )
-{
- union spu_exec_channel *dst;
-
- switch( reg->Register.File ) {
- case TGSI_FILE_NULL:
- return;
-
- case TGSI_FILE_OUTPUT:
- dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
- + reg->Register.Index].xyzw[chan_index];
- break;
-
- case TGSI_FILE_TEMPORARY:
- dst = &mach->Temps[reg->Register.Index].xyzw[chan_index];
- break;
-
- case TGSI_FILE_ADDRESS:
- dst = &mach->Addrs[reg->Register.Index].xyzw[chan_index];
- break;
-
- default:
- ASSERT( 0 );
- return;
- }
-
- switch (inst->Instruction.Saturate)
- {
- case TGSI_SAT_NONE:
- if (mach->ExecMask & 0x1)
- dst->i[0] = chan->i[0];
- if (mach->ExecMask & 0x2)
- dst->i[1] = chan->i[1];
- if (mach->ExecMask & 0x4)
- dst->i[2] = chan->i[2];
- if (mach->ExecMask & 0x8)
- dst->i[3] = chan->i[3];
- break;
-
- case TGSI_SAT_ZERO_ONE:
- /* XXX need to obey ExecMask here */
- dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
- dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q);
- break;
-
- case TGSI_SAT_MINUS_PLUS_ONE:
- ASSERT( 0 );
- break;
-
- default:
- ASSERT( 0 );
- }
-}
-
-#define FETCH(VAL,INDEX,CHAN)\
- fetch_source (mach, VAL, &inst->Src[INDEX], CHAN)
-
-#define STORE(VAL,INDEX,CHAN)\
- store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN )
-
-
-/**
- * Execute ARB-style KIL which is predicated by a src register.
- * Kill fragment if any of the four values is less than zero.
- */
-static void
-exec_kil(struct spu_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- uint uniquemask;
- uint chan_index;
- uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
- union spu_exec_channel r[1];
-
- /* This mask stores component bits that were already tested. */
- uniquemask = 0;
-
- for (chan_index = 0; chan_index < 4; chan_index++)
- {
- uint swizzle;
- uint i;
-
- /* unswizzle channel */
- swizzle = tgsi_util_get_full_src_register_swizzle (
- &inst->Src[0],
- chan_index);
-
- /* check if the component has not been already tested */
- if (uniquemask & (1 << swizzle))
- continue;
- uniquemask |= 1 << swizzle;
-
- FETCH(&r[0], 0, chan_index);
- for (i = 0; i < 4; i++)
- if (r[0].f[i] < 0.0f)
- kilmask |= 1 << i;
- }
-
- mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
-}
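The kill mask built above follows the ARB convention: a pixel's bit is set as soon as any referenced component is negative, and already-tested swizzle components are skipped. A compact sketch of the per-pixel accumulation with made-up component values:

#include <assert.h>

int
main(void)
{
   /* one source component, four pixels of the quad */
   const float comp[4] = { 0.5f, -0.1f, 0.0f, -2.0f };
   unsigned kilmask = 0;   /* bit 0 = pixel 0, bit 1 = pixel 1, ... */

   for (unsigned i = 0; i < 4; i++)
      if (comp[i] < 0.0f)
         kilmask |= 1u << i;

   /* pixels 1 and 3 are killed; 0.0 does not kill */
   assert(kilmask == 0xA);
   return 0;
}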
-
-/**
- * Execute NVIDIA-style KIL which is predicated by a condition code.
- * Kill fragment if the condition code is TRUE.
- */
-static void
-exec_kilp(struct spu_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
-
- /* TODO: build kilmask from CC mask */
-
- mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
-}
-
-/*
- * Fetch a texel using STR texture coordinates.
- */
-static void
-fetch_texel( struct spu_sampler *sampler,
- const union spu_exec_channel *s,
- const union spu_exec_channel *t,
- const union spu_exec_channel *p,
- float lodbias, /* XXX should be float[4] */
- union spu_exec_channel *r,
- union spu_exec_channel *g,
- union spu_exec_channel *b,
- union spu_exec_channel *a )
-{
- qword rgba[4];
- qword out[4];
-
- sampler->get_samples(sampler, s->f, t->f, p->f, lodbias,
- (float (*)[4]) rgba);
-
- _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba);
- r->q = out[0];
- g->q = out[1];
- b->q = out[2];
- a->q = out[3];
-}
-
-
-static void
-exec_tex(struct spu_exec_machine *mach,
- const struct tgsi_full_instruction *inst,
- boolean biasLod, boolean projected)
-{
- const uint unit = inst->Src[1].Register.Index;
- union spu_exec_channel r[8];
- uint chan_index;
- float lodBias;
-
- /* printf("Sampler %u unit %u\n", sampler, unit); */
-
- switch (inst->InstructionExtTexture.Texture) {
- case TGSI_TEXTURE_1D:
-
- FETCH(&r[0], 0, CHAN_X);
-
- if (projected) {
- FETCH(&r[1], 0, CHAN_W);
- r[0].q = micro_div(r[0].q, r[1].q);
- }
-
- if (biasLod) {
- FETCH(&r[1], 0, CHAN_W);
- lodBias = r[1].f[0];
- }
- else
- lodBias = 0.0;
-
- fetch_texel(&mach->Samplers[unit],
- &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
- &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
- break;
-
- case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_RECT:
-
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 0, CHAN_Y);
- FETCH(&r[2], 0, CHAN_Z);
-
- if (projected) {
- FETCH(&r[3], 0, CHAN_W);
- r[0].q = micro_div(r[0].q, r[3].q);
- r[1].q = micro_div(r[1].q, r[3].q);
- r[2].q = micro_div(r[2].q, r[3].q);
- }
-
- if (biasLod) {
- FETCH(&r[3], 0, CHAN_W);
- lodBias = r[3].f[0];
- }
- else
- lodBias = 0.0;
-
- fetch_texel(&mach->Samplers[unit],
- &r[0], &r[1], &r[2], lodBias, /* inputs */
- &r[0], &r[1], &r[2], &r[3]); /* outputs */
- break;
-
- case TGSI_TEXTURE_3D:
- case TGSI_TEXTURE_CUBE:
-
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 0, CHAN_Y);
- FETCH(&r[2], 0, CHAN_Z);
-
- if (projected) {
- FETCH(&r[3], 0, CHAN_W);
- r[0].q = micro_div(r[0].q, r[3].q);
- r[1].q = micro_div(r[1].q, r[3].q);
- r[2].q = micro_div(r[2].q, r[3].q);
- }
-
- if (biasLod) {
- FETCH(&r[3], 0, CHAN_W);
- lodBias = r[3].f[0];
- }
- else
- lodBias = 0.0;
-
- fetch_texel(&mach->Samplers[unit],
- &r[0], &r[1], &r[2], lodBias,
- &r[0], &r[1], &r[2], &r[3]);
- break;
-
- default:
- ASSERT (0);
- }
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[chan_index], 0, chan_index );
- }
-}
-
-
-
-static void
-constant_interpolation(
- struct spu_exec_machine *mach,
- unsigned attrib,
- unsigned chan )
-{
- unsigned i;
-
- for( i = 0; i < QUAD_SIZE; i++ ) {
- mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
- }
-}
-
-static void
-linear_interpolation(
- struct spu_exec_machine *mach,
- unsigned attrib,
- unsigned chan )
-{
- const float x = mach->QuadPos.xyzw[0].f[0];
- const float y = mach->QuadPos.xyzw[1].f[0];
- const float dadx = mach->InterpCoefs[attrib].dadx[chan];
- const float dady = mach->InterpCoefs[attrib].dady[chan];
- const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
- mach->Inputs[attrib].xyzw[chan].f[0] = a0;
- mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
- mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
- mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
-}
-
-static void
-perspective_interpolation(
- struct spu_exec_machine *mach,
- unsigned attrib,
- unsigned chan )
-{
- const float x = mach->QuadPos.xyzw[0].f[0];
- const float y = mach->QuadPos.xyzw[1].f[0];
- const float dadx = mach->InterpCoefs[attrib].dadx[chan];
- const float dady = mach->InterpCoefs[attrib].dady[chan];
- const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
- const float *w = mach->QuadPos.xyzw[3].f;
- /* divide by W here */
- mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
- mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
- mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
- mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
-}
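Both interpolators evaluate the plane equation a(x, y) = a0 + dadx * x + dady * y at the quad's four pixels; the perspective variant additionally divides each result by that pixel's interpolated W. A small numeric sketch of the linear case with made-up coefficients:

#include <assert.h>
#include <math.h>

int
main(void)
{
   /* made-up plane-equation coefficients for one attribute channel */
   const float a0 = 1.0f, dadx = 0.25f, dady = 0.5f;
   const float x = 10.0f, y = 20.0f;      /* upper-left pixel of the quad */

   const float base = a0 + dadx * x + dady * y;
   float quad[4];
   quad[0] = base;                  /* (x,     y)     */
   quad[1] = base + dadx;           /* (x + 1, y)     */
   quad[2] = base + dady;           /* (x,     y + 1) */
   quad[3] = base + dadx + dady;    /* (x + 1, y + 1) */

   assert(fabsf(quad[0] - 13.5f)  < 1e-6f);
   assert(fabsf(quad[3] - 14.25f) < 1e-6f);
   return 0;
}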
-
-
-typedef void (* interpolation_func)(
- struct spu_exec_machine *mach,
- unsigned attrib,
- unsigned chan );
-
-static void
-exec_declaration(struct spu_exec_machine *mach,
- const struct tgsi_full_declaration *decl)
-{
- if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
- if( decl->Declaration.File == TGSI_FILE_INPUT ) {
- unsigned first, last, mask;
- interpolation_func interp;
-
- first = decl->Range.First;
- last = decl->Range.Last;
- mask = decl->Declaration.UsageMask;
-
- switch( decl->Declaration.Interpolate ) {
- case TGSI_INTERPOLATE_CONSTANT:
- interp = constant_interpolation;
- break;
-
- case TGSI_INTERPOLATE_LINEAR:
- interp = linear_interpolation;
- break;
-
- case TGSI_INTERPOLATE_PERSPECTIVE:
- interp = perspective_interpolation;
- break;
-
- default:
- ASSERT( 0 );
- }
-
- if( mask == TGSI_WRITEMASK_XYZW ) {
- unsigned i, j;
-
- for( i = first; i <= last; i++ ) {
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- interp( mach, i, j );
- }
- }
- }
- else {
- unsigned i, j;
-
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- if( mask & (1 << j) ) {
- for( i = first; i <= last; i++ ) {
- interp( mach, i, j );
- }
- }
- }
- }
- }
- }
-}
-
-static void
-exec_instruction(
- struct spu_exec_machine *mach,
- const struct tgsi_full_instruction *inst,
- int *pc )
-{
- uint chan_index;
- union spu_exec_channel r[8];
-
- (*pc)++;
-
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_ARL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- r[0].q = si_cflts(r[0].q, 0);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_MOV:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_LIT:
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[0], 0, CHAN_X );
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
- STORE( &r[0], 0, CHAN_Y );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[1], 0, CHAN_Y );
- r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
-
- FETCH( &r[2], 0, CHAN_W );
- r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q);
- r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q);
- r[1].q = micro_pow(r[1].q, r[2].q);
-
- /* r0 = (r0 > 0.0) ? r1 : 0.0
- */
- r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
- r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q,
- r[0].q);
- STORE( &r[0], 0, CHAN_Z );
- }
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_RCP:
- FETCH( &r[0], 0, CHAN_X );
- r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_RSQ:
- FETCH( &r[0], 0, CHAN_X );
- r[0].q = micro_sqrt(r[0].q);
- r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_EXP:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_LOG:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_MUL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
- {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
-
- r[0].q = si_fm(r[0].q, r[1].q);
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_ADD:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- r[0].q = si_fa(r[0].q, r[1].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DP3:
- /* TGSI_OPCODE_DOT3 */
- FETCH( &r[0], 0, CHAN_X );
- FETCH( &r[1], 1, CHAN_X );
- r[0].q = si_fm(r[0].q, r[1].q);
-
- FETCH( &r[1], 0, CHAN_Y );
- FETCH( &r[2], 1, CHAN_Y );
- r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
-
- FETCH( &r[1], 0, CHAN_Z );
- FETCH( &r[2], 1, CHAN_Z );
- r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DP4:
- /* TGSI_OPCODE_DOT4 */
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 1, CHAN_X);
-
- r[0].q = si_fm(r[0].q, r[1].q);
-
- FETCH(&r[1], 0, CHAN_Y);
- FETCH(&r[2], 1, CHAN_Y);
-
- r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
- FETCH(&r[1], 0, CHAN_Z);
- FETCH(&r[2], 1, CHAN_Z);
-
- r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
- FETCH(&r[1], 0, CHAN_W);
- FETCH(&r[2], 1, CHAN_W);
-
- r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DST:
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- FETCH( &r[0], 0, CHAN_Y );
- FETCH( &r[1], 1, CHAN_Y);
- r[0].q = si_fm(r[0].q, r[1].q);
- STORE( &r[0], 0, CHAN_Y );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[0], 0, CHAN_Z );
- STORE( &r[0], 0, CHAN_Z );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- FETCH( &r[0], 1, CHAN_W );
- STORE( &r[0], 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_MIN:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
-
- r[0].q = micro_min(r[0].q, r[1].q);
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_MAX:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
-
- r[0].q = micro_max(r[0].q, r[1].q);
-
- STORE(&r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SLT:
- /* TGSI_OPCODE_SETLT */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
-
- r[0].q = micro_ge(r[0].q, r[1].q);
- r[0].q = si_xori(r[0].q, 0xff);
-
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SGE:
- /* TGSI_OPCODE_SETGE */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- r[0].q = micro_ge(r[0].q, r[1].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_MAD:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- FETCH( &r[2], 2, chan_index );
- r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SUB:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
-
- r[0].q = si_fs(r[0].q, r[1].q);
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_LRP:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- FETCH(&r[2], 2, chan_index);
-
- r[1].q = si_fs(r[1].q, r[2].q);
- r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_CND:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_DP2A:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_FRC:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- r[0].q = micro_frc(r[0].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_CLAMP:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_FLR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- r[0].q = micro_flr(r[0].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_ROUND:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- r[0].q = micro_rnd(r[0].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_EX2:
- FETCH(&r[0], 0, CHAN_X);
-
- r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_LG2:
- FETCH( &r[0], 0, CHAN_X );
- r[0].q = micro_lg2(r[0].q);
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_POW:
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 1, CHAN_X);
-
- r[0].q = micro_pow(r[0].q, r[1].q);
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_XPD:
- /* TGSI_OPCODE_XPD */
- FETCH(&r[0], 0, CHAN_Y);
- FETCH(&r[1], 1, CHAN_Z);
- FETCH(&r[3], 0, CHAN_Z);
- FETCH(&r[4], 1, CHAN_Y);
-
- /* r2 = (r0 * r1) - (r3 * r4)
- */
- r[2].q = si_fm(r[3].q, r[4].q);
- r[2].q = si_fms(r[0].q, r[1].q, r[2].q);
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &r[2], 0, CHAN_X );
- }
-
- FETCH(&r[2], 1, CHAN_X);
- FETCH(&r[5], 0, CHAN_X);
-
- /* r3 = (r3 * r2) - (r1 * r5)
- */
- r[1].q = si_fm(r[1].q, r[5].q);
- r[3].q = si_fms(r[3].q, r[2].q, r[1].q);
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- STORE( &r[3], 0, CHAN_Y );
- }
-
- /* r5 = (r5 * r4) - (r0 * r2)
- */
- r[0].q = si_fm(r[0].q, r[2].q);
- r[5].q = si_fms(r[5].q, r[4].q, r[0].q);
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- STORE( &r[5], 0, CHAN_Z );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_ABS:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
-
- r[0].q = micro_abs(r[0].q);
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_RCC:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_DPH:
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 1, CHAN_X);
-
- r[0].q = si_fm(r[0].q, r[1].q);
-
- FETCH(&r[1], 0, CHAN_Y);
- FETCH(&r[2], 1, CHAN_Y);
-
- r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
- FETCH(&r[1], 0, CHAN_Z);
- FETCH(&r[2], 1, CHAN_Z);
-
- r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
- FETCH(&r[1], 1, CHAN_W);
-
- r[0].q = si_fa(r[0].q, r[1].q);
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_COS:
- FETCH(&r[0], 0, CHAN_X);
-
- r[0].q = micro_cos(r[0].q);
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DDX:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- r[0].q = micro_ddx(r[0].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DDY:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- r[0].q = micro_ddy(r[0].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_KILP:
- exec_kilp (mach, inst);
- break;
-
- case TGSI_OPCODE_KIL:
- exec_kil (mach, inst);
- break;
-
- case TGSI_OPCODE_PK2H:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_PK2US:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_PK4B:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_PK4UB:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_RFL:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_SEQ:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
-
- r[0].q = si_fceq(r[0].q, r[1].q);
-
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SFL:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_SGT:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- r[0].q = si_fcgt(r[0].q, r[1].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SIN:
- FETCH( &r[0], 0, CHAN_X );
- r[0].q = micro_sin(r[0].q);
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SLE:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
-
- r[0].q = si_fcgt(r[0].q, r[1].q);
- r[0].q = si_xori(r[0].q, 0xff);
-
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SNE:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
-
- r[0].q = si_fceq(r[0].q, r[1].q);
- r[0].q = si_xori(r[0].q, 0xff);
-
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_STR:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_TEX:
- /* simple texture lookup */
- /* src[0] = texcoord */
- /* src[1] = sampler unit */
- exec_tex(mach, inst, FALSE, FALSE);
- break;
-
- case TGSI_OPCODE_TXB:
- /* Texture lookup with lod bias */
-      /* src[0] = texcoord (src[0].w = lod bias) */
- /* src[1] = sampler unit */
- exec_tex(mach, inst, TRUE, FALSE);
- break;
-
- case TGSI_OPCODE_TXD:
-      /* Texture lookup with explicit partial derivatives */
- /* src[0] = texcoord */
- /* src[1] = d[strq]/dx */
- /* src[2] = d[strq]/dy */
- /* src[3] = sampler unit */
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_TXL:
-      /* Texture lookup with explicit LOD */
-      /* src[0] = texcoord (src[0].w = lod) */
- /* src[1] = sampler unit */
- exec_tex(mach, inst, TRUE, FALSE);
- break;
-
- case TGSI_OPCODE_TXP:
- /* Texture lookup with projection */
- /* src[0] = texcoord (src[0].w = projection) */
- /* src[1] = sampler unit */
- exec_tex(mach, inst, TRUE, TRUE);
- break;
-
- case TGSI_OPCODE_UP2H:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_UP2US:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_UP4B:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_UP4UB:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_X2D:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_ARA:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_ARR:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_BRA:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_CAL:
- /* skip the call if no execution channels are enabled */
- if (mach->ExecMask) {
- /* do the call */
-
- /* push the Cond, Loop, Cont stacks */
- ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
- mach->CondStack[mach->CondStackTop++] = mach->CondMask;
- ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
- ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->ContStack[mach->ContStackTop++] = mach->ContMask;
-
- ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
- mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
-
- /* note that PC was already incremented above */
- mach->CallStack[mach->CallStackTop++] = *pc;
- *pc = inst->InstructionExtLabel.Label;
- }
- break;
-
- case TGSI_OPCODE_RET:
- mach->FuncMask &= ~mach->ExecMask;
- UPDATE_EXEC_MASK(mach);
-
- if (mach->ExecMask == 0x0) {
-         /* really return now (otherwise, keep executing) */
-
- if (mach->CallStackTop == 0) {
- /* returning from main() */
- *pc = -1;
- return;
- }
- *pc = mach->CallStack[--mach->CallStackTop];
-
- /* pop the Cond, Loop, Cont stacks */
- ASSERT(mach->CondStackTop > 0);
- mach->CondMask = mach->CondStack[--mach->CondStackTop];
- ASSERT(mach->LoopStackTop > 0);
- mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
- ASSERT(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[--mach->ContStackTop];
- ASSERT(mach->FuncStackTop > 0);
- mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
-
- UPDATE_EXEC_MASK(mach);
- }
- break;
-
- case TGSI_OPCODE_SSG:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_CMP:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- FETCH(&r[2], 2, chan_index);
-
- /* r0 = (r0 < 0.0) ? r1 : r2
- */
- r[3].q = si_xor(r[3].q, r[3].q);
- r[0].q = micro_lt(r[0].q, r[3].q);
- r[0].q = si_selb(r[1].q, r[2].q, r[0].q);
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_SCS:
- if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- FETCH( &r[0], 0, CHAN_X );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
- r[1].q = micro_cos(r[0].q);
- STORE( &r[1], 0, CHAN_X );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- r[1].q = micro_sin(r[0].q);
- STORE( &r[1], 0, CHAN_Y );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_NRM:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_DIV:
- ASSERT( 0 );
- break;
-
- case TGSI_OPCODE_DP2:
- FETCH( &r[0], 0, CHAN_X );
- FETCH( &r[1], 1, CHAN_X );
- r[0].q = si_fm(r[0].q, r[1].q);
-
- FETCH( &r[1], 0, CHAN_Y );
- FETCH( &r[2], 1, CHAN_Y );
- r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_IF:
- /* push CondMask */
- ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
- mach->CondStack[mach->CondStackTop++] = mach->CondMask;
- FETCH( &r[0], 0, CHAN_X );
- /* update CondMask */
- if( ! r[0].u[0] ) {
- mach->CondMask &= ~0x1;
- }
- if( ! r[0].u[1] ) {
- mach->CondMask &= ~0x2;
- }
- if( ! r[0].u[2] ) {
- mach->CondMask &= ~0x4;
- }
- if( ! r[0].u[3] ) {
- mach->CondMask &= ~0x8;
- }
- UPDATE_EXEC_MASK(mach);
- /* Todo: If CondMask==0, jump to ELSE */
- break;
-
- case TGSI_OPCODE_ELSE:
- /* invert CondMask wrt previous mask */
- {
- uint prevMask;
- ASSERT(mach->CondStackTop > 0);
- prevMask = mach->CondStack[mach->CondStackTop - 1];
- mach->CondMask = ~mach->CondMask & prevMask;
- UPDATE_EXEC_MASK(mach);
- /* Todo: If CondMask==0, jump to ENDIF */
- }
- break;
-
- case TGSI_OPCODE_ENDIF:
- /* pop CondMask */
- ASSERT(mach->CondStackTop > 0);
- mach->CondMask = mach->CondStack[--mach->CondStackTop];
- UPDATE_EXEC_MASK(mach);
- break;
-
- case TGSI_OPCODE_END:
- /* halt execution */
- *pc = -1;
- break;
-
- case TGSI_OPCODE_PUSHA:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_POPA:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_CEIL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- r[0].q = micro_ceil(r[0].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_I2F:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- r[0].q = si_csflt(r[0].q, 0);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_NOT:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- r[0].q = si_xorbi(r[0].q, 0xff);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_TRUNC:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- r[0].q = micro_trunc(r[0].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SHL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
-
- r[0].q = si_shl(r[0].q, r[1].q);
-
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_ISHR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- r[0].q = micro_ishr(r[0].q, r[1].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_AND:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- r[0].q = si_and(r[0].q, r[1].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_OR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- r[0].q = si_or(r[0].q, r[1].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_MOD:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_XOR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- r[0].q = si_xor(r[0].q, r[1].q);
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SAD:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_TXF:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_TXQ:
- ASSERT (0);
- break;
-
- case TGSI_OPCODE_EMIT:
- mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
- mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
- break;
-
- case TGSI_OPCODE_ENDPRIM:
- mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
- mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
- break;
-
- case TGSI_OPCODE_BGNLOOP:
- /* push LoopMask and ContMasks */
- ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
- ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->ContStack[mach->ContStackTop++] = mach->ContMask;
- break;
-
- case TGSI_OPCODE_ENDLOOP:
- /* Restore ContMask, but don't pop */
- ASSERT(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
- if (mach->LoopMask) {
- /* repeat loop: jump to instruction just past BGNLOOP */
- *pc = inst->InstructionExtLabel.Label + 1;
- }
- else {
- /* exit loop: pop LoopMask */
- ASSERT(mach->LoopStackTop > 0);
- mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
- /* pop ContMask */
- ASSERT(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[--mach->ContStackTop];
- }
- UPDATE_EXEC_MASK(mach);
- break;
-
- case TGSI_OPCODE_BRK:
- /* turn off loop channels for each enabled exec channel */
- mach->LoopMask &= ~mach->ExecMask;
- /* Todo: if mach->LoopMask == 0, jump to end of loop */
- UPDATE_EXEC_MASK(mach);
- break;
-
- case TGSI_OPCODE_CONT:
- /* turn off cont channels for each enabled exec channel */
- mach->ContMask &= ~mach->ExecMask;
-      /* Todo: if mach->ContMask == 0, jump to end of loop */
- UPDATE_EXEC_MASK(mach);
- break;
-
- case TGSI_OPCODE_BGNSUB:
- /* no-op */
- break;
-
- case TGSI_OPCODE_ENDSUB:
- /* no-op */
- break;
-
- case TGSI_OPCODE_NOP:
- break;
-
- default:
- ASSERT( 0 );
- }
-}
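
The interpreter predicates every opcode on a per-quad execution mask: the AND of the conditional, loop, continue and function masks, recomputed by UPDATE_EXEC_MASK whenever one of them changes. A minimal scalar sketch of that bookkeeping follows; the struct and function names are illustrative, not from the driver.

#include <stdio.h>

struct exec_masks {
   unsigned cond;  /* IF/ELSE/ENDIF */
   unsigned loop;  /* BGNLOOP/ENDLOOP */
   unsigned cont;  /* CONT */
   unsigned func;  /* CAL/RET */
   unsigned exec;  /* AND of the four masks above */
};

static void
update_exec_mask(struct exec_masks *m)
{
   m->exec = m->cond & m->loop & m->cont & m->func;
}

int
main(void)
{
   struct exec_masks m = { 0xf, 0xf, 0xf, 0xf, 0xf };

   /* An IF whose condition is false in channels 1 and 3: */
   m.cond &= ~((1u << 1) | (1u << 3));
   update_exec_mask(&m);
   printf("exec mask after IF:  0x%x\n", m.exec);   /* 0x5 */

   /* A BRK inside a loop disables the remaining live channels: */
   m.loop &= ~m.exec;
   update_exec_mask(&m);
   printf("exec mask after BRK: 0x%x\n", m.exec);   /* 0x0 */
   return 0;
}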
-
-
-/**
- * Run TGSI interpreter.
- * \return bitmask of "alive" quad components
- */
-uint
-spu_exec_machine_run( struct spu_exec_machine *mach )
-{
- uint i;
- int pc = 0;
-
- mach->CondMask = 0xf;
- mach->LoopMask = 0xf;
- mach->ContMask = 0xf;
- mach->FuncMask = 0xf;
- mach->ExecMask = 0xf;
-
- mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */
- ASSERT(mach->CondStackTop == 0);
- ASSERT(mach->LoopStackTop == 0);
- ASSERT(mach->ContStackTop == 0);
- ASSERT(mach->CallStackTop == 0);
-
- mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
- mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
-
- if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
- mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
- mach->Primitives[0] = 0;
- }
-
-
- /* execute declarations (interpolants) */
- if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
- for (i = 0; i < mach->NumDeclarations; i++) {
- PIPE_ALIGN_VAR(16)
- union {
- struct tgsi_full_declaration decl;
- qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16];
- } d;
-         unsigned ea = (unsigned) (mach->Declarations + i);
-
- spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl));
-
- exec_declaration( mach, &d.decl );
- }
- }
-
- /* execute instructions, until pc is set to -1 */
- while (pc != -1) {
- PIPE_ALIGN_VAR(16)
- union {
- struct tgsi_full_instruction inst;
- qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16];
- } i;
- unsigned ea = (unsigned) (mach->Instructions + pc);
-
- spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst));
- exec_instruction( mach, & i.inst, &pc );
- }
-
-#if 0
- /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
- if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
- /*
- * Scale back depth component.
- */
- for (i = 0; i < 4; i++)
- mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
- }
-#endif
-
- return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
-}
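
spu_exec_machine_run() is a fetch-execute loop, except that the TGSI instruction stream lives in main memory: each record is DMA-fetched into a 16-byte-aligned local buffer before being interpreted, and execution stops when an opcode (END, or RET from main) sets the program counter to -1. A host-side sketch of the control flow, with memcpy standing in for the DMA fetch and a placeholder instruction type:

#include <string.h>

struct fake_inst { int opcode; int label; };    /* placeholder record */

static void
fetch_record(void *dst, const void *src, size_t bytes)
{
   memcpy(dst, src, bytes);    /* real code: spu_dcache_fetch_unaligned() */
}

/* execute() may rewrite *pc (CAL, RET, ENDLOOP) or set it to -1 (END) */
static void
run(const struct fake_inst *instructions,
    void (*execute)(const struct fake_inst *, int *pc))
{
   int pc = 0;
   while (pc != -1) {
      struct fake_inst inst;
      fetch_record(&inst, &instructions[pc], sizeof(inst));
      pc++;                    /* PC advances before execution */
      execute(&inst, &pc);
   }
}

static void
execute_end(const struct fake_inst *inst, int *pc)
{
   (void) inst;
   *pc = -1;                   /* behave like TGSI_OPCODE_END */
}

int
main(void)
{
   struct fake_inst prog[1] = { { 0, 0 } };
   run(prog, execute_end);
   return 0;
}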
-
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#if !defined SPU_EXEC_H
-#define SPU_EXEC_H
-
-#include "pipe/p_compiler.h"
-
-#include "spu_tgsi_exec.h"
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-/**
- * Registers may be treated as float, signed int or unsigned int.
- */
-union spu_exec_channel
-{
- float f[QUAD_SIZE];
- int i[QUAD_SIZE];
- unsigned u[QUAD_SIZE];
- qword q;
-};
-
-/**
- * A vector[RGBA] of channels[4 pixels]
- */
-struct spu_exec_vector
-{
- union spu_exec_channel xyzw[NUM_CHANNELS];
-};
-
-/**
- * For fragment programs, information for computing fragment input
- * values from plane equation of the triangle/line.
- */
-struct spu_interp_coef
-{
- float a0[NUM_CHANNELS]; /* in an xyzw layout */
- float dadx[NUM_CHANNELS];
- float dady[NUM_CHANNELS];
-};
-
-
-struct softpipe_tile_cache; /**< Opaque to TGSI */
-
-/**
- * Information for sampling textures, which must be implemented
- * by code outside the TGSI executor.
- */
-struct spu_sampler
-{
- const struct pipe_sampler_state *state;
- struct pipe_resource *texture;
- /** Get samples for four fragments in a quad */
- void (*get_samples)(struct spu_sampler *sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE]);
- void *pipe; /*XXX temporary*/
- struct softpipe_tile_cache *cache;
-};
-
-
-/**
- * Run-time virtual machine state for executing TGSI shader.
- */
-struct spu_exec_machine
-{
- /*
- * 32 program temporaries
- * 4 internal temporaries
- * 1 address
- */
- PIPE_ALIGN_VAR(16)
- struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS
- + TGSI_EXEC_NUM_TEMP_EXTRAS + 1];
-
- struct spu_exec_vector *Addrs;
-
- struct spu_sampler *Samplers;
-
- float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
- unsigned ImmLimit;
- float (*Consts)[4];
- struct spu_exec_vector *Inputs;
- struct spu_exec_vector *Outputs;
- unsigned Processor;
-
- /* GEOMETRY processor only. */
- unsigned *Primitives;
-
- /* FRAGMENT processor only. */
- const struct spu_interp_coef *InterpCoefs;
- struct spu_exec_vector QuadPos;
-
- /* Conditional execution masks */
- uint CondMask; /**< For IF/ELSE/ENDIF */
- uint LoopMask; /**< For BGNLOOP/ENDLOOP */
- uint ContMask; /**< For loop CONT statements */
- uint FuncMask; /**< For function calls */
- uint ExecMask; /**< = CondMask & LoopMask */
-
- /** Condition mask stack (for nested conditionals) */
- uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
- int CondStackTop;
-
- /** Loop mask stack (for nested loops) */
- uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
- int LoopStackTop;
-
- /** Loop continue mask stack (see comments in tgsi_exec.c) */
- uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
- int ContStackTop;
-
- /** Function execution mask stack (for executing subroutine code) */
- uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
- int FuncStackTop;
-
- /** Function call stack for saving/restoring the program counter */
- uint CallStack[TGSI_EXEC_MAX_CALL_NESTING];
- int CallStackTop;
-
- struct tgsi_full_instruction *Instructions;
- uint NumInstructions;
-
- struct tgsi_full_declaration *Declarations;
- uint NumDeclarations;
-};
-
-
-extern void
-spu_exec_machine_init(struct spu_exec_machine *mach,
- uint numSamplers,
- struct spu_sampler *samplers,
- unsigned processor);
-
-extern uint
-spu_exec_machine_run( struct spu_exec_machine *mach );
-
-
-#if defined __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* SPU_EXEC_H */
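
The header's key layout decision is that registers are stored SoA per quad: union spu_exec_channel holds one component for all four fragments of a 2x2 quad, and spu_exec_vector groups the four components. A stand-alone model of that layout (the names and demo values are illustrative):

#include <stdio.h>

#define QUAD_SIZE    4   /* fragments per 2x2 quad */
#define NUM_CHANNELS 4   /* x, y, z, w (or r, g, b, a) */

union channel {
   float    f[QUAD_SIZE];
   int      i[QUAD_SIZE];
   unsigned u[QUAD_SIZE];
};

struct vec {
   union channel xyzw[NUM_CHANNELS];
};

int
main(void)
{
   struct vec color = {
      .xyzw = {
         [0] = { .f = { 1.0f, 0.5f, 0.25f, 0.0f } },   /* red of fragments 0..3 */
         [3] = { .f = { 1.0f, 1.0f, 1.0f, 1.0f } },    /* alpha of fragments 0..3 */
      }
   };

   /* component-major access: channel first, then fragment index */
   printf("red of fragment 2: %g\n", color.xyzw[0].f[2]);
   return 0;
}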
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/**
- * SPU functions accessed by shaders.
- *
- * Authors: Brian Paul
- */
-
-
-#include <string.h>
-#include <libmisc.h>
-#include <math.h>
-#include <cos14_v.h>
-#include <sin14_v.h>
-#include <simdmath/exp2f4.h>
-#include <simdmath/log2f4.h>
-#include <simdmath/powf4.h>
-
-#include "cell/common.h"
-#include "spu_main.h"
-#include "spu_funcs.h"
-#include "spu_texture.h"
-
-
-/** For "return"-ing four vectors */
-struct vec_4x4
-{
- vector float v[4];
-};
-
-
-static vector float
-spu_cos(vector float x)
-{
- return _cos14_v(x);
-}
-
-static vector float
-spu_sin(vector float x)
-{
- return _sin14_v(x);
-}
-
-static vector float
-spu_pow(vector float x, vector float y)
-{
- return _powf4(x, y);
-}
-
-static vector float
-spu_exp2(vector float x)
-{
- return _exp2f4(x);
-}
-
-static vector float
-spu_log2(vector float x)
-{
- return _log2f4(x);
-}
-
-
-static struct vec_4x4
-spu_tex_2d(vector float s, vector float t, vector float r, vector float q,
- unsigned unit)
-{
- struct vec_4x4 colors;
- (void) r;
- (void) q;
- spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v);
- return colors;
-}
-
-static struct vec_4x4
-spu_tex_3d(vector float s, vector float t, vector float r, vector float q,
- unsigned unit)
-{
- struct vec_4x4 colors;
- (void) r;
- (void) q;
- spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v);
- return colors;
-}
-
-static struct vec_4x4
-spu_tex_cube(vector float s, vector float t, vector float r, vector float q,
- unsigned unit)
-{
- struct vec_4x4 colors;
- (void) q;
- sample_texture_cube(s, t, r, unit, colors.v);
- return colors;
-}
-
-
-/**
- * Add named function to list of "exported" functions that will be
- * made available to the PPU-hosted code generator.
- */
-static void
-export_func(struct cell_spu_function_info *spu_functions,
- const char *name, void *addr)
-{
- uint n = spu_functions->num;
- ASSERT(strlen(name) < 16);
- strcpy(spu_functions->names[n], name);
- spu_functions->addrs[n] = (uint) addr;
- spu_functions->num++;
- ASSERT(spu_functions->num <= 16);
-}
-
-
-/**
- * Return info about the SPU's function to the PPU / main memory.
- * The PPU needs to know the address of some SPU-side functions so
- * that we can generate shader code with function calls.
- */
-void
-return_function_info(void)
-{
- PIPE_ALIGN_VAR(16) struct cell_spu_function_info funcs;
- int tag = TAG_MISC;
-
- ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */
-
- funcs.num = 0;
- export_func(&funcs, "spu_cos", &spu_cos);
- export_func(&funcs, "spu_sin", &spu_sin);
- export_func(&funcs, "spu_pow", &spu_pow);
- export_func(&funcs, "spu_exp2", &spu_exp2);
- export_func(&funcs, "spu_log2", &spu_log2);
- export_func(&funcs, "spu_tex_2d", &spu_tex_2d);
- export_func(&funcs, "spu_tex_3d", &spu_tex_3d);
- export_func(&funcs, "spu_tex_cube", &spu_tex_cube);
-
- /* Send the function info back to the PPU / main memory */
- mfc_put((void *) &funcs, /* src in local store */
- (unsigned int) spu.init.spu_functions, /* dst in main memory */
- sizeof(funcs), /* bytes */
- tag,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask(1 << tag);
-}
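
return_function_info() follows the usual SPU pattern for handing data back to the PPU: fill a 16-byte-aligned structure whose size is a multiple of 16, start a tagged mfc_put(), then block on that tag before the buffer goes out of scope. A scalar model of that discipline; dma_put/dma_wait are stand-ins for mfc_put and wait_on_mask, and the payload type is a placeholder.

#include <string.h>

enum { TAG_MISC = 18 };                 /* same tag value as spu_main.h */

struct payload { unsigned num; unsigned addrs[16]; };   /* placeholder table */

static void
dma_put(const void *local_src, void *main_dst, size_t bytes, int tag)
{
   (void) tag;
   memcpy(main_dst, local_src, bytes);  /* models an asynchronous mfc_put */
}

static void
dma_wait(unsigned tag_mask)
{
   (void) tag_mask;                     /* models mfc_write_tag_mask() +
                                           mfc_read_tag_status_any() */
}

static void
send_back(const struct payload *table, struct payload *main_mem_copy)
{
   dma_put(table, main_mem_copy, sizeof(*table), TAG_MISC);
   dma_wait(1u << TAG_MISC);            /* must finish before returning:
                                           'table' lives on the caller's stack */
}

int
main(void)
{
   struct payload local = { 0, { 0 } }, remote;
   send_back(&local, &remote);
   return remote.num == 0 ? 0 : 1;
}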
-
-
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef SPU_FUNCS_H
-#define SPU_FUNCS_H
-
-extern void
-return_function_info(void);
-
-#endif
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/* main() for Cell SPU code */
-
-
-#include <stdio.h>
-#include <libmisc.h>
-
-#include "pipe/p_defines.h"
-
-#include "spu_funcs.h"
-#include "spu_command.h"
-#include "spu_main.h"
-#include "spu_per_fragment_op.h"
-#include "spu_texture.h"
-//#include "spu_test.h"
-#include "cell/common.h"
-
-
-/*
-helpful headers:
-/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h
-/opt/cell/sdk/usr/include/libmisc.h
-*/
-
-struct spu_global spu;
-
-
-static void
-one_time_init(void)
-{
- memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
- memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
- invalidate_tex_cache();
-}
-
-/* In some versions of the SDK the SPE main takes 'unsigned long' as a
- * parameter. In others it takes 'unsigned long long'. Use a define to
- * select between the two.
- */
-#ifdef SPU_MAIN_PARAM_LONG_LONG
-typedef unsigned long long main_param_t;
-#else
-typedef unsigned long main_param_t;
-#endif
-
-/**
- * SPE entrypoint.
- */
-int
-main(main_param_t speid, main_param_t argp)
-{
- int tag = 0;
-
- (void) speid;
-
- ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4);
- ASSERT(sizeof(struct cell_command_render) % 8 == 0);
- ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0);
- ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0);
-
- one_time_init();
- spu_command_init();
-
- D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid);
- D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n");
-
- /* get initialization data */
- mfc_get(&spu.init, /* dest */
- (unsigned int) argp, /* src */
- sizeof(struct cell_init_info), /* bytes */
- tag,
- 0, /* tid */
- 0 /* rid */);
- wait_on_mask( 1 << tag );
-
- if (spu.init.id == 0) {
- return_function_info();
- }
-
-#if 0
- if (spu.init.id==0)
- spu_test_misc(spu.init.id);
-#endif
-
- command_loop();
-
- spu_command_close();
-
- return 0;
-}
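
main() shows the start-up handshake: argp is not a pointer the SPU can dereference but the effective address of a cell_init_info block in main memory, so the program first DMAs it into an aligned local copy and waits for the transfer before reading any field (only SPU 0 then reports the exported function table). A host-side sketch with memcpy standing in for mfc_get/wait_on_mask and a placeholder init struct:

#include <stdint.h>
#include <string.h>

struct init_block {            /* placeholder for struct cell_init_info */
   unsigned id;                /* which SPU this is */
   unsigned num_spus;          /* how many SPUs share the framebuffer */
};

static void
fetch_block(void *dst, uintptr_t ea, size_t bytes)
{
   /* models mfc_get() followed by wait_on_mask(1 << tag) */
   memcpy(dst, (const void *) ea, bytes);
}

int
main(void)
{
   struct init_block main_mem = { 0, 6 };   /* pretend PPU-side block */
   struct init_block local;

   fetch_block(&local, (uintptr_t) &main_mem, sizeof(local));
   return local.id == 0 ? 0 : 1;            /* SPU 0 would export functions */
}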
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef SPU_MAIN_H
-#define SPU_MAIN_H
-
-
-#include <spu_mfcio.h>
-
-#include "cell/common.h"
-#include "draw/draw_vertex.h"
-#include "pipe/p_state.h"
-
-
-#if DEBUG
-/* These debug macros use the unusual construction ", ##__VA_ARGS__"
- * which expands to the expected comma + args if variadic arguments
- * are supplied, but swallows the comma if there are no variadic
- * arguments (which avoids syntax errors that would otherwise occur).
- */
-#define D_PRINTF(flag, format,...) \
- if (spu.init.debug_flags & (flag)) \
- printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__)
-#else
-#define D_PRINTF(...)
-#endif
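
D_PRINTF relies on the GNU ", ##__VA_ARGS__" extension so a call with no variadic arguments still expands to valid code, and it filters output by per-subsystem bits in spu.init.debug_flags. A stand-alone demonstration of the same pattern; the flag value and messages are made up.

#include <stdio.h>

static unsigned debug_flags = 0x1;      /* pretend CELL_DEBUG_CMD-style bit */

#define DBG(flag, format, ...) \
   do { \
      if (debug_flags & (flag)) \
         printf("SPU: " format, ##__VA_ARGS__); \
   } while (0)

int
main(void)
{
   DBG(0x1, "rendering %u tiles\n", 30u);
   DBG(0x1, "command loop entered\n");  /* no varargs: the comma is swallowed */
   return 0;
}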
-
-
-/**
- * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels.
- * The data may be addressed through several different types.
- */
-typedef union {
- ushort us[TILE_SIZE][TILE_SIZE];
- uint ui[TILE_SIZE][TILE_SIZE];
- vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4];
- vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2];
-} tile_t;
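
A colour tile is TILE_SIZE x TILE_SIZE 32-bit pixels, and the union lets the same storage be addressed as ushorts, uints or 16-byte SPU vectors (eight ushorts or four uints per vector, hence the halved and quartered array dimensions). A quick size check, using plain structs as stand-ins for the SPU vector types and assuming the driver's usual TILE_SIZE of 32:

#include <assert.h>

#define TILE_SIZE 32                      /* assumption for this sketch */

typedef struct { unsigned short e[8]; } vec_ushort8;  /* 16-byte stand-in */
typedef struct { unsigned int   e[4]; } vec_uint4;    /* 16-byte stand-in */

typedef union {
   unsigned short us[TILE_SIZE][TILE_SIZE];
   unsigned int   ui[TILE_SIZE][TILE_SIZE];
   vec_ushort8    us8[TILE_SIZE / 2][TILE_SIZE / 4];
   vec_uint4      ui4[TILE_SIZE / 2][TILE_SIZE / 2];
} tile_model_t;

int
main(void)
{
   /* the 32-bit views define the union's size; spu_main.c checks the
    * same thing with ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4) */
   assert(sizeof(tile_model_t) == TILE_SIZE * TILE_SIZE * 4);
   return 0;
}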
-
-
-#define TILE_STATUS_CLEAR 1
-#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */
-#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */
-#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */
-#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */
-
-
-/** Function for sampling textures */
-typedef void (*spu_sample_texture_2d_func)(vector float s,
- vector float t,
- uint unit, uint level, uint face,
- vector float colors[4]);
-
-
-/** Function for performing per-fragment ops */
-typedef void (*spu_fragment_ops_func)(uint x, uint y,
- tile_t *colorTile,
- tile_t *depthStencilTile,
- vector float fragZ,
- vector float fragRed,
- vector float fragGreen,
- vector float fragBlue,
- vector float fragAlpha,
- vector unsigned int mask);
-
-/** Function for running fragment program */
-typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs,
- vector float *outputs,
- vector float *constants);
-
-
-PIPE_ALIGN_TYPE(16,
-struct spu_framebuffer
-{
- void *color_start; /**< addr of color surface in main memory */
- void *depth_start; /**< addr of depth surface in main memory */
- enum pipe_format color_format;
- enum pipe_format depth_format;
- uint width; /**< width in pixels */
- uint height; /**< height in pixels */
- uint width_tiles; /**< width in tiles */
-   uint height_tiles;   /**< height in tiles */
-
- uint color_clear_value;
- uint depth_clear_value;
-
- uint zsize; /**< 0, 2 or 4 bytes per Z */
- float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */
-});
-
-
-/** per-texture level info */
-PIPE_ALIGN_TYPE(16,
-struct spu_texture_level
-{
- void *start;
- ushort width;
- ushort height;
- ushort depth;
- ushort tiles_per_row;
- uint bytes_per_image;
- /** texcoord scale factors */
- vector float scale_s;
- vector float scale_t;
- vector float scale_r;
- /** texcoord masks (if REPEAT then size-1, else ~0) */
- vector signed int mask_s;
- vector signed int mask_t;
- vector signed int mask_r;
- /** texcoord clamp limits */
- vector signed int max_s;
- vector signed int max_t;
- vector signed int max_r;
-});
-
-
-PIPE_ALIGN_TYPE(16,
-struct spu_texture
-{
- struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS];
- uint max_level;
- uint target; /**< PIPE_TEXTURE_x */
-});
-
-
-/**
- * All SPU global/context state will be in a singleton object of this type:
- */
-PIPE_ALIGN_TYPE(16,
-struct spu_global
-{
- /** One-time init/constant info */
- struct cell_init_info init;
-
- /*
- * Current state
- */
- struct spu_framebuffer fb;
- struct pipe_depth_stencil_alpha_state depth_stencil_alpha;
- struct pipe_blend_state blend;
- struct pipe_blend_color blend_color;
- struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
- struct pipe_rasterizer_state rasterizer;
- struct spu_texture texture[PIPE_MAX_SAMPLERS];
- struct vertex_info vertex_info;
-
- /** Current color and Z tiles */
- PIPE_ALIGN_VAR(16) tile_t ctile;
- PIPE_ALIGN_VAR(16) tile_t ztile;
-
- /** Read depth/stencil tiles? */
- boolean read_depth_stencil;
-
- /** Current tiles' status */
- ubyte cur_ctile_status;
- ubyte cur_ztile_status;
-
- /** Status of all tiles in framebuffer */
- PIPE_ALIGN_VAR(16) ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE];
- PIPE_ALIGN_VAR(16) ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE];
-
- /** Current fragment ops machine code, at 8-byte boundary */
- uint *fragment_ops_code;
- uint fragment_ops_code_size;
- /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */
- spu_fragment_ops_func fragment_ops[2];
-
- /** Current fragment program machine code, at 8-byte boundary */
- PIPE_ALIGN_VAR(8) uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS];
- /** Current fragment ops function */
- spu_fragment_program_func fragment_program;
-
- /** Current texture sampler function */
- spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS];
- spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS];
- spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS];
-
- /** Fragment program constants */
- vector float constants[4 * CELL_MAX_CONSTANTS];
-
-});
-
-
-extern struct spu_global spu;
-
-
-
-/* DMA TAGS */
-
-#define TAG_SURFACE_CLEAR 10
-#define TAG_VERTEX_BUFFER 11
-#define TAG_READ_TILE_COLOR 12
-#define TAG_READ_TILE_Z 13
-#define TAG_WRITE_TILE_COLOR 14
-#define TAG_WRITE_TILE_Z 15
-#define TAG_INDEX_BUFFER 16
-#define TAG_BATCH_BUFFER 17
-#define TAG_MISC 18
-#define TAG_DCACHE0 20
-#define TAG_DCACHE1 21
-#define TAG_DCACHE2 22
-#define TAG_DCACHE3 23
-#define TAG_FENCE 24
-
-
-static INLINE void
-wait_on_mask(unsigned tagMask)
-{
- mfc_write_tag_mask( tagMask );
- /* wait for completion of _any_ DMAs specified by tagMask */
- mfc_read_tag_status_any();
-}
-
-
-static INLINE void
-wait_on_mask_all(unsigned tagMask)
-{
- mfc_write_tag_mask( tagMask );
-   /* wait for completion of _all_ DMAs specified by tagMask */
- mfc_read_tag_status_all();
-}
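
Each outstanding DMA carries one of the TAG_* values above, and the wait helpers turn a set of tags into a bit mask: wait_on_mask() returns once any transfer in the mask has completed, wait_on_mask_all() once every one has. A hedged usage sketch follows; get_tile() is the tile-fetch helper used by spu_render.c, and this exact pairing is illustrative rather than taken from the driver.

/* Kick off the colour and Z tile reads under separate tags, then block
 * until both transfers have landed before touching either tile. */
get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0);
get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1);
wait_on_mask_all((1 << TAG_READ_TILE_COLOR) | (1 << TAG_READ_TILE_Z));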
-
-
-
-
-
-static INLINE void
-memset16(ushort *d, ushort value, uint count)
-{
- uint i;
- for (i = 0; i < count; i++)
- d[i] = value;
-}
-
-
-static INLINE void
-memset32(uint *d, uint value, uint count)
-{
- uint i;
- for (i = 0; i < count; i++)
- d[i] = value;
-}
-
-
-#endif /* SPU_MAIN_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \author Brian Paul
- */
-
-
-#include <transpose_matrix4x4.h>
-#include "pipe/p_format.h"
-#include "spu_main.h"
-#include "spu_colorpack.h"
-#include "spu_per_fragment_op.h"
-
-
-#define LINEAR_QUAD_LAYOUT 1
-
-
-static INLINE vector float
-spu_min(vector float a, vector float b)
-{
- vector unsigned int m;
- m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */
- return spu_sel(a, b, m);
-}
-
-
-static INLINE vector float
-spu_max(vector float a, vector float b)
-{
- vector unsigned int m;
- m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */
- return spu_sel(b, a, m);
-}
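
spu_min()/spu_max() avoid branches by turning the comparison into an all-ones or all-zero mask and selecting between the inputs. The scalar equivalent below mirrors spu_cmpgt() plus spu_sel() with plain integer bit operations (NaN handling aside); it is a sketch, not the driver's code.

#include <assert.h>
#include <stdint.h>
#include <string.h>

static float
sel_min(float a, float b)
{
   uint32_t ai, bi, m, r;
   float out;

   memcpy(&ai, &a, sizeof(ai));
   memcpy(&bi, &b, sizeof(bi));
   m = (a > b) ? 0xffffffffu : 0u;    /* mask = a > b ? ~0 : 0 */
   r = (ai & ~m) | (bi & m);          /* pick b where a > b, else a */
   memcpy(&out, &r, sizeof(out));
   return out;
}

int
main(void)
{
   assert(sel_min(2.0f, 1.0f) == 1.0f);
   assert(sel_min(-3.0f, 4.0f) == -3.0f);
   return 0;
}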
-
-
-/**
- * Called by rasterizer for each quad after the shader has run. Do
- * all the per-fragment operations including alpha test, z test,
- * stencil test, blend, colormask and logicops. This is a
- * fallback/debug function. In reality we'll use a generated function
- * produced by the PPU. But this function is useful for
- * debug/validation.
- */
-void
-spu_fallback_fragment_ops(uint x, uint y,
- tile_t *colorTile,
- tile_t *depthStencilTile,
- vector float fragZ,
- vector float fragR,
- vector float fragG,
- vector float fragB,
- vector float fragA,
- vector unsigned int mask)
-{
- vector float frag_aos[4];
- unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */
- unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */
-
- /*
- * Do alpha test
- */
- if (spu.depth_stencil_alpha.alpha.enabled) {
- vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref_value);
- vector unsigned int amask;
-
- switch (spu.depth_stencil_alpha.alpha.func) {
- case PIPE_FUNC_LESS:
- amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */
- break;
- case PIPE_FUNC_GREATER:
- amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */
- break;
- case PIPE_FUNC_GEQUAL:
- amask = spu_cmpgt(ref, fragA);
- amask = spu_nor(amask, amask);
- break;
- case PIPE_FUNC_LEQUAL:
- amask = spu_cmpgt(fragA, ref);
- amask = spu_nor(amask, amask);
- break;
- case PIPE_FUNC_EQUAL:
- amask = spu_cmpeq(ref, fragA);
- break;
- case PIPE_FUNC_NOTEQUAL:
- amask = spu_cmpeq(ref, fragA);
- amask = spu_nor(amask, amask);
- break;
- case PIPE_FUNC_ALWAYS:
- amask = spu_splats(0xffffffffU);
- break;
- case PIPE_FUNC_NEVER:
- amask = spu_splats( 0x0U);
- break;
- default:
- ;
- }
-
- mask = spu_and(mask, amask);
- }
-
-
- /*
- * Z and/or stencil testing...
- */
- if (spu.depth_stencil_alpha.depth.enabled ||
- spu.depth_stencil_alpha.stencil[0].enabled) {
-
- /* get four Z/Stencil values from tile */
- vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
- vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
- vector unsigned int ifbZ = spu_and(ifbZS, mask24);
- vector unsigned int ifbS = spu_andc(ifbZS, mask24);
-
- if (spu.depth_stencil_alpha.stencil[0].enabled) {
- /* do stencil test */
- ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24_UNORM_S8_UINT);
-
- }
- else if (spu.depth_stencil_alpha.depth.enabled) {
- /* do depth test */
-
- ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
- spu.fb.depth_format == PIPE_FORMAT_Z24X8_UNORM);
-
- vector unsigned int ifragZ;
- vector unsigned int zmask;
-
- /* convert four fragZ from float to uint */
- fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
- ifragZ = spu_convtu(fragZ, 0);
-
- /* do depth comparison, setting zmask with results */
- switch (spu.depth_stencil_alpha.depth.func) {
- case PIPE_FUNC_LESS:
- zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */
- break;
- case PIPE_FUNC_GREATER:
-            zmask = spu_cmpgt(ifragZ, ifbZ);  /* mask = (ifragZ > ifbZ) */
- break;
- case PIPE_FUNC_GEQUAL:
- zmask = spu_cmpgt(ifbZ, ifragZ);
- zmask = spu_nor(zmask, zmask);
- break;
- case PIPE_FUNC_LEQUAL:
- zmask = spu_cmpgt(ifragZ, ifbZ);
- zmask = spu_nor(zmask, zmask);
- break;
- case PIPE_FUNC_EQUAL:
- zmask = spu_cmpeq(ifbZ, ifragZ);
- break;
- case PIPE_FUNC_NOTEQUAL:
- zmask = spu_cmpeq(ifbZ, ifragZ);
- zmask = spu_nor(zmask, zmask);
- break;
- case PIPE_FUNC_ALWAYS:
- zmask = spu_splats(0xffffffffU);
- break;
- case PIPE_FUNC_NEVER:
- zmask = spu_splats( 0x0U);
- break;
- default:
- ;
- }
-
- mask = spu_and(mask, zmask);
-
- /* merge framebuffer Z and fragment Z according to the mask */
- ifbZ = spu_or(spu_and(ifragZ, mask),
- spu_andc(ifbZ, mask));
- }
-
- if (spu_extract(spu_orx(mask), 0)) {
- /* put new fragment Z/Stencil values back into Z/Stencil tile */
- depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
-
- spu.cur_ztile_status = TILE_STATUS_DIRTY;
- }
- }
-
-
- /*
- * If we'll need the current framebuffer/tile colors for blending
- * or logicop or colormask, fetch them now.
- */
- if (spu.blend.rt[0].blend_enable ||
- spu.blend.logicop_enable ||
- spu.blend.rt[0].colormask != 0xf) {
-
-#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
- fbc0 = colorTile->ui[y][x*2+0];
- fbc1 = colorTile->ui[y][x*2+1];
- fbc2 = colorTile->ui[y][x*2+2];
- fbc3 = colorTile->ui[y][x*2+3];
-#else
- fbc0 = colorTile->ui[y+0][x+0];
- fbc1 = colorTile->ui[y+0][x+1];
- fbc2 = colorTile->ui[y+1][x+0];
- fbc3 = colorTile->ui[y+1][x+1];
-#endif
- }
-
-
- /*
- * Do blending
- */
- if (spu.blend.rt[0].blend_enable) {
- /* blending terms, misc regs */
- vector float term1r, term1g, term1b, term1a;
- vector float term2r, term2g, term2b, term2a;
- vector float one, tmp;
-
- vector float fbRGBA[4]; /* current framebuffer colors */
-
- /* convert framebuffer colors from packed int to vector float */
- {
- vector float temp[4]; /* float colors in AOS form */
- switch (spu.fb.color_format) {
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- temp[0] = spu_unpack_B8G8R8A8(fbc0);
- temp[1] = spu_unpack_B8G8R8A8(fbc1);
- temp[2] = spu_unpack_B8G8R8A8(fbc2);
- temp[3] = spu_unpack_B8G8R8A8(fbc3);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- temp[0] = spu_unpack_A8R8G8B8(fbc0);
- temp[1] = spu_unpack_A8R8G8B8(fbc1);
- temp[2] = spu_unpack_A8R8G8B8(fbc2);
- temp[3] = spu_unpack_A8R8G8B8(fbc3);
- break;
- default:
- ASSERT(0);
- }
- _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */
- }
-
- /*
- * Compute Src RGB terms (fragment color * factor)
- */
- switch (spu.blend.rt[0].rgb_src_factor) {
- case PIPE_BLENDFACTOR_ONE:
- term1r = fragR;
- term1g = fragG;
- term1b = fragB;
- break;
- case PIPE_BLENDFACTOR_ZERO:
- term1r =
- term1g =
- term1b = spu_splats(0.0f);
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- term1r = spu_mul(fragR, fragR);
- term1g = spu_mul(fragG, fragG);
- term1b = spu_mul(fragB, fragB);
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- term1r = spu_mul(fragR, fragA);
- term1g = spu_mul(fragG, fragA);
- term1b = spu_mul(fragB, fragA);
- break;
- case PIPE_BLENDFACTOR_DST_COLOR:
- term1r = spu_mul(fragR, fbRGBA[0]);
- term1g = spu_mul(fragG, fbRGBA[1]);
-         term1b = spu_mul(fragB, fbRGBA[2]);
- break;
- case PIPE_BLENDFACTOR_DST_ALPHA:
- term1r = spu_mul(fragR, fbRGBA[3]);
- term1g = spu_mul(fragG, fbRGBA[3]);
- term1b = spu_mul(fragB, fbRGBA[3]);
- break;
- case PIPE_BLENDFACTOR_CONST_COLOR:
- term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0]));
- term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1]));
- term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2]));
- break;
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3]));
- term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3]));
- term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3]));
- break;
- /* XXX more cases */
- default:
- ASSERT(0);
- }
-
- /*
- * Compute Src Alpha term (fragment alpha * factor)
- */
- switch (spu.blend.rt[0].alpha_src_factor) {
- case PIPE_BLENDFACTOR_ONE:
- term1a = fragA;
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- term1a = spu_splats(0.0f);
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- term1a = spu_mul(fragA, fragA);
- break;
- case PIPE_BLENDFACTOR_DST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_DST_ALPHA:
- term1a = spu_mul(fragA, fbRGBA[3]);
- break;
- case PIPE_BLENDFACTOR_CONST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_CONST_ALPHA:
-         term1a = spu_mul(fragA, spu_splats(spu.blend_color.color[3]));
- break;
- /* XXX more cases */
- default:
- ASSERT(0);
- }
-
- /*
- * Compute Dest RGB terms (framebuffer color * factor)
- */
- switch (spu.blend.rt[0].rgb_dst_factor) {
- case PIPE_BLENDFACTOR_ONE:
- term2r = fbRGBA[0];
- term2g = fbRGBA[1];
- term2b = fbRGBA[2];
- break;
- case PIPE_BLENDFACTOR_ZERO:
- term2r =
- term2g =
- term2b = spu_splats(0.0f);
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- term2r = spu_mul(fbRGBA[0], fragR);
- term2g = spu_mul(fbRGBA[1], fragG);
- term2b = spu_mul(fbRGBA[2], fragB);
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- term2r = spu_mul(fbRGBA[0], fragA);
- term2g = spu_mul(fbRGBA[1], fragA);
- term2b = spu_mul(fbRGBA[2], fragA);
- break;
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- one = spu_splats(1.0f);
- tmp = spu_sub(one, fragA);
- term2r = spu_mul(fbRGBA[0], tmp);
- term2g = spu_mul(fbRGBA[1], tmp);
- term2b = spu_mul(fbRGBA[2], tmp);
- break;
- case PIPE_BLENDFACTOR_DST_COLOR:
- term2r = spu_mul(fbRGBA[0], fbRGBA[0]);
- term2g = spu_mul(fbRGBA[1], fbRGBA[1]);
- term2b = spu_mul(fbRGBA[2], fbRGBA[2]);
- break;
- case PIPE_BLENDFACTOR_DST_ALPHA:
- term2r = spu_mul(fbRGBA[0], fbRGBA[3]);
- term2g = spu_mul(fbRGBA[1], fbRGBA[3]);
- term2b = spu_mul(fbRGBA[2], fbRGBA[3]);
- break;
- case PIPE_BLENDFACTOR_CONST_COLOR:
- term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0]));
- term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1]));
- term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2]));
- break;
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3]));
- term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3]));
- term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3]));
- break;
- /* XXX more cases */
- default:
- ASSERT(0);
- }
-
- /*
- * Compute Dest Alpha term (framebuffer alpha * factor)
- */
- switch (spu.blend.rt[0].alpha_dst_factor) {
- case PIPE_BLENDFACTOR_ONE:
- term2a = fbRGBA[3];
- break;
- case PIPE_BLENDFACTOR_SRC_COLOR:
- term2a = spu_splats(0.0f);
- break;
- case PIPE_BLENDFACTOR_SRC_ALPHA:
- term2a = spu_mul(fbRGBA[3], fragA);
- break;
- case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
- one = spu_splats(1.0f);
- tmp = spu_sub(one, fragA);
- term2a = spu_mul(fbRGBA[3], tmp);
- break;
- case PIPE_BLENDFACTOR_DST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_DST_ALPHA:
- term2a = spu_mul(fbRGBA[3], fbRGBA[3]);
- break;
- case PIPE_BLENDFACTOR_CONST_COLOR:
- /* fall-through */
- case PIPE_BLENDFACTOR_CONST_ALPHA:
- term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3]));
- break;
- /* XXX more cases */
- default:
- ASSERT(0);
- }
-
- /*
- * Combine Src/Dest RGB terms
- */
- switch (spu.blend.rt[0].rgb_func) {
- case PIPE_BLEND_ADD:
- fragR = spu_add(term1r, term2r);
- fragG = spu_add(term1g, term2g);
- fragB = spu_add(term1b, term2b);
- break;
- case PIPE_BLEND_SUBTRACT:
- fragR = spu_sub(term1r, term2r);
- fragG = spu_sub(term1g, term2g);
- fragB = spu_sub(term1b, term2b);
- break;
- case PIPE_BLEND_REVERSE_SUBTRACT:
- fragR = spu_sub(term2r, term1r);
- fragG = spu_sub(term2g, term1g);
- fragB = spu_sub(term2b, term1b);
- break;
- case PIPE_BLEND_MIN:
- fragR = spu_min(term1r, term2r);
- fragG = spu_min(term1g, term2g);
- fragB = spu_min(term1b, term2b);
- break;
- case PIPE_BLEND_MAX:
- fragR = spu_max(term1r, term2r);
- fragG = spu_max(term1g, term2g);
- fragB = spu_max(term1b, term2b);
- break;
- default:
- ASSERT(0);
- }
-
- /*
- * Combine Src/Dest A term
- */
- switch (spu.blend.rt[0].alpha_func) {
- case PIPE_BLEND_ADD:
- fragA = spu_add(term1a, term2a);
- break;
- case PIPE_BLEND_SUBTRACT:
- fragA = spu_sub(term1a, term2a);
- break;
- case PIPE_BLEND_REVERSE_SUBTRACT:
- fragA = spu_sub(term2a, term1a);
- break;
- case PIPE_BLEND_MIN:
- fragA = spu_min(term1a, term2a);
- break;
- case PIPE_BLEND_MAX:
- fragA = spu_max(term1a, term2a);
- break;
- default:
- ASSERT(0);
- }
- }
-
-
- /*
- * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
- */
-#if 0
- /* original code */
- {
- vector float frag_soa[4];
- frag_soa[0] = fragR;
- frag_soa[1] = fragG;
- frag_soa[2] = fragB;
- frag_soa[3] = fragA;
- _transpose_matrix4x4(frag_aos, frag_soa);
- }
-#else
- /* short-cut relying on function parameter layout: */
- _transpose_matrix4x4(frag_aos, &fragR);
- (void) fragG;
- (void) fragB;
-#endif
-
- /*
- * Pack fragment float colors into 32-bit RGBA words.
- */
- switch (spu.fb.color_format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- fragc0 = spu_pack_A8R8G8B8(frag_aos[0]);
- fragc1 = spu_pack_A8R8G8B8(frag_aos[1]);
- fragc2 = spu_pack_A8R8G8B8(frag_aos[2]);
- fragc3 = spu_pack_A8R8G8B8(frag_aos[3]);
- break;
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- fragc0 = spu_pack_B8G8R8A8(frag_aos[0]);
- fragc1 = spu_pack_B8G8R8A8(frag_aos[1]);
- fragc2 = spu_pack_B8G8R8A8(frag_aos[2]);
- fragc3 = spu_pack_B8G8R8A8(frag_aos[3]);
- break;
- default:
- fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
- ASSERT(0);
- }
-
-
- /*
- * Do color masking
- */
- if (spu.blend.rt[0].colormask != 0xf) {
- uint cmask = 0x0; /* each byte corresponds to a color channel */
-
- /* Form bitmask depending on color buffer format and colormask bits */
- switch (spu.fb.color_format) {
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- if (spu.blend.rt[0].colormask & PIPE_MASK_R)
- cmask |= 0x00ff0000; /* red */
- if (spu.blend.rt[0].colormask & PIPE_MASK_G)
- cmask |= 0x0000ff00; /* green */
- if (spu.blend.rt[0].colormask & PIPE_MASK_B)
- cmask |= 0x000000ff; /* blue */
- if (spu.blend.rt[0].colormask & PIPE_MASK_A)
- cmask |= 0xff000000; /* alpha */
- break;
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- if (spu.blend.rt[0].colormask & PIPE_MASK_R)
- cmask |= 0x0000ff00; /* red */
- if (spu.blend.rt[0].colormask & PIPE_MASK_G)
- cmask |= 0x00ff0000; /* green */
- if (spu.blend.rt[0].colormask & PIPE_MASK_B)
- cmask |= 0xff000000; /* blue */
- if (spu.blend.rt[0].colormask & PIPE_MASK_A)
- cmask |= 0x000000ff; /* alpha */
- break;
- default:
- ASSERT(0);
- }
-
- /*
- * Apply color mask to the 32-bit packed colors.
- * if (cmask[i])
- * frag color[i] = frag color[i];
- * else
- * frag color[i] = framebuffer color[i];
- */
- fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask);
- fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask);
- fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask);
- fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask);
- }
-
-
- /*
- * Do logic ops
- */
- if (spu.blend.logicop_enable) {
- /* XXX to do */
- /* apply logicop to 32-bit packed colors (fragcx and fbcx) */
- }
-
-
- /*
- * If mask is non-zero, mark tile as dirty.
- */
- if (spu_extract(spu_orx(mask), 0)) {
- spu.cur_ctile_status = TILE_STATUS_DIRTY;
- }
- else {
- /* write no fragments */
- return;
- }
-
-
- /*
- * Write new fragment/quad colors to the framebuffer/tile.
- * Only write pixels where the corresponding mask word is set.
- */
-#if LINEAR_QUAD_LAYOUT
- /*
- * Quad layout:
- * +--+--+--+--+
- * |p0|p1|p2|p3|...
- * +--+--+--+--+
- */
- if (spu_extract(mask, 0))
- colorTile->ui[y][x*2] = fragc0;
- if (spu_extract(mask, 1))
- colorTile->ui[y][x*2+1] = fragc1;
- if (spu_extract(mask, 2))
- colorTile->ui[y][x*2+2] = fragc2;
- if (spu_extract(mask, 3))
- colorTile->ui[y][x*2+3] = fragc3;
-#else
- /*
- * Quad layout:
- * +--+--+
- * |p0|p1|...
- * +--+--+
- * |p2|p3|...
- * +--+--+
- */
- if (spu_extract(mask, 0))
- colorTile->ui[y+0][x+0] = fragc0;
- if (spu_extract(mask, 1))
- colorTile->ui[y+0][x+1] = fragc1;
- if (spu_extract(mask, 2))
- colorTile->ui[y+1][x+0] = fragc2;
- if (spu_extract(mask, 3))
- colorTile->ui[y+1][x+1] = fragc3;
-#endif
-}
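
After blending, colours are packed into 32-bit words, so the colormask step is a pure bit merge: channels whose mask bits are set keep the fragment value, the rest keep the framebuffer value. A stand-alone model of that merge, using the B8G8R8A8 mask layout built in the code above; the pixel values are made up.

#include <assert.h>
#include <stdint.h>

static uint32_t
apply_colormask(uint32_t frag, uint32_t fb, uint32_t cmask)
{
   return (frag & cmask) | (fb & ~cmask);
}

int
main(void)
{
   /* B8G8R8A8 layout with only red and alpha writable */
   uint32_t cmask = 0x00ff0000u | 0xff000000u;
   uint32_t merged = apply_colormask(0xaabbccddu, 0x11223344u, cmask);
   assert(merged == 0xaabb3344u);
   return 0;
}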
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef SPU_PER_FRAGMENT_OP
-#define SPU_PER_FRAGMENT_OP
-
-
-extern void
-spu_fallback_fragment_ops(uint x, uint y,
- tile_t *colorTile,
- tile_t *depthStencilTile,
- vector float fragZ,
- vector float fragRed,
- vector float fragGreen,
- vector float fragBlue,
- vector float fragAlpha,
- vector unsigned int mask);
-
-
-#endif /* SPU_PER_FRAGMENT_OP */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include <stdio.h>
-#include <libmisc.h>
-#include <spu_mfcio.h>
-
-#include "spu_main.h"
-#include "spu_render.h"
-#include "spu_shuffle.h"
-#include "spu_tri.h"
-#include "spu_tile.h"
-#include "cell/common.h"
-#include "util/u_memory.h"
-
-
-/**
- * Given a rendering command's bounding box (in pixels) compute the
- * location of the corresponding screen tile bounding box.
- */
-static INLINE void
-tile_bounding_box(const struct cell_command_render *render,
- uint *txmin, uint *tymin,
- uint *box_num_tiles, uint *box_width_tiles)
-{
-#if 0
- /* Debug: full-window bounding box */
- uint txmax = spu.fb.width_tiles - 1;
- uint tymax = spu.fb.height_tiles - 1;
- *txmin = 0;
- *tymin = 0;
- *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
- *box_width_tiles = spu.fb.width_tiles;
- (void) render;
- (void) txmax;
- (void) tymax;
-#else
- uint txmax, tymax, box_height_tiles;
-
- *txmin = (uint) render->xmin / TILE_SIZE;
- *tymin = (uint) render->ymin / TILE_SIZE;
- txmax = (uint) render->xmax / TILE_SIZE;
- tymax = (uint) render->ymax / TILE_SIZE;
- if (txmax >= spu.fb.width_tiles)
- txmax = spu.fb.width_tiles-1;
- if (tymax >= spu.fb.height_tiles)
- tymax = spu.fb.height_tiles-1;
- *box_width_tiles = txmax - *txmin + 1;
- box_height_tiles = tymax - *tymin + 1;
- *box_num_tiles = *box_width_tiles * box_height_tiles;
-#endif
-#if 0
- printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id,
- render->xmin, render->ymin, render->xmax, render->ymax);
- printf("SPU %u: tiles: %u, %u .. %u, %u\n",
- spu.init.id, *txmin, *tymin, txmax, tymax);
- ASSERT(render->xmin <= render->xmax);
- ASSERT(render->ymin <= render->ymax);
-#endif
-}
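
tile_bounding_box() maps a pixel-space bounding box to a range of 32x32 screen tiles, clamping the upper corner to the framebuffer's tile grid. A standalone scalar sketch of the same arithmetic (TILE_SIZE matches the driver; the framebuffer dimensions in main() are illustrative):

#include <stdio.h>

#define TILE_SIZE 32

static void tile_bbox(float xmin, float ymin, float xmax, float ymax,
                      unsigned width_tiles, unsigned height_tiles,
                      unsigned *txmin, unsigned *tymin,
                      unsigned *box_width_tiles, unsigned *box_num_tiles)
{
   unsigned txmax, tymax, box_height_tiles;

   *txmin = (unsigned) xmin / TILE_SIZE;
   *tymin = (unsigned) ymin / TILE_SIZE;
   txmax  = (unsigned) xmax / TILE_SIZE;
   tymax  = (unsigned) ymax / TILE_SIZE;

   /* clamp the upper corner to the framebuffer's tile grid */
   if (txmax >= width_tiles)  txmax = width_tiles - 1;
   if (tymax >= height_tiles) tymax = height_tiles - 1;

   *box_width_tiles = txmax - *txmin + 1;
   box_height_tiles = tymax - *tymin + 1;
   *box_num_tiles   = *box_width_tiles * box_height_tiles;
}

int main(void)
{
   unsigned txmin, tymin, w, n;
   /* a prim covering pixels (40,10)..(200,90) on a 1024x768 framebuffer (32x24 tiles) */
   tile_bbox(40.0f, 10.0f, 200.0f, 90.0f, 32, 24, &txmin, &tymin, &w, &n);
   printf("txmin=%u tymin=%u width=%u tiles=%u\n", txmin, tymin, w, n);
   return 0;
}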
-
-
-/** Check if the tile at (tx,ty) belongs to this SPU */
-static INLINE boolean
-my_tile(uint tx, uint ty)
-{
- return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id;
-}
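
my_tile() statically partitions the tile grid across SPUs: tiles are numbered in row-major order and assigned round-robin by tile_index % num_spus, so no two SPUs ever touch the same tile. A small sketch that prints the owner of each tile for a hypothetical 4x3 grid rendered by 3 SPUs:

#include <stdio.h>

/* Row-major tile index, assigned round-robin across SPUs */
static unsigned tile_owner(unsigned tx, unsigned ty,
                           unsigned width_tiles, unsigned num_spus)
{
   return (width_tiles * ty + tx) % num_spus;
}

int main(void)
{
   const unsigned width_tiles = 4, height_tiles = 3, num_spus = 3;
   for (unsigned ty = 0; ty < height_tiles; ty++) {
      for (unsigned tx = 0; tx < width_tiles; tx++)
         printf("%u ", tile_owner(tx, ty, width_tiles, num_spus));
      printf("\n");
   }
   return 0;
}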
-
-
-/**
- * Start fetching non-clear color/Z tiles from main memory
- */
-static INLINE void
-get_cz_tiles(uint tx, uint ty)
-{
- if (spu.read_depth_stencil) {
- if (spu.cur_ztile_status != TILE_STATUS_CLEAR) {
- //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty);
- get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1);
- spu.cur_ztile_status = TILE_STATUS_GETTING;
- }
- }
-
- if (spu.cur_ctile_status != TILE_STATUS_CLEAR) {
- //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty);
- get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0);
- spu.cur_ctile_status = TILE_STATUS_GETTING;
- }
-}
-
-
-/**
- * Start putting dirty color/Z tiles back to main memory
- */
-static INLINE void
-put_cz_tiles(uint tx, uint ty)
-{
- if (spu.cur_ztile_status == TILE_STATUS_DIRTY) {
- /* tile was modified and needs to be written back */
- //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty);
- put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1);
- spu.cur_ztile_status = TILE_STATUS_DEFINED;
- }
- else if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
- /* tile was never used */
- spu.cur_ztile_status = TILE_STATUS_DEFINED;
- //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty);
- }
-
- if (spu.cur_ctile_status == TILE_STATUS_DIRTY) {
- /* tile was modified and needs to be written back */
- //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty);
- put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0);
- spu.cur_ctile_status = TILE_STATUS_DEFINED;
- }
- else if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
- /* tile was never used */
- spu.cur_ctile_status = TILE_STATUS_DEFINED;
- //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty);
- }
-}
-
-
-/**
- * Wait for 'put' of color/z tiles to complete.
- */
-static INLINE void
-wait_put_cz_tiles(void)
-{
- wait_on_mask(1 << TAG_WRITE_TILE_COLOR);
- if (spu.read_depth_stencil) {
- wait_on_mask(1 << TAG_WRITE_TILE_Z);
- }
-}
-
-
-/**
- * Render primitives
- * \param pos_incr returns a value indicating how many words to skip after
- * this command in the batch buffer
- */
-void
-cmd_render(const struct cell_command_render *render, uint *pos_incr)
-{
- /* we'll DMA into these buffers */
- PIPE_ALIGN_VAR(16) ubyte vertex_data[CELL_BUFFER_SIZE];
- const uint vertex_size = render->vertex_size; /* in bytes */
- /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size;
- uint index_bytes;
- const ubyte *vertices;
- const ushort *indexes;
- uint i, j;
- uint num_tiles;
-
- D_PRINTF(CELL_DEBUG_CMD,
- "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n",
- render->prim_type,
- render->num_verts,
- render->num_indexes,
- render->inline_verts);
-
- ASSERT(sizeof(*render) % 4 == 0);
- ASSERT(total_vertex_bytes % 16 == 0);
- ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES);
- ASSERT(render->num_indexes % 3 == 0);
-
-
- /* indexes are right after the render command in the batch buffer */
- indexes = (const ushort *) (render + 1);
- index_bytes = ROUNDUP8(render->num_indexes * 2);
- *pos_incr = index_bytes / 8 + sizeof(*render) / 8;
-
-
- if (render->inline_verts) {
- /* Vertices are after indexes in batch buffer at next 16-byte addr */
- vertices = (const ubyte *) render + (*pos_incr * 8);
- vertices = (const ubyte *) align_pointer((void *) vertices, 16);
- ASSERT_ALIGN16(vertices);
- *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8;
- }
- else {
- /* Begin DMA fetch of vertex buffer */
- ubyte *src = spu.init.buffers[render->vertex_buf];
- ubyte *dest = vertex_data;
-
- /* skip vertex data we won't use */
-#if 01
- src += render->min_index * vertex_size;
- dest += render->min_index * vertex_size;
- total_vertex_bytes -= render->min_index * vertex_size;
-#endif
- ASSERT(total_vertex_bytes % 16 == 0);
- ASSERT_ALIGN16(dest);
- ASSERT_ALIGN16(src);
-
- mfc_get(dest, /* in vertex_data[] array */
- (unsigned int) src, /* src in main memory */
- total_vertex_bytes, /* size */
- TAG_VERTEX_BUFFER,
- 0, /* tid */
- 0 /* rid */);
-
- vertices = vertex_data;
-
- wait_on_mask(1 << TAG_VERTEX_BUFFER);
- }
-
-
- /**
- ** find tiles which intersect the prim bounding box
- **/
- uint txmin, tymin, box_width_tiles, box_num_tiles;
- tile_bounding_box(render, &txmin, &tymin,
- &box_num_tiles, &box_width_tiles);
-
-
- /* make sure any pending clears have completed */
- wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */
-
-
- num_tiles = 0;
-
- /**
- ** loop over tiles, rendering tris
- **/
- for (i = 0; i < box_num_tiles; i++) {
- const uint tx = txmin + i % box_width_tiles;
- const uint ty = tymin + i / box_width_tiles;
-
- ASSERT(tx < spu.fb.width_tiles);
- ASSERT(ty < spu.fb.height_tiles);
-
- if (!my_tile(tx, ty))
- continue;
-
- num_tiles++;
-
- spu.cur_ctile_status = spu.ctile_status[ty][tx];
- spu.cur_ztile_status = spu.ztile_status[ty][tx];
-
- get_cz_tiles(tx, ty);
-
- uint drawn = 0;
-
- const qword vertex_sizes = (qword)spu_splats(vertex_size);
- const qword verticess = (qword)spu_splats((uint)vertices);
-
- ASSERT_ALIGN16(&indexes[0]);
-
- const uint num_indexes = render->num_indexes;
-
- /* loop over tris
- * &indexes[0] will be 16 byte aligned. This loop is heavily unrolled
- * avoiding variable rotates when extracting vertex indices.
- */
- for (j = 0; j < num_indexes; j += 24) {
- /* Load three vectors, containing 24 ushort indices */
- const qword* lower_qword = (qword*)&indexes[j];
- const qword indices0 = lower_qword[0];
- const qword indices1 = lower_qword[1];
- const qword indices2 = lower_qword[2];
-
- /* stores three indices for each tri n in slots 0, 1 and 2 of vsn */
- /* Straightforward rotates for these */
- qword vs0 = indices0;
- qword vs1 = si_shlqbyi(indices0, 6);
- qword vs3 = si_shlqbyi(indices1, 2);
- qword vs4 = si_shlqbyi(indices1, 8);
- qword vs6 = si_shlqbyi(indices2, 4);
- qword vs7 = si_shlqbyi(indices2, 10);
-
- /* For tri 2 and 5, the three indices are split across two machine
- * words - rotate and combine */
- const qword tmp2a = si_shlqbyi(indices0, 12);
- const qword tmp2b = si_rotqmbyi(indices1, 12|16);
- qword vs2 = si_selb(tmp2a, tmp2b, si_fsmh(si_from_uint(0x20)));
-
- const qword tmp5a = si_shlqbyi(indices1, 14);
- const qword tmp5b = si_rotqmbyi(indices2, 14|16);
- qword vs5 = si_selb(tmp5a, tmp5b, si_fsmh(si_from_uint(0x60)));
-
- /* unpack indices from halfword slots to word slots */
- vs0 = si_shufb(vs0, vs0, SHUFB8(0,A,0,B,0,C,0,0));
- vs1 = si_shufb(vs1, vs1, SHUFB8(0,A,0,B,0,C,0,0));
- vs2 = si_shufb(vs2, vs2, SHUFB8(0,A,0,B,0,C,0,0));
- vs3 = si_shufb(vs3, vs3, SHUFB8(0,A,0,B,0,C,0,0));
- vs4 = si_shufb(vs4, vs4, SHUFB8(0,A,0,B,0,C,0,0));
- vs5 = si_shufb(vs5, vs5, SHUFB8(0,A,0,B,0,C,0,0));
- vs6 = si_shufb(vs6, vs6, SHUFB8(0,A,0,B,0,C,0,0));
- vs7 = si_shufb(vs7, vs7, SHUFB8(0,A,0,B,0,C,0,0));
-
- /* Calculate address of vertex in vertices[] */
- vs0 = si_mpya(vs0, vertex_sizes, verticess);
- vs1 = si_mpya(vs1, vertex_sizes, verticess);
- vs2 = si_mpya(vs2, vertex_sizes, verticess);
- vs3 = si_mpya(vs3, vertex_sizes, verticess);
- vs4 = si_mpya(vs4, vertex_sizes, verticess);
- vs5 = si_mpya(vs5, vertex_sizes, verticess);
- vs6 = si_mpya(vs6, vertex_sizes, verticess);
- vs7 = si_mpya(vs7, vertex_sizes, verticess);
-
- /* Select the appropriate call based on the number of vertices
- * remaining */
- switch(num_indexes - j) {
- default: drawn += tri_draw(vs7, tx, ty);
- case 21: drawn += tri_draw(vs6, tx, ty);
- case 18: drawn += tri_draw(vs5, tx, ty);
- case 15: drawn += tri_draw(vs4, tx, ty);
- case 12: drawn += tri_draw(vs3, tx, ty);
- case 9: drawn += tri_draw(vs2, tx, ty);
- case 6: drawn += tri_draw(vs1, tx, ty);
- case 3: drawn += tri_draw(vs0, tx, ty);
- }
- }
-
- //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3);
-
- /* write color/z tiles back to main framebuffer, if dirtied */
- put_cz_tiles(tx, ty);
-
- wait_put_cz_tiles(); /* XXX seems unnecessary... */
-
- spu.ctile_status[ty][tx] = spu.cur_ctile_status;
- spu.ztile_status[ty][tx] = spu.cur_ztile_status;
- }
-
- D_PRINTF(CELL_DEBUG_CMD,
- "RENDER done (%u tiles hit)\n",
- num_tiles);
-}
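
The unrolled inner loop above consumes up to 24 indices (eight triangles) per iteration and handles the final partial group with switch fall-through: num_indexes - j picks the highest valid entry point, and every case below it falls through, so only the triangles that actually exist are drawn. A scalar sketch of the same remainder-handling pattern, with an 8-element batch and a stand-in work function (both illustrative):

#include <stdio.h>

static void do_item(int i)   /* stands in for tri_draw() */
{
   printf("item %d\n", i);
}

static void process(const int *items, int count)
{
   for (int j = 0; j < count; j += 8) {
      /* On a full batch the default case runs and everything falls through;
       * on the final partial batch we enter lower down and skip the
       * elements that don't exist. */
      switch (count - j) {
      default: do_item(items[j + 7]);   /* fall through */
      case 7:  do_item(items[j + 6]);   /* fall through */
      case 6:  do_item(items[j + 5]);   /* fall through */
      case 5:  do_item(items[j + 4]);   /* fall through */
      case 4:  do_item(items[j + 3]);   /* fall through */
      case 3:  do_item(items[j + 2]);   /* fall through */
      case 2:  do_item(items[j + 1]);   /* fall through */
      case 1:  do_item(items[j + 0]);
      }
   }
}

int main(void)
{
   int items[11] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
   process(items, 11);   /* one full batch of 8, then a partial batch of 3 */
   return 0;
}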
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef SPU_RENDER_H
-#define SPU_RENDER_H
-
-#include "cell/common.h"
-
-extern void
-cmd_render(const struct cell_command_render *render, uint *pos_incr);
-
-#endif /* SPU_RENDER_H */
-
+++ /dev/null
-#ifndef SPU_SHUFFLE_H
-#define SPU_SHUFFLE_H
-
-/*
- * Generate shuffle patterns with minimal fuss.
- *
- * Based on ideas from
- * http://www.insomniacgames.com/tech/articles/0408/files/shuffles.pdf
- *
- * A-P indicates 0-15th position in first vector
- * a-p indicates 0-15th position in second vector
- *
- * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
- * |00|01|02|03|04|05|06|07|08|09|0a|0b|0c|0d|0e|0f|
- * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
- * | A| B| C| D|
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * | A| B| C| D| E| F| G| H|
- * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
- * | A| B| C| D| E| F| G| H| I| J| K| L| M| N| O| P|
- * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
- *
- * x or X indicates 0xff
- * 8 indicates 0x80
- * 0 indicates 0x00
- *
- * The macros SHUFFLE4() SHUFFLE8() and SHUFFLE16() provide a const vector
- * unsigned char literal suitable for use with spu_shuffle().
- *
- * The macros SHUFB4() SHUFB8() and SHUFB16() provide a const qword vector
- * literal suitable for use with si_shufb().
- *
- *
- * For example :
- * SHUFB4(A,A,A,A)
- * expands to :
- * ((const qword){0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3})
- *
- * SHUFFLE8(A,B,a,b,C,c,8,8)
- * expands to :
- * ((const vector unsigned char){0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13,
- * 0x04,0x05,0x14,0x15,0xe0,0xe0,0xe0,0xe0})
- *
- */
-
-#include <spu_intrinsics.h>
-
-#define SHUFFLE_PATTERN_4_A__ 0x00, 0x01, 0x02, 0x03
-#define SHUFFLE_PATTERN_4_B__ 0x04, 0x05, 0x06, 0x07
-#define SHUFFLE_PATTERN_4_C__ 0x08, 0x09, 0x0a, 0x0b
-#define SHUFFLE_PATTERN_4_D__ 0x0c, 0x0d, 0x0e, 0x0f
-#define SHUFFLE_PATTERN_4_a__ 0x10, 0x11, 0x12, 0x13
-#define SHUFFLE_PATTERN_4_b__ 0x14, 0x15, 0x16, 0x17
-#define SHUFFLE_PATTERN_4_c__ 0x18, 0x19, 0x1a, 0x1b
-#define SHUFFLE_PATTERN_4_d__ 0x1c, 0x1d, 0x1e, 0x1f
-#define SHUFFLE_PATTERN_4_X__ 0xc0, 0xc0, 0xc0, 0xc0
-#define SHUFFLE_PATTERN_4_x__ 0xc0, 0xc0, 0xc0, 0xc0
-#define SHUFFLE_PATTERN_4_0__ 0x80, 0x80, 0x80, 0x80
-#define SHUFFLE_PATTERN_4_8__ 0xe0, 0xe0, 0xe0, 0xe0
-
-#define SHUFFLE_VECTOR_4__(A, B, C, D) \
- SHUFFLE_PATTERN_4_##A##__, \
- SHUFFLE_PATTERN_4_##B##__, \
- SHUFFLE_PATTERN_4_##C##__, \
- SHUFFLE_PATTERN_4_##D##__
-
-#define SHUFFLE4(A, B, C, D) \
- ((const vector unsigned char){ \
- SHUFFLE_VECTOR_4__(A, B, C, D) \
- })
-
-#define SHUFB4(A, B, C, D) \
- ((const qword){ \
- SHUFFLE_VECTOR_4__(A, B, C, D) \
- })
-
-
-#define SHUFFLE_PATTERN_8_A__ 0x00, 0x01
-#define SHUFFLE_PATTERN_8_B__ 0x02, 0x03
-#define SHUFFLE_PATTERN_8_C__ 0x04, 0x05
-#define SHUFFLE_PATTERN_8_D__ 0x06, 0x07
-#define SHUFFLE_PATTERN_8_E__ 0x08, 0x09
-#define SHUFFLE_PATTERN_8_F__ 0x0a, 0x0b
-#define SHUFFLE_PATTERN_8_G__ 0x0c, 0x0d
-#define SHUFFLE_PATTERN_8_H__ 0x0e, 0x0f
-#define SHUFFLE_PATTERN_8_a__ 0x10, 0x11
-#define SHUFFLE_PATTERN_8_b__ 0x12, 0x13
-#define SHUFFLE_PATTERN_8_c__ 0x14, 0x15
-#define SHUFFLE_PATTERN_8_d__ 0x16, 0x17
-#define SHUFFLE_PATTERN_8_e__ 0x18, 0x19
-#define SHUFFLE_PATTERN_8_f__ 0x1a, 0x1b
-#define SHUFFLE_PATTERN_8_g__ 0x1c, 0x1d
-#define SHUFFLE_PATTERN_8_h__ 0x1e, 0x1f
-#define SHUFFLE_PATTERN_8_X__ 0xc0, 0xc0
-#define SHUFFLE_PATTERN_8_x__ 0xc0, 0xc0
-#define SHUFFLE_PATTERN_8_0__ 0x80, 0x80
-#define SHUFFLE_PATTERN_8_8__ 0xe0, 0xe0
-
-
-#define SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
- SHUFFLE_PATTERN_8_##A##__, \
- SHUFFLE_PATTERN_8_##B##__, \
- SHUFFLE_PATTERN_8_##C##__, \
- SHUFFLE_PATTERN_8_##D##__, \
- SHUFFLE_PATTERN_8_##E##__, \
- SHUFFLE_PATTERN_8_##F##__, \
- SHUFFLE_PATTERN_8_##G##__, \
- SHUFFLE_PATTERN_8_##H##__
-
-#define SHUFFLE8(A, B, C, D, E, F, G, H) \
- ((const vector unsigned char){ \
- SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
- })
-
-#define SHUFB8(A, B, C, D, E, F, G, H) \
- ((const qword){ \
- SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \
- })
-
-
-#define SHUFFLE_PATTERN_16_A__ 0x00
-#define SHUFFLE_PATTERN_16_B__ 0x01
-#define SHUFFLE_PATTERN_16_C__ 0x02
-#define SHUFFLE_PATTERN_16_D__ 0x03
-#define SHUFFLE_PATTERN_16_E__ 0x04
-#define SHUFFLE_PATTERN_16_F__ 0x05
-#define SHUFFLE_PATTERN_16_G__ 0x06
-#define SHUFFLE_PATTERN_16_H__ 0x07
-#define SHUFFLE_PATTERN_16_I__ 0x08
-#define SHUFFLE_PATTERN_16_J__ 0x09
-#define SHUFFLE_PATTERN_16_K__ 0x0a
-#define SHUFFLE_PATTERN_16_L__ 0x0b
-#define SHUFFLE_PATTERN_16_M__ 0x0c
-#define SHUFFLE_PATTERN_16_N__ 0x0d
-#define SHUFFLE_PATTERN_16_O__ 0x0e
-#define SHUFFLE_PATTERN_16_P__ 0x0f
-#define SHUFFLE_PATTERN_16_a__ 0x10
-#define SHUFFLE_PATTERN_16_b__ 0x11
-#define SHUFFLE_PATTERN_16_c__ 0x12
-#define SHUFFLE_PATTERN_16_d__ 0x13
-#define SHUFFLE_PATTERN_16_e__ 0x14
-#define SHUFFLE_PATTERN_16_f__ 0x15
-#define SHUFFLE_PATTERN_16_g__ 0x16
-#define SHUFFLE_PATTERN_16_h__ 0x17
-#define SHUFFLE_PATTERN_16_i__ 0x18
-#define SHUFFLE_PATTERN_16_j__ 0x19
-#define SHUFFLE_PATTERN_16_k__ 0x1a
-#define SHUFFLE_PATTERN_16_l__ 0x1b
-#define SHUFFLE_PATTERN_16_m__ 0x1c
-#define SHUFFLE_PATTERN_16_n__ 0x1d
-#define SHUFFLE_PATTERN_16_o__ 0x1e
-#define SHUFFLE_PATTERN_16_p__ 0x1f
-#define SHUFFLE_PATTERN_16_X__ 0xc0
-#define SHUFFLE_PATTERN_16_x__ 0xc0
-#define SHUFFLE_PATTERN_16_0__ 0x80
-#define SHUFFLE_PATTERN_16_8__ 0xe0
-
-#define SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
- SHUFFLE_PATTERN_16_##A##__, \
- SHUFFLE_PATTERN_16_##B##__, \
- SHUFFLE_PATTERN_16_##C##__, \
- SHUFFLE_PATTERN_16_##D##__, \
- SHUFFLE_PATTERN_16_##E##__, \
- SHUFFLE_PATTERN_16_##F##__, \
- SHUFFLE_PATTERN_16_##G##__, \
- SHUFFLE_PATTERN_16_##H##__, \
- SHUFFLE_PATTERN_16_##I##__, \
- SHUFFLE_PATTERN_16_##J##__, \
- SHUFFLE_PATTERN_16_##K##__, \
- SHUFFLE_PATTERN_16_##L##__, \
- SHUFFLE_PATTERN_16_##M##__, \
- SHUFFLE_PATTERN_16_##N##__, \
- SHUFFLE_PATTERN_16_##O##__, \
- SHUFFLE_PATTERN_16_##P##__
-
-#define SHUFFLE16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
- ((const vector unsigned char){ \
- SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
- })
-
-#define SHUFB16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
- ((const qword){ \
- SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
- })
-
-#endif
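
The SHUFFLE*/SHUFB* macros simply spell out the 16 control bytes consumed by the SPU shuffle-bytes operation: values 0x00-0x1f select a byte from the concatenation of the two source vectors, while the special values used here (0x80, 0xc0, 0xe0) produce the constants 0x00, 0xff and 0x80, as the letter key above describes. Below is a host-side sketch that emulates that byte-selection rule so a pattern such as the quoted SHUFFLE8(A,B,a,b,C,c,8,8) expansion can be checked without SPU hardware; the emulation follows the rule stated above and is not a drop-in replacement for spu_shuffle()/si_shufb():

#include <stdint.h>
#include <stdio.h>

/* Emulate the byte-selection rule of the SPU shuffle-bytes operation for one
 * 16-byte control pattern.  Control bytes below 0x80 index the 32-byte
 * concatenation a||b; the high ranges produce constants as described above. */
static void shuffle16(const uint8_t a[16], const uint8_t b[16],
                      const uint8_t pattern[16], uint8_t out[16])
{
   for (int i = 0; i < 16; i++) {
      uint8_t c = pattern[i];
      if (c >= 0xe0)      out[i] = 0x80;           /* '8' slots */
      else if (c >= 0xc0) out[i] = 0xff;           /* 'x'/'X' slots */
      else if (c >= 0x80) out[i] = 0x00;           /* '0' slots */
      else if (c & 0x10)  out[i] = b[c & 0x0f];    /* 'a'-'p': second vector */
      else                out[i] = a[c & 0x0f];    /* 'A'-'P': first vector */
   }
}

int main(void)
{
   uint8_t a[16], b[16], out[16];
   /* the expansion of SHUFFLE8(A,B,a,b,C,c,8,8) quoted in the comment above */
   const uint8_t pat[16] = { 0x00,0x01,0x02,0x03, 0x10,0x11,0x12,0x13,
                             0x04,0x05, 0x14,0x15, 0xe0,0xe0,0xe0,0xe0 };
   for (int i = 0; i < 16; i++) { a[i] = (uint8_t) i; b[i] = (uint8_t) (0xa0 + i); }

   shuffle16(a, b, pat, out);
   for (int i = 0; i < 16; i++) printf("%02x ", out[i]);
   printf("\n");
   return 0;
}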
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include <math.h>
-
-#include "pipe/p_compiler.h"
-#include "spu_main.h"
-#include "spu_texture.h"
-#include "spu_tile.h"
-#include "spu_colorpack.h"
-#include "spu_dcache.h"
-
-
-/**
- * Mark all tex cache entries as invalid.
- */
-void
-invalidate_tex_cache(void)
-{
- uint lvl;
- for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) {
- uint unit = 0;
- uint bytes = 4 * spu.texture[unit].level[lvl].width
- * spu.texture[unit].level[lvl].height;
-
- if (spu.texture[unit].target == PIPE_TEXTURE_CUBE)
- bytes *= 6;
- else if (spu.texture[unit].target == PIPE_TEXTURE_3D)
- bytes *= spu.texture[unit].level[lvl].depth;
-
- spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes);
- }
-}
-
-
-/**
- * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ...
- *
- * NOTE: in the typical case of bilinear filtering, the four texels
- * are in a 2x2 group so we could get by with just two dcache fetches
- * (two side-by-side texels per fetch). But when bilinear filtering
- * wraps around a texture edge, we'll probably need code like we have
- * now.
- * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time,
- * it's quite likely that the four pixels in a quad will need some of the
- * same texels. So look into doing texture fetches for four pixels at
- * a time.
- */
-static void
-get_four_texels(const struct spu_texture_level *tlevel, uint face,
- vec_int4 x, vec_int4 y,
- vec_uint4 *texels)
-{
- unsigned texture_ea = (uintptr_t) tlevel->start;
- const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */
- const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */
- const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */
- const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */
-
- const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row);
- const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
-
- qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
- tile_offset = si_mpy((qword) tile_offset, tile_size);
-
- qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x);
- texel_offset = si_mpyui(texel_offset, 4);
-
- vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset);
-
- texture_ea = texture_ea + face * tlevel->bytes_per_image;
-
- spu_dcache_fetch_unaligned((qword *) & texels[0],
- texture_ea + spu_extract(offset, 0), 4);
- spu_dcache_fetch_unaligned((qword *) & texels[1],
- texture_ea + spu_extract(offset, 1), 4);
- spu_dcache_fetch_unaligned((qword *) & texels[2],
- texture_ea + spu_extract(offset, 2), 4);
- spu_dcache_fetch_unaligned((qword *) & texels[3],
- texture_ea + spu_extract(offset, 3), 4);
-}
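
get_four_texels() turns each (x, y) texel coordinate into a byte offset within a texture stored as row-major 32x32 tiles of row-major texels: the tile index is (y/32)*tiles_per_row + (x/32), and within the tile the texel sits at ((y&31)*32 + (x&31))*4 bytes, plus face*bytes_per_image for cube maps. A scalar sketch of that address calculation (the 4-byte texel size and tile dimensions follow the code above; the values in main() are illustrative):

#include <stdint.h>
#include <stdio.h>

#define TILE_SIZE 32
#define BYTES_PER_TEXEL 4

/* Byte offset of texel (x, y) in a texture laid out as row-major 32x32 tiles
 * of row-major texels. */
static uint32_t tiled_texel_offset(uint32_t x, uint32_t y, uint32_t tiles_per_row)
{
   uint32_t tile_x = x / TILE_SIZE;
   uint32_t tile_y = y / TILE_SIZE;
   uint32_t offset_x = x % TILE_SIZE;
   uint32_t offset_y = y % TILE_SIZE;

   uint32_t tile_bytes   = TILE_SIZE * TILE_SIZE * BYTES_PER_TEXEL;
   uint32_t tile_offset  = (tile_y * tiles_per_row + tile_x) * tile_bytes;
   uint32_t texel_offset = (offset_y * TILE_SIZE + offset_x) * BYTES_PER_TEXEL;

   return tile_offset + texel_offset;
}

int main(void)
{
   /* a 128-texel-wide level => 4 tiles per row */
   printf("offset of (33, 2) = %u bytes\n",
          (unsigned) tiled_texel_offset(33, 2, 4));
   return 0;
}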
-
-
-/** clamp vec to [0, max] */
-static INLINE vector signed int
-spu_clamp(vector signed int vec, vector signed int max)
-{
- static const vector signed int zero = {0,0,0,0};
- vector unsigned int c;
- c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */
- vec = spu_sel(zero, vec, c);
- c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */
- vec = spu_sel(vec, max, c);
- return vec;
-}
-
-
-
-/**
- * Do nearest texture sampling for four pixels.
- * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
- */
-void
-sample_texture_2d_nearest(vector float s, vector float t,
- uint unit, uint level, uint face,
- vector float colors[4])
-{
- const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
- vector float ss = spu_mul(s, tlevel->scale_s);
- vector float tt = spu_mul(t, tlevel->scale_t);
- vector signed int is = spu_convts(ss, 0);
- vector signed int it = spu_convts(tt, 0);
- vec_uint4 texels[4];
-
- /* PIPE_TEX_WRAP_REPEAT */
- is = spu_and(is, tlevel->mask_s);
- it = spu_and(it, tlevel->mask_t);
-
- /* PIPE_TEX_WRAP_CLAMP */
- is = spu_clamp(is, tlevel->max_s);
- it = spu_clamp(it, tlevel->max_t);
-
- get_four_texels(tlevel, face, is, it, texels);
-
- /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */
- spu_unpack_A8R8G8B8_transpose4(texels, colors);
-}
-
-
-/**
- * Do bilinear texture sampling for four pixels.
- * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
- */
-void
-sample_texture_2d_bilinear(vector float s, vector float t,
- uint unit, uint level, uint face,
- vector float colors[4])
-{
- const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
- static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
-
- vector float ss = spu_madd(s, tlevel->scale_s, half);
- vector float tt = spu_madd(t, tlevel->scale_t, half);
-
- vector signed int is0 = spu_convts(ss, 0);
- vector signed int it0 = spu_convts(tt, 0);
-
- /* is + 1, it + 1 */
- vector signed int is1 = spu_add(is0, 1);
- vector signed int it1 = spu_add(it0, 1);
-
- /* PIPE_TEX_WRAP_REPEAT */
- is0 = spu_and(is0, tlevel->mask_s);
- it0 = spu_and(it0, tlevel->mask_t);
- is1 = spu_and(is1, tlevel->mask_s);
- it1 = spu_and(it1, tlevel->mask_t);
-
- /* PIPE_TEX_WRAP_CLAMP */
- is0 = spu_clamp(is0, tlevel->max_s);
- it0 = spu_clamp(it0, tlevel->max_t);
- is1 = spu_clamp(is1, tlevel->max_s);
- it1 = spu_clamp(it1, tlevel->max_t);
-
- /* get packed int texels */
- vector unsigned int texels[16];
- get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */
- get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */
- get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */
- get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
-
- /* convert packed int texels to float colors */
- vector float ftexels[16];
- spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0);
- spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4);
- spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8);
- spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12);
-
- /* Compute weighting factors in [0,1]
- * Multiply texcoord by 1024, AND with 1023, convert back to float.
- */
- vector float ss1024 = spu_mul(ss, spu_splats(1024.0f));
- vector signed int iss1024 = spu_convts(ss1024, 0);
- iss1024 = spu_and(iss1024, 1023);
- vector float sWeights0 = spu_convtf(iss1024, 10);
-
- vector float tt1024 = spu_mul(tt, spu_splats(1024.0f));
- vector signed int itt1024 = spu_convts(tt1024, 0);
- itt1024 = spu_and(itt1024, 1023);
- vector float tWeights0 = spu_convtf(itt1024, 10);
-
- /* 1 - sWeight and 1 - tWeight */
- vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0);
- vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0);
-
- /* reds, for four pixels */
- ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/
- ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/
- ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/
- ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/
- colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]),
- spu_add(ftexels[8], ftexels[12]));
-
- /* greens, for four pixels */
- ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/
- ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/
- ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/
- ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/
- colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]),
- spu_add(ftexels[9], ftexels[13]));
-
- /* blues, for four pixels */
- ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/
- ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/
- ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/
- ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/
- colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]),
- spu_add(ftexels[10], ftexels[14]));
-
- /* alphas, for four pixels */
- ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/
- ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/
- ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/
- ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/
- colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]),
- spu_add(ftexels[11], ftexels[15]));
-}
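
The weighting factors above are just the fractional parts of the scaled, half-texel-biased coordinates, extracted with 10-bit precision via the multiply-by-1024/AND-with-1023/convert sequence; the four surrounding texels are then blended with (1-ws)(1-wt), ws(1-wt), (1-ws)wt and ws*wt. A scalar single-channel sketch of the same blend, assuming the coordinates are already in texel space with the -0.5 bias applied (floorf stands in for the fraction extraction):

#include <math.h>
#include <stdio.h>

/* Bilinear blend of one color channel from the four texels surrounding
 * (ss, tt), where (ss, tt) are texel-space coordinates already biased by -0.5. */
static float bilerp(float c00, float c10, float c01, float c11, float ss, float tt)
{
   float ws = ss - floorf(ss);   /* weight toward the right column */
   float wt = tt - floorf(tt);   /* weight toward the bottom row   */

   return c00 * (1.0f - ws) * (1.0f - wt) +   /* upper-left  */
          c10 * ws          * (1.0f - wt) +   /* upper-right */
          c01 * (1.0f - ws) * wt          +   /* lower-left  */
          c11 * ws          * wt;             /* lower-right */
}

int main(void)
{
   /* a sample point 25% of the way right and 75% of the way down a texel cell */
   printf("%f\n", bilerp(0.0f, 1.0f, 2.0f, 3.0f, 10.25f, 4.75f));
   return 0;
}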
-
-
-
-/**
- * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h
- */
-static INLINE void
-transpose(vector unsigned int *mOut0,
- vector unsigned int *mOut1,
- vector unsigned int *mOut2,
- vector unsigned int *mOut3,
- vector unsigned int *mIn)
-{
- vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */
- vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */
- vector unsigned int aibj, ckdl, emfn, gohp; /* intermediate vectors */
-
- vector unsigned char shufflehi = ((vector unsigned char) {
- 0x00, 0x01, 0x02, 0x03,
- 0x10, 0x11, 0x12, 0x13,
- 0x04, 0x05, 0x06, 0x07,
- 0x14, 0x15, 0x16, 0x17});
- vector unsigned char shufflelo = ((vector unsigned char) {
- 0x08, 0x09, 0x0A, 0x0B,
- 0x18, 0x19, 0x1A, 0x1B,
- 0x0C, 0x0D, 0x0E, 0x0F,
- 0x1C, 0x1D, 0x1E, 0x1F});
- abcd = *(mIn+0);
- efgh = *(mIn+1);
- ijkl = *(mIn+2);
- mnop = *(mIn+3);
-
- aibj = spu_shuffle(abcd, ijkl, shufflehi);
- ckdl = spu_shuffle(abcd, ijkl, shufflelo);
- emfn = spu_shuffle(efgh, mnop, shufflehi);
- gohp = spu_shuffle(efgh, mnop, shufflelo);
-
- aeim = spu_shuffle(aibj, emfn, shufflehi);
- bfjn = spu_shuffle(aibj, emfn, shufflelo);
- cgko = spu_shuffle(ckdl, gohp, shufflehi);
- dhlp = spu_shuffle(ckdl, gohp, shufflelo);
-
- *mOut0 = aeim;
- *mOut1 = bfjn;
- *mOut2 = cgko;
- *mOut3 = dhlp;
-}
-
-
-/**
- * Bilinear filtering, using int instead of float arithmetic for computing
- * sample weights.
- */
-void
-sample_texture_2d_bilinear_int(vector float s, vector float t,
- uint unit, uint level, uint face,
- vector float colors[4])
-{
- const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
- static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
-
- /* Scale texcoords by size of texture, and add half pixel bias */
- vector float ss = spu_madd(s, tlevel->scale_s, half);
- vector float tt = spu_madd(t, tlevel->scale_t, half);
-
- /* convert float coords to fixed-pt coords with 7 fraction bits */
- vector signed int is = spu_convts(ss, 7); /* XXX really need floor() here */
- vector signed int it = spu_convts(tt, 7); /* XXX really need floor() here */
-
- /* compute integer texel weights in [0, 127] */
- vector signed int sWeights0 = spu_and(is, 127);
- vector signed int tWeights0 = spu_and(it, 127);
- vector signed int sWeights1 = spu_sub(127, sWeights0);
- vector signed int tWeights1 = spu_sub(127, tWeights0);
-
-   /* texel coords: is0 = is / 128, it0 = it / 128 */
- vector signed int is0 = spu_rlmask(is, -7);
- vector signed int it0 = spu_rlmask(it, -7);
-
- /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */
- vector signed int is1 = spu_add(is0, 1);
- vector signed int it1 = spu_add(it0, 1);
-
- /* PIPE_TEX_WRAP_REPEAT */
- is0 = spu_and(is0, tlevel->mask_s);
- it0 = spu_and(it0, tlevel->mask_t);
- is1 = spu_and(is1, tlevel->mask_s);
- it1 = spu_and(it1, tlevel->mask_t);
-
- /* PIPE_TEX_WRAP_CLAMP */
- is0 = spu_clamp(is0, tlevel->max_s);
- it0 = spu_clamp(it0, tlevel->max_t);
- is1 = spu_clamp(is1, tlevel->max_s);
- it1 = spu_clamp(it1, tlevel->max_t);
-
- /* get packed int texels */
- vector unsigned int texels[16];
- get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */
- get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */
- get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */
- get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
-
- /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */
- {
- static const unsigned char ZERO = 0x80;
- int i;
- for (i = 0; i < 16; i++) {
- texels[i] = spu_shuffle(texels[i], texels[i],
- ((vector unsigned char) {
- ZERO, ZERO, ZERO, 1,
- ZERO, ZERO, ZERO, 2,
- ZERO, ZERO, ZERO, 3,
- ZERO, ZERO, ZERO, 0}));
- }
- }
-
- /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */
- vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7,
- texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15;
- transpose(&texel0, &texel1, &texel2, &texel3, texels + 0);
- transpose(&texel4, &texel5, &texel6, &texel7, texels + 4);
- transpose(&texel8, &texel9, &texel10, &texel11, texels + 8);
- transpose(&texel12, &texel13, &texel14, &texel15, texels + 12);
-
- /* computed weighted colors */
- vector unsigned int c0, c1, c2, c3, cSum;
-
- /* red */
- c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
- c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
- c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
- c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
- cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
- colors[0] = spu_convtf(cSum, 22);
-
- /* green */
- c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
- c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
- c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
- c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
- cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
- colors[1] = spu_convtf(cSum, 22);
-
- /* blue */
- c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
- c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
- c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
- c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
- cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
- colors[2] = spu_convtf(cSum, 22);
-
- /* alpha */
- c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
- c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
- c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
- c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
- cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
- colors[3] = spu_convtf(cSum, 22);
-}
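
The integer variant performs the same blend with 7-bit fixed-point weights: the coordinate is converted to fixed point with 7 fraction bits, its low 7 bits become a weight in [0,127], the opposite weight is 127 minus that, and each texel is scaled by the product of two such weights before the sum is converted back to float. A scalar single-channel sketch of that arithmetic; it normalizes exactly by 127*127 and leaves the result in the 0..255 range, whereas the vector code above folds the 0..255-to-0..1 conversion into a single approximate divide by 2^22:

#include <stdint.h>
#include <stdio.h>

/* Fixed-point bilinear blend of one 8-bit channel using 7-bit weights, as in
 * the integer sampler above.  is/it are texel coordinates with 7 fraction
 * bits (i.e. coordinate * 128). */
static float bilerp_fixed(uint32_t c00, uint32_t c10, uint32_t c01, uint32_t c11,
                          int32_t is, int32_t it)
{
   int32_t ws0 = is & 127;        /* weight toward the right column, 0..127 */
   int32_t wt0 = it & 127;        /* weight toward the bottom row,   0..127 */
   int32_t ws1 = 127 - ws0;
   int32_t wt1 = 127 - wt0;

   uint32_t sum = c00 * (uint32_t)(ws1 * wt1) +   /* upper-left  */
                  c10 * (uint32_t)(ws0 * wt1) +   /* upper-right */
                  c01 * (uint32_t)(ws1 * wt0) +   /* lower-left  */
                  c11 * (uint32_t)(ws0 * wt0);    /* lower-right */

   /* Exact normalization; the SPU code divides by 2^22 instead, which is
    * cheaper but only approximately equal to 127 * 127 * 255. */
   return (float) sum / (127.0f * 127.0f);
}

int main(void)
{
   /* sample 25% right / 75% down inside a texel cell: is = x*128+32, it = y*128+96 */
   printf("%f\n", bilerp_fixed(0, 255, 0, 255, 32, 96));
   return 0;
}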
-
-
-
-/**
- * Compute level of detail factor from texcoords.
- */
-static INLINE float
-compute_lambda_2d(uint unit, vector float s, vector float t)
-{
- uint baseLevel = 0;
- float width = spu.texture[unit].level[baseLevel].width;
-   float height = spu.texture[unit].level[baseLevel].height;
- float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0));
- float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0));
- float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0));
- float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0));
-#if 0
- /* ideal value */
- float x = dsdx * dsdx + dtdx * dtdx;
- float y = dsdy * dsdy + dtdy * dtdy;
- float rho = x > y ? x : y;
- rho = sqrtf(rho);
-#else
- /* approximation */
- dsdx = fabsf(dsdx);
- dsdy = fabsf(dsdy);
- dtdx = fabsf(dtdx);
- dtdy = fabsf(dtdy);
- float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5;
-#endif
- float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */
- return lambda;
-}
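
compute_lambda_2d() estimates rho, the texel-space footprint of a one-pixel step, from the texcoord deltas inside the quad and then takes log2 of it; the multiply by 1.442695 is 1/ln(2), converting a natural log into a base-2 log. A scalar sketch of the approximate path, assuming the derivatives have already been scaled by the base level's width and height as above:

#include <math.h>
#include <stdio.h>

/* Approximate LOD from texel-space derivatives, as in the code above:
 * rho ~ (|dsdx| + |dsdy| + |dtdx| + |dtdy|) / 2, lambda = log2(rho). */
static float compute_lambda(float dsdx, float dsdy, float dtdx, float dtdy)
{
   float rho = (fabsf(dsdx) + fabsf(dsdy) + fabsf(dtdx) + fabsf(dtdy)) * 0.5f;
   return logf(rho) * 1.442695f;   /* log2(rho) = ln(rho) / ln(2) */
}

int main(void)
{
   /* one pixel step covers about 4 texels in s and 4 in t => lambda ~ 2 */
   printf("lambda = %f\n", compute_lambda(4.0f, 0.0f, 0.0f, 4.0f));
   return 0;
}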
-
-
-/**
- * Blend two sets of colors according to weight.
- */
-static void
-blend_colors(vector float c0[4], const vector float c1[4], float weight)
-{
- vector float t = spu_splats(weight);
- vector float dc0 = spu_sub(c1[0], c0[0]);
- vector float dc1 = spu_sub(c1[1], c0[1]);
- vector float dc2 = spu_sub(c1[2], c0[2]);
- vector float dc3 = spu_sub(c1[3], c0[3]);
- c0[0] = spu_madd(dc0, t, c0[0]);
- c0[1] = spu_madd(dc1, t, c0[1]);
- c0[2] = spu_madd(dc2, t, c0[2]);
- c0[3] = spu_madd(dc3, t, c0[3]);
-}
-
-
-/**
- * Texture sampling with level of detail selection and possibly mipmap
- * interpolation.
- */
-void
-sample_texture_2d_lod(vector float s, vector float t,
- uint unit, uint level_ignored, uint face,
- vector float colors[4])
-{
- /*
- * Note that we're computing a lambda/lod here that's used for all
- * four pixels in the quad.
- */
- float lambda = compute_lambda_2d(unit, s, t);
-
- (void) face;
- (void) level_ignored;
-
- /* apply lod bias */
- lambda += spu.sampler[unit].lod_bias;
-
- /* clamp */
- if (lambda < spu.sampler[unit].min_lod)
- lambda = spu.sampler[unit].min_lod;
- else if (lambda > spu.sampler[unit].max_lod)
- lambda = spu.sampler[unit].max_lod;
-
- if (lambda <= 0.0f) {
- /* magnify */
- spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors);
- }
- else {
- /* minify */
- if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
- /* sample two mipmap levels and interpolate */
- int level = (int) lambda;
- if (level > (int) spu.texture[unit].max_level)
- level = spu.texture[unit].max_level;
- spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
- if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
- /* sample second mipmap level */
- float weight = lambda - (float) level;
- level++;
- if (level <= (int) spu.texture[unit].max_level) {
- vector float colors2[4];
- spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2);
- blend_colors(colors, colors2, weight);
- }
- }
- }
- else {
- /* sample one mipmap level */
- int level = (int) (lambda + 0.5f);
- if (level > (int) spu.texture[unit].max_level)
- level = spu.texture[unit].max_level;
- spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
- }
- }
-}
-
-
-/** XXX need a SIMD version of this */
-static unsigned
-choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
-{
- /*
- major axis
- direction target sc tc ma
- ---------- ------------------------------- --- --- ---
- +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
- -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
- +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
- -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
- +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
- -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
- */
- const float arx = fabsf(rx);
- const float ary = fabsf(ry);
- const float arz = fabsf(rz);
- unsigned face;
- float sc, tc, ma;
-
- if (arx > ary && arx > arz) {
- if (rx >= 0.0F) {
- face = PIPE_TEX_FACE_POS_X;
- sc = -rz;
- tc = -ry;
- ma = arx;
- }
- else {
- face = PIPE_TEX_FACE_NEG_X;
- sc = rz;
- tc = -ry;
- ma = arx;
- }
- }
- else if (ary > arx && ary > arz) {
- if (ry >= 0.0F) {
- face = PIPE_TEX_FACE_POS_Y;
- sc = rx;
- tc = rz;
- ma = ary;
- }
- else {
- face = PIPE_TEX_FACE_NEG_Y;
- sc = rx;
- tc = -rz;
- ma = ary;
- }
- }
- else {
- if (rz > 0.0F) {
- face = PIPE_TEX_FACE_POS_Z;
- sc = rx;
- tc = -ry;
- ma = arz;
- }
- else {
- face = PIPE_TEX_FACE_NEG_Z;
- sc = -rx;
- tc = -ry;
- ma = arz;
- }
- }
-
- *newS = (sc / ma + 1.0F) * 0.5F;
- *newT = (tc / ma + 1.0F) * 0.5F;
-
- return face;
-}
-
-
-
-void
-sample_texture_cube(vector float s, vector float t, vector float r,
- uint unit, vector float colors[4])
-{
- uint p, faces[4], level = 0;
- float newS[4], newT[4];
-
- /* Compute cube faces referenced by the four sets of texcoords.
- * XXX we should SIMD-ize this.
- */
- for (p = 0; p < 4; p++) {
- float rx = spu_extract(s, p);
- float ry = spu_extract(t, p);
- float rz = spu_extract(r, p);
- faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]);
- }
-
- if (faces[0] == faces[1] &&
- faces[0] == faces[2] &&
- faces[0] == faces[3]) {
- /* GOOD! All four texcoords refer to the same cube face */
- s = (vector float) {newS[0], newS[1], newS[2], newS[3]};
- t = (vector float) {newT[0], newT[1], newT[2], newT[3]};
- spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors);
- }
- else {
- /* BAD! The four texcoords refer to different faces */
- for (p = 0; p < 4; p++) {
- vector float c[4];
-
- spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]),
- unit, level, faces[p], c);
-
- float red = spu_extract(c[0], p);
- float green = spu_extract(c[1], p);
- float blue = spu_extract(c[2], p);
- float alpha = spu_extract(c[3], p);
-
- colors[0] = spu_insert(red, colors[0], p);
- colors[1] = spu_insert(green, colors[1], p);
- colors[2] = spu_insert(blue, colors[2], p);
- colors[3] = spu_insert(alpha, colors[3], p);
- }
- }
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef SPU_TEXTURE_H
-#define SPU_TEXTURE_H
-
-
-#include "pipe/p_compiler.h"
-
-
-extern void
-invalidate_tex_cache(void);
-
-
-extern void
-sample_texture_2d_nearest(vector float s, vector float t,
- uint unit, uint level, uint face,
- vector float colors[4]);
-
-
-extern void
-sample_texture_2d_bilinear(vector float s, vector float t,
- uint unit, uint level, uint face,
- vector float colors[4]);
-
-extern void
-sample_texture_2d_bilinear_int(vector float s, vector float t,
- uint unit, uint level, uint face,
- vector float colors[4]);
-
-
-extern void
-sample_texture_2d_lod(vector float s, vector float t,
- uint unit, uint level, uint face,
- vector float colors[4]);
-
-
-extern void
-sample_texture_cube(vector float s, vector float t, vector float r,
- uint unit, vector float colors[4]);
-
-
-#endif /* SPU_TEXTURE_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * Copyright 2009-2010 VMware, Inc. All rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef SPU_TGSI_EXEC_H
-#define SPU_TGSI_EXEC_H
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_state.h"
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-
-#define NUM_CHANNELS 4 /* R,G,B,A */
-#define QUAD_SIZE 4 /* 4 pixel/quad */
-
-
-
-#define TGSI_EXEC_NUM_TEMPS 128
-#define TGSI_EXEC_NUM_IMMEDIATES 256
-
-/*
- * Locations of various utility registers (_I = Index, _C = Channel)
- */
-#define TGSI_EXEC_TEMP_00000000_IDX (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_00000000_CHAN 0
-
-#define TGSI_EXEC_TEMP_7FFFFFFF_IDX (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_7FFFFFFF_CHAN 1
-
-#define TGSI_EXEC_TEMP_80000000_IDX (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_80000000_CHAN 2
-
-#define TGSI_EXEC_TEMP_FFFFFFFF_IDX (TGSI_EXEC_NUM_TEMPS + 0)
-#define TGSI_EXEC_TEMP_FFFFFFFF_CHAN 3
-
-#define TGSI_EXEC_TEMP_ONE_IDX (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_ONE_CHAN 0
-
-#define TGSI_EXEC_TEMP_TWO_IDX (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_TWO_CHAN 1
-
-#define TGSI_EXEC_TEMP_128_IDX (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_128_CHAN 2
-
-#define TGSI_EXEC_TEMP_MINUS_128_IDX (TGSI_EXEC_NUM_TEMPS + 1)
-#define TGSI_EXEC_TEMP_MINUS_128_CHAN 3
-
-#define TGSI_EXEC_TEMP_KILMASK_IDX (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_KILMASK_CHAN 0
-
-#define TGSI_EXEC_TEMP_OUTPUT_IDX (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_OUTPUT_CHAN 1
-
-#define TGSI_EXEC_TEMP_PRIMITIVE_IDX (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_PRIMITIVE_CHAN 2
-
-/* NVIDIA condition code (CC) vector
- */
-#define TGSI_EXEC_CC_GT 0x01
-#define TGSI_EXEC_CC_EQ 0x02
-#define TGSI_EXEC_CC_LT 0x04
-#define TGSI_EXEC_CC_UN 0x08
-
-#define TGSI_EXEC_CC_X_MASK 0x000000ff
-#define TGSI_EXEC_CC_X_SHIFT 0
-#define TGSI_EXEC_CC_Y_MASK 0x0000ff00
-#define TGSI_EXEC_CC_Y_SHIFT 8
-#define TGSI_EXEC_CC_Z_MASK 0x00ff0000
-#define TGSI_EXEC_CC_Z_SHIFT 16
-#define TGSI_EXEC_CC_W_MASK 0xff000000
-#define TGSI_EXEC_CC_W_SHIFT 24
-
-#define TGSI_EXEC_TEMP_CC_IDX (TGSI_EXEC_NUM_TEMPS + 2)
-#define TGSI_EXEC_TEMP_CC_CHAN 3
-
-#define TGSI_EXEC_TEMP_THREE_IDX (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_TEMP_THREE_CHAN 0
-
-#define TGSI_EXEC_TEMP_HALF_IDX (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_TEMP_HALF_CHAN 1
-
-/* execution mask, each value is either 0 or ~0 */
-#define TGSI_EXEC_MASK_IDX (TGSI_EXEC_NUM_TEMPS + 3)
-#define TGSI_EXEC_MASK_CHAN 2
-
-/* 4 register buffer for various purposes */
-#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4)
-#define TGSI_EXEC_NUM_TEMP_R 4
-
-#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8)
-#define TGSI_EXEC_NUM_ADDRS 1
-
-/* predicate register */
-#define TGSI_EXEC_TEMP_P0 (TGSI_EXEC_NUM_TEMPS + 9)
-#define TGSI_EXEC_NUM_PREDS 1
-
-#define TGSI_EXEC_NUM_TEMP_EXTRAS 10
-
-
-
-#define TGSI_EXEC_MAX_NESTING 32
-#define TGSI_EXEC_MAX_COND_NESTING TGSI_EXEC_MAX_NESTING
-#define TGSI_EXEC_MAX_LOOP_NESTING TGSI_EXEC_MAX_NESTING
-#define TGSI_EXEC_MAX_SWITCH_NESTING TGSI_EXEC_MAX_NESTING
-#define TGSI_EXEC_MAX_CALL_NESTING TGSI_EXEC_MAX_NESTING
-
-/* The maximum number of input attributes per vertex. For 2D
- * input register files, this is the stride between two 1D
- * arrays.
- */
-#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17
-
-/* The maximum number of constant vectors per constant buffer.
- */
-#define TGSI_EXEC_MAX_CONST_BUFFER 4096
-
-/* The maximum number of vertices per primitive */
-#define TGSI_MAX_PRIM_VERTICES 6
-
-/* The maximum number of primitives to be generated */
-#define TGSI_MAX_PRIMITIVES 64
-
-/* The maximum total number of vertices */
-#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS)
-
-
-#if defined __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* SPU_TGSI_EXEC_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-
-#include "spu_tile.h"
-#include "spu_main.h"
-
-
-/**
- * Get tile of color or Z values from main memory, put into SPU memory.
- */
-void
-get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf)
-{
- const uint offset = ty * spu.fb.width_tiles + tx;
- const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4);
- const ubyte *src = zBuf ? spu.fb.depth_start : spu.fb.color_start;
-
- src += offset * bytesPerTile;
-
- ASSERT(tx < spu.fb.width_tiles);
- ASSERT(ty < spu.fb.height_tiles);
- ASSERT_ALIGN16(tile);
- /*
- printf("get_tile: dest: %p src: 0x%x size: %d\n",
- tile, (unsigned int) src, bytesPerTile);
- */
- mfc_get(tile->ui, /* dest in local memory */
- (unsigned int) src, /* src in main memory */
- bytesPerTile,
- tag,
- 0, /* tid */
- 0 /* rid */);
-}
-
-
-/**
- * Move tile of color or Z values from SPU memory to main memory.
- */
-void
-put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf)
-{
- const uint offset = ty * spu.fb.width_tiles + tx;
- const uint bytesPerTile = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4);
- ubyte *dst = zBuf ? spu.fb.depth_start : spu.fb.color_start;
-
- dst += offset * bytesPerTile;
-
- ASSERT(tx < spu.fb.width_tiles);
- ASSERT(ty < spu.fb.height_tiles);
- ASSERT_ALIGN16(tile);
- /*
- printf("SPU %u: put_tile: src: %p dst: 0x%x size: %d\n",
- spu.init.id,
- tile, (unsigned int) dst, bytesPerTile);
- */
- mfc_put((void *) tile->ui, /* src in local memory */
- (unsigned int) dst, /* dst in main memory */
- bytesPerTile,
- tag,
- 0, /* tid */
- 0 /* rid */);
-}
-
-
-/**
- * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled
- * tiles back to the main framebuffer.
- */
-void
-really_clear_tiles(uint surfaceIndex)
-{
- const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
- uint i;
-
- if (surfaceIndex == 0) {
- clear_c_tile(&spu.ctile);
-
- for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
- uint tx = i % spu.fb.width_tiles;
- uint ty = i / spu.fb.width_tiles;
- if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) {
- put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
- }
- }
- }
- else {
- clear_z_tile(&spu.ztile);
-
- for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
- uint tx = i % spu.fb.width_tiles;
- uint ty = i / spu.fb.width_tiles;
- if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR)
-            put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
- }
- }
-
-#if 0
- wait_on_mask(1 << TAG_SURFACE_CLEAR);
-#endif
-}
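
get_tile()/put_tile() move whole tiles between main memory and SPU local store with a single DMA each; the main-memory address is just the surface base plus (ty * width_tiles + tx) * bytesPerTile, where bytesPerTile is 32*32*4 for color and 32*32*zsize for depth. A host-side sketch of the same addressing with memcpy standing in for mfc_get (the framebuffer layout in main() is illustrative):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define TILE_SIZE 32

/* Copy tile (tx, ty) out of a tiled color buffer; memcpy stands in for the
 * DMA transfer issued by mfc_get() in the SPU code. */
static void fetch_tile(const uint8_t *color_start, unsigned width_tiles,
                       unsigned tx, unsigned ty,
                       uint32_t tile[TILE_SIZE][TILE_SIZE])
{
   const unsigned bytesPerTile = TILE_SIZE * TILE_SIZE * 4;
   const unsigned offset = ty * width_tiles + tx;

   memcpy(tile, color_start + (size_t) offset * bytesPerTile, bytesPerTile);
}

int main(void)
{
   /* a 2x2-tile framebuffer with a recognizable pattern in its fourth tile */
   static uint8_t fb[2 * 2 * TILE_SIZE * TILE_SIZE * 4];
   static uint32_t tile[TILE_SIZE][TILE_SIZE];

   memset(fb + 3 * TILE_SIZE * TILE_SIZE * 4, 0xab, TILE_SIZE * TILE_SIZE * 4);
   fetch_tile(fb, 2, 1, 1, tile);          /* tile (1,1) is the fourth tile */
   printf("first word of tile (1,1): %08x\n", (unsigned) tile[0][0]);
   return 0;
}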
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef SPU_TILE_H
-#define SPU_TILE_H
-
-
-#include <libmisc.h>
-#include <spu_mfcio.h>
-#include "spu_main.h"
-#include "cell/common.h"
-
-
-
-extern void
-get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf);
-
-extern void
-put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf);
-
-extern void
-really_clear_tiles(uint surfaceIndex);
-
-
-static INLINE void
-clear_c_tile(tile_t *ctile)
-{
- memset32((uint*) ctile->ui,
- spu.fb.color_clear_value,
- TILE_SIZE * TILE_SIZE);
-}
-
-
-static INLINE void
-clear_z_tile(tile_t *ztile)
-{
- if (spu.fb.zsize == 2) {
- memset16((ushort*) ztile->us,
- spu.fb.depth_clear_value,
- TILE_SIZE * TILE_SIZE);
- }
- else {
- ASSERT(spu.fb.zsize != 0);
- memset32((uint*) ztile->ui,
- spu.fb.depth_clear_value,
- TILE_SIZE * TILE_SIZE);
- }
-}
-
-
-#endif /* SPU_TILE_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Triangle rendering within a tile.
- */
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_format.h"
-#include "util/u_math.h"
-#include "spu_colorpack.h"
-#include "spu_main.h"
-#include "spu_shuffle.h"
-#include "spu_texture.h"
-#include "spu_tile.h"
-#include "spu_tri.h"
-
-
-/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
-typedef vector unsigned int mask_t;
-
-
-
-/**
- * Simplified types taken from other parts of Gallium
- */
-struct vertex_header {
- vector float data[1];
-};
-
-
-
-/* XXX fix this */
-#undef CEILF
-#define CEILF(X) ((float) (int) ((X) + 0.99999f))
-
-
-#define QUAD_TOP_LEFT 0
-#define QUAD_TOP_RIGHT 1
-#define QUAD_BOTTOM_LEFT 2
-#define QUAD_BOTTOM_RIGHT 3
-#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT)
-#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT)
-#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT)
-#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
-#define MASK_ALL 0xf
-
-
-#define CHAN0 0
-#define CHAN1 1
-#define CHAN2 2
-#define CHAN3 3
-
-
-#define DEBUG_VERTS 0
-
-/**
- * Triangle edge info
- */
-struct edge {
- union {
- struct {
- float dx; /**< X(v1) - X(v0), used only during setup */
- float dy; /**< Y(v1) - Y(v0), used only during setup */
- };
- vec_float4 ds; /**< vector accessor for dx and dy */
- };
- float dxdy; /**< dx/dy */
- float sx, sy; /**< first sample point coord */
- int lines; /**< number of lines on this edge */
-};
-
-
-struct interp_coef
-{
- vector float a0;
- vector float dadx;
- vector float dady;
-};
-
-
-/**
- * Triangle setup info (derived from draw_stage).
- * Also used for line drawing (taking some liberties).
- */
-struct setup_stage {
-
- /* Vertices are just an array of floats making up each attribute in
- * turn. Currently fixed at 4 floats, but should change in time.
- * Codegen will help cope with this.
- */
- union {
- struct {
- const struct vertex_header *vmin;
- const struct vertex_header *vmid;
- const struct vertex_header *vmax;
- const struct vertex_header *vprovoke;
- };
- qword vertex_headers;
- };
-
- struct edge ebot;
- struct edge etop;
- struct edge emaj;
-
- float oneOverArea; /* XXX maybe make into vector? */
-
- uint facing;
-
- uint tx, ty; /**< position of current tile (x, y) */
-
- union {
- struct {
- int cliprect_minx;
- int cliprect_miny;
- int cliprect_maxx;
- int cliprect_maxy;
- };
- qword cliprect;
- };
-
- struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
-
- struct {
- vec_int4 quad; /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */
- int y;
- unsigned y_flags;
- unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
- } span;
-};
-
-
-static struct setup_stage setup;
-
-
-static INLINE vector float
-splatx(vector float v)
-{
- return spu_splats(spu_extract(v, CHAN0));
-}
-
-static INLINE vector float
-splaty(vector float v)
-{
- return spu_splats(spu_extract(v, CHAN1));
-}
-
-static INLINE vector float
-splatz(vector float v)
-{
- return spu_splats(spu_extract(v, CHAN2));
-}
-
-static INLINE vector float
-splatw(vector float v)
-{
- return spu_splats(spu_extract(v, CHAN3));
-}
-
-
-/**
- * Setup fragment shader inputs by evaluating triangle's vertex
- * attribute coefficient info.
- * \param x quad x pos
- * \param y quad y pos
- * \param fragZ returns quad Z values
- * \param fragInputs returns fragment program inputs
- * Note: this code could be incorporated into the fragment program
- * itself to avoid the loop and switch.
- */
-static void
-eval_inputs(float x, float y, vector float *fragZ, vector float fragInputs[])
-{
- static const vector float deltaX = (const vector float) {0, 1, 0, 1};
- static const vector float deltaY = (const vector float) {0, 0, 1, 1};
-
- const uint posSlot = 0;
- const vector float pos = setup.coef[posSlot].a0;
- const vector float dposdx = setup.coef[posSlot].dadx;
- const vector float dposdy = setup.coef[posSlot].dady;
- const vector float fragX = spu_splats(x) + deltaX;
- const vector float fragY = spu_splats(y) + deltaY;
- vector float fragW, wInv;
- uint i;
-
- *fragZ = splatz(pos) + fragX * splatz(dposdx) + fragY * splatz(dposdy);
- fragW = splatw(pos) + fragX * splatw(dposdx) + fragY * splatw(dposdy);
- wInv = spu_re(fragW); /* 1 / w */
-
- /* loop over fragment program inputs */
- for (i = 0; i < spu.vertex_info.num_attribs; i++) {
- uint attr = i + 1;
- enum interp_mode interp = spu.vertex_info.attrib[attr].interp_mode;
-
- /* constant term */
- vector float a0 = setup.coef[attr].a0;
- vector float r0 = splatx(a0);
- vector float r1 = splaty(a0);
- vector float r2 = splatz(a0);
- vector float r3 = splatw(a0);
-
- if (interp == INTERP_LINEAR || interp == INTERP_PERSPECTIVE) {
- /* linear term */
- vector float dadx = setup.coef[attr].dadx;
- vector float dady = setup.coef[attr].dady;
- /* Use SPU intrinsics here to get slightly better code.
- * originally: r0 += fragX * splatx(dadx) + fragY * splatx(dady);
- */
- r0 = spu_madd(fragX, splatx(dadx), spu_madd(fragY, splatx(dady), r0));
- r1 = spu_madd(fragX, splaty(dadx), spu_madd(fragY, splaty(dady), r1));
- r2 = spu_madd(fragX, splatz(dadx), spu_madd(fragY, splatz(dady), r2));
- r3 = spu_madd(fragX, splatw(dadx), spu_madd(fragY, splatw(dady), r3));
- if (interp == INTERP_PERSPECTIVE) {
- /* perspective term */
- r0 *= wInv;
- r1 *= wInv;
- r2 *= wInv;
- r3 *= wInv;
- }
- }
- fragInputs[CHAN0] = r0;
- fragInputs[CHAN1] = r1;
- fragInputs[CHAN2] = r2;
- fragInputs[CHAN3] = r3;
- fragInputs += 4;
- }
-}
-
-
-/**
- * Emit a quad (pass to next stage). No clipping is done.
- * Note: about 1/5 to 1/7 of the time, mask is zero and this function
- * should be skipped. But adding the test for that slows things down
- * overall.
- */
-static INLINE void
-emit_quad( int x, int y, mask_t mask)
-{
- /* If any bits in mask are set... */
- if (spu_extract(spu_orx(mask), 0)) {
- const int ix = x - setup.cliprect_minx;
- const int iy = y - setup.cliprect_miny;
-
- spu.cur_ctile_status = TILE_STATUS_DIRTY;
- spu.cur_ztile_status = TILE_STATUS_DIRTY;
-
- {
- /*
- * Run fragment shader, execute per-fragment ops, update fb/tile.
- */
- vector float inputs[4*4], outputs[2*4];
- vector unsigned int kill_mask;
- vector float fragZ;
-
- eval_inputs((float) x, (float) y, &fragZ, inputs);
-
- ASSERT(spu.fragment_program);
- ASSERT(spu.fragment_ops);
-
- /* Execute the current fragment program */
- kill_mask = spu.fragment_program(inputs, outputs, spu.constants);
-
- mask = spu_andc(mask, kill_mask);
-
- /* Execute per-fragment/quad operations, including:
- * alpha test, z test, stencil test, blend and framebuffer writing.
- * Note that there are two different fragment operations functions
- * that can be called, one for front-facing fragments, and one
- * for back-facing fragments. (Often the two are the same;
- * but in some cases, like two-sided stenciling, they can be
- * very different.) So choose the correct function depending
- * on the calculated facing.
- */
- spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile,
- fragZ,
- outputs[0*4+0],
- outputs[0*4+1],
- outputs[0*4+2],
- outputs[0*4+3],
- mask);
- }
- }
-}
-
-
-/**
- * Given an X or Y coordinate, return the block/quad coordinate that it
- * belongs to.
- */
-static INLINE int
-block(int x)
-{
- return x & ~1;
-}
-
-
-/**
- * Render a horizontal span of quads
- */
-static void
-flush_spans(void)
-{
- int minleft, maxright;
-
- const int l0 = spu_extract(setup.span.quad, 0);
- const int l1 = spu_extract(setup.span.quad, 1);
- const int r0 = spu_extract(setup.span.quad, 2);
- const int r1 = spu_extract(setup.span.quad, 3);
-
- switch (setup.span.y_flags) {
- case 0x3:
- /* both odd and even lines written (both quad rows) */
- minleft = MIN2(l0, l1);
- maxright = MAX2(r0, r1);
- break;
-
- case 0x1:
- /* only even line written (quad top row) */
- minleft = l0;
- maxright = r0;
- break;
-
- case 0x2:
- /* only odd line written (quad bottom row) */
- minleft = l1;
- maxright = r1;
- break;
-
- default:
- return;
- }
-
- /* OK, we're very likely to need the tile data now.
- * clear or finish waiting if needed.
- */
- if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
- /* wait for mfc_get() to complete */
- //printf("SPU: %u: waiting for ctile\n", spu.init.id);
- wait_on_mask(1 << TAG_READ_TILE_COLOR);
- spu.cur_ctile_status = TILE_STATUS_CLEAN;
- }
- else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
- //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
- clear_c_tile(&spu.ctile);
- spu.cur_ctile_status = TILE_STATUS_DIRTY;
- }
- ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
-
- if (spu.read_depth_stencil) {
- if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
- /* wait for mfc_get() to complete */
- //printf("SPU: %u: waiting for ztile\n", spu.init.id);
- wait_on_mask(1 << TAG_READ_TILE_Z);
- spu.cur_ztile_status = TILE_STATUS_CLEAN;
- }
- else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) {
- //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
- clear_z_tile(&spu.ztile);
- spu.cur_ztile_status = TILE_STATUS_DIRTY;
- }
- ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
- }
-
- /* XXX this loop could be moved into the above switch cases... */
-
- /* Setup for mask calculation */
- const vec_int4 quad_LlRr = setup.span.quad;
- const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8);
- const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B));
- const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B));
-
- const vec_int4 twos = spu_splats(2);
-
- const int x = block(minleft);
- vec_int4 xs = {x, x+1, x, x+1};
-
- for (; spu_extract(xs, 0) <= block(maxright); xs += twos) {
- /**
- * Computes mask to indicate which pixels in the 2x2 quad are actually
- * inside the triangle's bounds.
- */
-
- /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */
- const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs);
- const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs);
-
- /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */
- const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs);
-
- /* Combine results to create mask */
- const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs);
-
- emit_quad(spu_extract(xs, 0), setup.span.y, mask);
- }
-
- setup.span.y = 0;
- setup.span.y_flags = 0;
- /* Zero right elements */
- setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
-}
-
-
-#if DEBUG_VERTS
-static void
-print_vertex(const struct vertex_header *v)
-{
- uint i;
- fprintf(stderr, " Vertex: (%p)\n", v);
- for (i = 0; i < spu.vertex_info.num_attribs; i++) {
- fprintf(stderr, " %d: %f %f %f %f\n", i,
- spu_extract(v->data[i], 0),
- spu_extract(v->data[i], 1),
- spu_extract(v->data[i], 2),
- spu_extract(v->data[i], 3));
- }
-}
-#endif
-
-/* Returns the minimum of each slot of two vec_float4s as qwords.
- * i.e. return[n] = min(q0[n],q1[n]);
- */
-static qword
-minfq(qword q0, qword q1)
-{
- const qword q0q1m = si_fcgt(q0, q1);
- return si_selb(q0, q1, q0q1m);
-}
-
-/* Returns the minimum of each slot of three vec_float4s as qwords.
- * i.e. return[n] = min(q0[n],q1[n],q2[n]);
- */
-static qword
-min3fq(qword q0, qword q1, qword q2)
-{
- return minfq(minfq(q0, q1), q2);
-}
-
-/* Returns the maximum of each slot of two vec_float4s as qwords.
- * i.e. return[n] = max(q0[n],q1[n]);
- */
-static qword
-maxfq(qword q0, qword q1) {
- const qword q0q1m = si_fcgt(q0, q1);
- return si_selb(q1, q0, q0q1m);
-}
-
-/* Returns the maximum of each slot of three vec_float4s as qwords.
- * i.e. return[n] = max(q0[n],q1[n],q2[n]);
- */
-static qword
-max3fq(qword q0, qword q1, qword q2) {
- return maxfq(maxfq(q0, q1), q2);
-}
-
-/**
- * Sort vertices from top to bottom.
- * Compute area and determine front vs. back facing.
- * Do coarse clip test against tile bounds
- * \return FALSE if tri is totally outside tile, TRUE otherwise
- */
-static boolean
-setup_sort_vertices(const qword vs)
-{
- float area, sign;
-
-#if DEBUG_VERTS
-   if (spu.init.id==0) {
-      /* Unpack the three vertex_header pointers from the packed qword */
-      const struct vertex_header *v0 =
-         (const struct vertex_header *) si_to_ptr(vs);
-      const struct vertex_header *v1 =
-         (const struct vertex_header *) si_to_ptr(si_rotqbyi(vs, 4));
-      const struct vertex_header *v2 =
-         (const struct vertex_header *) si_to_ptr(si_rotqbyi(vs, 8));
-      fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id);
-      print_vertex(v0);
-      print_vertex(v1);
-      print_vertex(v2);
-   }
-#endif
-
- {
- /* Load the float values for various processing... */
- const qword f0 = (qword)(((const struct vertex_header*)si_to_ptr(vs))->data[0]);
- const qword f1 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 4)))->data[0]);
- const qword f2 = (qword)(((const struct vertex_header*)si_to_ptr(si_rotqbyi(vs, 8)))->data[0]);
-
- /* Check if triangle is completely outside the tile bounds
-       * Find the min and max x and y positions of the three points. */
- const qword minf = min3fq(f0, f1, f2);
- const qword maxf = max3fq(f0, f1, f2);
-
- /* Compare min and max against cliprect vals */
- const qword maxsmins = si_shufb(maxf, minf, SHUFB4(A,B,a,b));
- const qword outside = si_fcgt(maxsmins, si_csflt(setup.cliprect, 0));
-
-      /* Use a little magic to work out if the tri is visible or not */
- if(si_to_uint(si_xori(si_gb(outside), 0xc))) return FALSE;
-
- /* determine bottom to top order of vertices */
- /* A table of shuffle patterns for putting vertex_header pointers into
- correct order. Quite magical. */
- const qword sort_order_patterns[] = {
- SHUFB4(A,B,C,C),
- SHUFB4(C,A,B,C),
- SHUFB4(A,C,B,C),
- SHUFB4(B,C,A,C),
- SHUFB4(B,A,C,C),
- SHUFB4(C,B,A,C) };
-
- /* Collate y values into two vectors for comparison.
- Using only one shuffle constant! ;) */
- const qword y_02_ = si_shufb(f0, f2, SHUFB4(0,B,b,C));
- const qword y_10_ = si_shufb(f1, f0, SHUFB4(0,B,b,C));
- const qword y_012 = si_shufb(y_02_, f1, SHUFB4(0,B,b,C));
- const qword y_120 = si_shufb(y_10_, f2, SHUFB4(0,B,b,C));
-
- /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */
- const qword compare = si_fcgt(y_012, y_120);
- /* Compress the result of the comparison into 4 bits */
- const qword gather = si_gb(compare);
- /* Subtract one to attain the index into the LUT. Magical. */
- const unsigned int index = si_to_uint(gather) - 1;
-
- /* Load the appropriate pattern and construct the desired vector. */
- setup.vertex_headers = si_shufb(vs, vs, sort_order_patterns[index]);
-
- /* Using the result of the comparison, set sign.
- Very magical. */
- sign = ((si_to_uint(si_cntb(gather)) == 2) ? 1.0f : -1.0f);
- }
-
- setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]);
- setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]);
- setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]);
-
- /*
- * Compute triangle's area. Use 1/area to compute partial
- * derivatives of attributes later.
- */
- area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy;
-
- setup.oneOverArea = 1.0f / area;
-
- /* The product of area * sign indicates front/back orientation (0/1).
- * Just in case someone gets the bright idea of switching the front
- * and back constants without noticing that we're assuming their
- * values in this operation, also assert that the values are
- * what we think they are.
- */
- ASSERT(CELL_FACING_FRONT == 0);
- ASSERT(CELL_FACING_BACK == 1);
- setup.facing = (area * sign > 0.0f)
- ^ (!spu.rasterizer.front_ccw);
-
- return TRUE;
-}
-
-
-/**
- * Compute a0 for a constant-valued coefficient (GL_FLAT shading).
- * The value comes from the provoking vertex's data[slot].
- * The result will be put into setup.coef[slot].a0.
- * \param slot which attribute slot
- */
-static INLINE void
-const_coeff4(uint slot)
-{
- setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0};
- setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0};
- setup.coef[slot].a0 = setup.vprovoke->data[slot];
-}
-
-
-/**
- * Compute a0, dadx and dady for a linearly interpolated coefficient,
- * for all four vector components.
- */
-static INLINE void
-tri_linear_coeff4(uint slot)
-{
- const vector float vmin_d = setup.vmin->data[slot];
- const vector float vmid_d = setup.vmid->data[slot];
- const vector float vmax_d = setup.vmax->data[slot];
- const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
- const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
-
- vector float botda = vmid_d - vmin_d;
- vector float majda = vmax_d - vmin_d;
-
- vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
- spu_mul(botda, spu_splats(setup.emaj.dy)));
- vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
- spu_mul(majda, spu_splats(setup.ebot.dx)));
-
- setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
- setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
-
- vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
- vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
-
- setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
-}
-
-
-/**
- * Compute a0, dadx and dady for a perspective-corrected interpolant,
- * for a triangle.
- * We basically multiply the vertex value by 1/w before computing
- * the plane coefficients (a0, dadx, dady).
- * Later, when we compute the value at a particular fragment position we'll
- * divide the interpolated value by the interpolated W at that fragment.
- */
-static void
-tri_persp_coeff4(uint slot)
-{
- const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
- const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
-
- const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3));
- const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3));
- const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3));
-
- vector float vmin_d = setup.vmin->data[slot];
- vector float vmid_d = setup.vmid->data[slot];
- vector float vmax_d = setup.vmax->data[slot];
-
- vmin_d = spu_mul(vmin_d, vmin_w);
- vmid_d = spu_mul(vmid_d, vmid_w);
- vmax_d = spu_mul(vmax_d, vmax_w);
-
- vector float botda = vmid_d - vmin_d;
- vector float majda = vmax_d - vmin_d;
-
- vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
- spu_mul(botda, spu_splats(setup.emaj.dy)));
- vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
- spu_mul(majda, spu_splats(setup.ebot.dx)));
-
- setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea));
- setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea));
-
- vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx);
- vector float tempy = spu_mul(setup.coef[slot].dady, yyyy);
-
- setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy));
-}
-
-
-
-/**
- * Compute the setup.coef[] array dadx, dady, a0 values.
- * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
- */
-static void
-setup_tri_coefficients(void)
-{
- uint i;
-
- for (i = 0; i < spu.vertex_info.num_attribs; i++) {
- switch (spu.vertex_info.attrib[i].interp_mode) {
- case INTERP_NONE:
- break;
- case INTERP_CONSTANT:
- const_coeff4(i);
- break;
- case INTERP_POS:
- /* fall-through */
- case INTERP_LINEAR:
- tri_linear_coeff4(i);
- break;
- case INTERP_PERSPECTIVE:
- tri_persp_coeff4(i);
- break;
- default:
- ASSERT(0);
- }
- }
-}
-
-
-static void
-setup_tri_edges(void)
-{
- float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f;
- float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f;
-
- float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
- float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f;
- float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f;
-
- setup.emaj.sy = CEILF(vmin_y);
- setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy);
- setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy;
- setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy;
-
- setup.etop.sy = CEILF(vmid_y);
- setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy);
- setup.etop.dxdy = setup.etop.dx / setup.etop.dy;
- setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy;
-
- setup.ebot.sy = CEILF(vmin_y);
- setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy);
- setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy;
- setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy;
-}
-
-
-/**
- * Render the upper or lower half of a triangle.
- * Scissoring/cliprect is applied here too.
- */
-static void
-subtriangle(struct edge *eleft, struct edge *eright, unsigned lines)
-{
- const int minx = setup.cliprect_minx;
- const int maxx = setup.cliprect_maxx;
- const int miny = setup.cliprect_miny;
- const int maxy = setup.cliprect_maxy;
- int y, start_y, finish_y;
- int sy = (int)eleft->sy;
-
- ASSERT((int)eleft->sy == (int) eright->sy);
-
- /* clip top/bottom */
- start_y = sy;
- finish_y = sy + lines;
-
- if (start_y < miny)
- start_y = miny;
-
- if (finish_y > maxy)
- finish_y = maxy;
-
- start_y -= sy;
- finish_y -= sy;
-
- /*
- printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
- */
-
- for (y = start_y; y < finish_y; y++) {
-
- /* avoid accumulating adds as floats don't have the precision to
- * accurately iterate large triangle edges that way. luckily we
- * can just multiply these days.
- *
- * this is all drowned out by the attribute interpolation anyway.
- */
- int left = (int)(eleft->sx + y * eleft->dxdy);
- int right = (int)(eright->sx + y * eright->dxdy);
-
- /* clip left/right */
- if (left < minx)
- left = minx;
- if (right > maxx)
- right = maxx;
-
- if (left < right) {
- int _y = sy + y;
- if (block(_y) != setup.span.y) {
- flush_spans();
- setup.span.y = block(_y);
- }
-
- int offset = _y&1;
- vec_int4 quad_LlRr = {left, left, right, right};
- /* Store left and right in 0 or 1 row of quad based on offset */
- setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<<offset));
- setup.span.y_flags |= 1<<offset;
- }
- }
-
-
- /* save the values so that emaj can be restarted:
- */
- eleft->sx += lines * eleft->dxdy;
- eright->sx += lines * eright->dxdy;
- eleft->sy += lines;
- eright->sy += lines;
-}
-
-
-/**
- * Draw triangle into tile at (tx, ty) (tile coords)
- * The tile data should have already been fetched.
- */
-boolean
-tri_draw(const qword vs,
- uint tx, uint ty)
-{
- setup.tx = tx;
- setup.ty = ty;
-
- /* set clipping bounds to tile bounds */
- const qword clipbase = (qword)((vec_uint4){tx, ty});
- const qword clipmin = si_mpyui(clipbase, TILE_SIZE);
- const qword clipmax = si_ai(clipmin, TILE_SIZE);
- setup.cliprect = si_shufb(clipmin, clipmax, SHUFB4(A,B,a,b));
-
- if(!setup_sort_vertices(vs)) {
- return FALSE; /* totally clipped */
- }
-
- setup_tri_coefficients();
- setup_tri_edges();
-
- setup.span.y = 0;
- setup.span.y_flags = 0;
- /* Zero right elements */
- setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0));
-
- if (setup.oneOverArea < 0.0) {
- /* emaj on left */
- subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
- subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
- }
- else {
- /* emaj on right */
- subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
- subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
- }
-
- flush_spans();
-
- return TRUE;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#ifndef SPU_TRI_H
-#define SPU_TRI_H
-
-
-extern boolean
-tri_draw(const qword vs, uint tx, uint ty);
-
-
-#endif /* SPU_TRI_H */
+++ /dev/null
-
-#include "cell/common.h"
-#include "pipe/p_shader_tokens.h"
-#include "util/u_debug.h"
-#include "tgsi/tgsi_parse.h"
-//#include "tgsi_build.h"
-#include "tgsi/tgsi_util.h"
-
-unsigned
-tgsi_util_get_src_register_swizzle(
- const struct tgsi_src_register *reg,
- unsigned component )
-{
- switch( component ) {
- case 0:
- return reg->SwizzleX;
- case 1:
- return reg->SwizzleY;
- case 2:
- return reg->SwizzleZ;
- case 3:
- return reg->SwizzleW;
- default:
- ASSERT( 0 );
- }
- return 0;
-}
-
-
-unsigned
-tgsi_util_get_full_src_register_swizzle(
- const struct tgsi_full_src_register *reg,
- unsigned component )
-{
- return tgsi_util_get_src_register_swizzle(
- reg->Register,
- component );
-}
-
-
-unsigned
-tgsi_util_get_full_src_register_sign_mode(
- const struct tgsi_full_src_register *reg,
- unsigned component )
-{
- unsigned sign_mode;
-
- if( reg->RegisterExtMod.Absolute ) {
- /* Consider only the post-abs negation. */
-
- if( reg->RegisterExtMod.Negate ) {
- sign_mode = TGSI_UTIL_SIGN_SET;
- }
- else {
- sign_mode = TGSI_UTIL_SIGN_CLEAR;
- }
- }
- else {
-      /* Accumulate the two negations. */
-
- unsigned negate;
-
- negate = reg->Register.Negate;
- if( reg->RegisterExtMod.Negate ) {
- negate = !negate;
- }
-
- if( negate ) {
- sign_mode = TGSI_UTIL_SIGN_TOGGLE;
- }
- else {
- sign_mode = TGSI_UTIL_SIGN_KEEP;
- }
- }
-
- return sign_mode;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * (C) Copyright IBM Corporation 2008
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- * Ian Romanick <idr@us.ibm.com>
- */
-
-#include "pipe/p_state.h"
-#include "pipe/p_shader_tokens.h"
-#include "spu_exec.h"
-#include "spu_vertex_shader.h"
-#include "spu_main.h"
-#include "spu_dcache.h"
-
-typedef void (*spu_fetch_func)(qword *out, const qword *in,
- const qword *shuffle_data);
-
-
-PIPE_ALIGN_VAR(16) static const qword
-fetch_shuffle_data[5] = {
- /* Shuffle used by CVT_64_FLOAT
- */
- {
- 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
- 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
- },
-
- /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED
- */
- {
- 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80,
- 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80,
- },
-
- /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED
- */
- {
- 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80,
- 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80,
- },
-
- /* High value shuffle used by trans4x4.
- */
- {
- 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
- 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17
- },
-
- /* Low value shuffle used by trans4x4.
- */
- {
- 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B,
- 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F
- }
-};
-
-
-/**
- * Fetch vertex attributes for 'count' vertices.
- */
-static void generic_vertex_fetch(struct spu_vs_context *draw,
- struct spu_exec_machine *machine,
- const unsigned *elts,
- unsigned count)
-{
- unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
- unsigned attr;
-
- ASSERT(count <= 4);
-
-#if DRAW_DBG
- printf("SPU: %s count = %u, nr_attrs = %u\n",
- __FUNCTION__, count, nr_attrs);
-#endif
-
- /* loop over vertex attributes (vertex shader inputs)
- */
- for (attr = 0; attr < nr_attrs; attr++) {
- const unsigned pitch = draw->vertex_fetch.pitch[attr];
- const uint64_t src = draw->vertex_fetch.src_ptr[attr];
- const spu_fetch_func fetch = (spu_fetch_func)
- (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]);
- unsigned i;
- unsigned idx;
- const unsigned bytes_per_entry = draw->vertex_fetch.size[attr];
- const unsigned quads_per_entry = (bytes_per_entry + 15) / 16;
- PIPE_ALIGN_VAR(16) qword in[2 * 4];
-
-
-      /* Fetch this attribute for each of the (up to four) vertices.
- */
- idx = 0;
- for (i = 0; i < count; i++) {
- const uint64_t addr = src + (elts[i] * pitch);
-
-#if DRAW_DBG
- printf("SPU: fetching = 0x%llx\n", addr);
-#endif
-
- spu_dcache_fetch_unaligned(& in[idx], addr, bytes_per_entry);
- idx += quads_per_entry;
- }
-
- /* Be nice and zero out any missing vertices.
- */
- (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword));
-
-
- /* Convert all 4 vertices to vectors of float.
- */
- (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data);
- }
-}
-
-
-void spu_update_vertex_fetch( struct spu_vs_context *draw )
-{
- draw->vertex_fetch.fetch_func = generic_vertex_fetch;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- * Brian Paul
- * Ian Romanick <idr@us.ibm.com>
- */
-
-#include <spu_mfcio.h>
-
-#include "pipe/p_state.h"
-#include "pipe/p_shader_tokens.h"
-#include "util/u_math.h"
-#include "draw/draw_private.h"
-#include "draw/draw_context.h"
-#include "cell/common.h"
-#include "spu_vertex_shader.h"
-#include "spu_exec.h"
-#include "spu_main.h"
-
-
-#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float))
-
-
-#define CLIP_RIGHT_BIT 0x01
-#define CLIP_LEFT_BIT 0x02
-#define CLIP_TOP_BIT 0x04
-#define CLIP_BOTTOM_BIT 0x08
-#define CLIP_FAR_BIT 0x10
-#define CLIP_NEAR_BIT 0x20
-
-
-static INLINE float
-dot4(const float *a, const float *b)
-{
- return (a[0]*b[0] +
- a[1]*b[1] +
- a[2]*b[2] +
- a[3]*b[3]);
-}
-
-static INLINE unsigned
-compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
-{
- unsigned mask = 0;
- unsigned i;
-
- /* Do the hardwired planes first:
- */
- if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
- if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
- if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
- if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
- if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
- if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
-
- /* Followed by any remaining ones:
- */
- for (i = 6; i < nr; i++) {
- if (dot4(clip, plane[i]) < 0)
- mask |= (1<<i);
- }
-
- return mask;
-}
-
-
-/**
- * Transform vertices with the current vertex program/shader
- * Up to four vertices can be shaded at a time.
- * \param draw the vertex shader context
- * \param elts indexes of the (up to four) input vertices
- * \param count number of vertices to shade [1..4]
- * \param vOut array of main-memory addresses of the output vertices
- */
-static void
-run_vertex_program(struct spu_vs_context *draw,
- unsigned elts[4], unsigned count,
- const uint64_t *vOut)
-{
- struct spu_exec_machine *machine = &draw->machine;
- unsigned int j;
-
- PIPE_ALIGN_VAR(16) struct spu_exec_vector inputs[PIPE_MAX_ATTRIBS];
- PIPE_ALIGN_VAR(16) struct spu_exec_vector outputs[PIPE_MAX_ATTRIBS];
- const float *scale = draw->viewport.scale;
- const float *trans = draw->viewport.translate;
-
- ASSERT(count <= 4);
-
- machine->Processor = TGSI_PROCESSOR_VERTEX;
-
- ASSERT_ALIGN16(draw->constants);
- machine->Consts = (float (*)[4]) draw->constants;
-
- machine->Inputs = inputs;
- machine->Outputs = outputs;
-
- spu_vertex_fetch( draw, machine, elts, count );
-
- /* run shader */
- spu_exec_machine_run( machine );
-
-
- /* store machine results */
- for (j = 0; j < count; j++) {
- unsigned slot;
- float x, y, z, w;
- PIPE_ALIGN_VAR(16)
- unsigned char buffer[sizeof(struct vertex_header)
- + MAX_VERTEX_SIZE];
- struct vertex_header *const tmpOut =
- (struct vertex_header *) buffer;
- const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header)
- + (sizeof(float) * 4
- * draw->num_vs_outputs));
-
- mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0);
- wait_on_mask(1 << TAG_VERTEX_BUFFER);
-
-
- /* Handle attr[0] (position) specially:
- *
- * XXX: Computing the clipmask should be done in the vertex
- * program as a set of DP4 instructions appended to the
- * user-provided code.
- */
- x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j];
- y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j];
- z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j];
- w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j];
-
- tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane,
- draw->nr_planes);
- tmpOut->edgeflag = 1;
-
- /* divide by w */
- w = 1.0f / w;
- x *= w;
- y *= w;
- z *= w;
-
- /* Viewport mapping */
- tmpOut->data[0][0] = x * scale[0] + trans[0];
- tmpOut->data[0][1] = y * scale[1] + trans[1];
- tmpOut->data[0][2] = z * scale[2] + trans[2];
- tmpOut->data[0][3] = w;
-
- /* Remaining attributes are packed into sequential post-transform
- * vertex attrib slots.
- */
- for (slot = 1; slot < draw->num_vs_outputs; slot++) {
- tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
- }
-
- mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0);
- } /* loop over vertices */
-}
-
-
-PIPE_ALIGN_VAR(16) unsigned char
-immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32];
-
-
-void
-spu_bind_vertex_shader(struct spu_vs_context *draw,
- struct cell_shader_info *vs)
-{
- const unsigned immediate_addr = vs->immediates;
- const unsigned immediate_size =
- ROUNDUP16((sizeof(float) * 4 * vs->num_immediates)
- + (immediate_addr & 0x0f));
-
-
- mfc_get(immediates, immediate_addr & ~0x0f, immediate_size,
- TAG_VERTEX_BUFFER, 0, 0);
-
- draw->machine.Instructions = (struct tgsi_full_instruction *)
- vs->instructions;
- draw->machine.NumInstructions = vs->num_instructions;
-
- draw->machine.Declarations = (struct tgsi_full_declaration *)
- vs->declarations;
- draw->machine.NumDeclarations = vs->num_declarations;
-
- draw->num_vs_outputs = vs->num_outputs;
-
- /* specify the shader to interpret/execute */
- spu_exec_machine_init(&draw->machine,
- PIPE_MAX_SAMPLERS,
- NULL /*samplers*/,
- PIPE_SHADER_VERTEX);
-
- wait_on_mask(1 << TAG_VERTEX_BUFFER);
-
- (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f],
- sizeof(float) * 4 * vs->num_immediates);
-}
-
-
-void
-spu_execute_vertex_shader(struct spu_vs_context *draw,
- const struct cell_command_vs *vs)
-{
- unsigned i;
-
- (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes);
- draw->nr_planes = vs->nr_planes;
- draw->vertex_fetch.nr_attrs = vs->nr_attrs;
-
- for (i = 0; i < vs->num_elts; i += 4) {
- const unsigned batch_size = MIN2(vs->num_elts - i, 4);
-
- run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]);
- }
-}
+++ /dev/null
-#ifndef SPU_VERTEX_SHADER_H
-#define SPU_VERTEX_SHADER_H
-
-#include "cell/common.h"
-#include "pipe/p_format.h"
-#include "spu_exec.h"
-
-struct spu_vs_context;
-
-typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw,
- struct spu_exec_machine *machine,
- const unsigned *elts,
- unsigned count );
-
-struct spu_vs_context {
- struct pipe_viewport_state viewport;
-
- struct {
- uint64_t src_ptr[PIPE_MAX_ATTRIBS];
- unsigned pitch[PIPE_MAX_ATTRIBS];
- unsigned size[PIPE_MAX_ATTRIBS];
- unsigned code_offset[PIPE_MAX_ATTRIBS];
- unsigned nr_attrs;
- boolean dirty;
-
- spu_full_fetch_func fetch_func;
- void *code;
- } vertex_fetch;
-
- /* Clip derived state:
- */
- float plane[12][4];
- unsigned nr_planes;
-
- struct spu_exec_machine machine;
- const float (*constants)[4];
-
- unsigned num_vs_outputs;
-};
-
-extern void spu_update_vertex_fetch(struct spu_vs_context *draw);
-
-static INLINE void spu_vertex_fetch(struct spu_vs_context *draw,
- struct spu_exec_machine *machine,
- const unsigned *elts,
- unsigned count)
-{
- if (draw->vertex_fetch.dirty) {
- spu_update_vertex_fetch(draw);
- draw->vertex_fetch.dirty = 0;
- }
-
- (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count);
-}
-
-struct cell_command_vs;
-
-extern void
-spu_bind_vertex_shader(struct spu_vs_context *draw,
- struct cell_shader_info *vs);
-
-extern void
-spu_execute_vertex_shader(struct spu_vs_context *draw,
- const struct cell_command_vs *vs);
-
-#endif /* SPU_VERTEX_SHADER_H */
-DGALLIUM_RBUG \
-DGALLIUM_TRACE \
-DGALLIUM_GALAHAD
-#-DGALLIUM_CELL will be defined by the config */
XLIB_TARGET_SOURCES = \
xlib.c
XLIB_TARGET_OBJECTS = $(XLIB_TARGET_SOURCES:.c=.o)
-# Note: CELL_SPU_LIB is only defined for cell configs
LIBS = \
$(GALLIUM_DRIVERS) \
$(TOP)/src/mapi/glapi/libglapi.a \
$(TOP)/src/mesa/libmesagallium.a \
$(GALLIUM_AUXILIARIES) \
- $(CELL_SPU_LIB) \
# LLVM
if env['llvm']:
env.Append(CPPDEFINES = ['GALLIUM_LLVMPIPE'])
env.Prepend(LIBS = [llvmpipe])
-
-if False:
- # TODO: Detect Cell SDK
- env.Append(CPPDEFINES = 'GALLIUM_CELL')
- env.Prepend(LIBS = [cell])
# libGL.so.1.5
libgl_1_5 = env.SharedLibrary(
/* Helper function to build a subset of a driver stack consisting of
- * one of the software rasterizers (cell, llvmpipe, softpipe) and the
+ * one of the software rasterizers (llvmpipe, softpipe) and the
* xlib winsys.
*/
static struct pipe_screen *