+++ /dev/null
-TOP = ../../..
-include $(TOP)/configs/current
-
-
-ifeq ($(CONFIG_NAME), linux-llvm)
-LLVM_DIR = llvm
-endif
-
-SUBDIRS = pipebuffer $(LLVM_DIR)
-
-
-default: subdirs
-
-
-subdirs:
- @for dir in $(SUBDIRS) ; do \
- if [ -d $$dir ] ; then \
- (cd $$dir && $(MAKE)) || exit 1 ; \
- fi \
- done
-
-
-clean:
- rm -f `find . -name \*.[oa]`
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* Authors: Zack Rusin <zack@tungstengraphics.com>
- */
-
-#include "cso_cache.h"
-#include "cso_hash.h"
-
-#if 1
-static unsigned hash_key(const void *key, unsigned key_size)
-{
- unsigned *ikey = (unsigned *)key;
- unsigned hash = 0, i;
-
- assert(key_size % 4 == 0);
-
- /* I'm sure this can be improved on:
- */
- for (i = 0; i < key_size/4; i++)
- hash ^= ikey[i];
-
- return hash;
-}
-#else
-static unsigned hash_key(const unsigned char *p, int n)
-{
- unsigned h = 0;
- unsigned g;
-
- while (n--) {
- h = (h << 4) + *p++;
- if ((g = (h & 0xf0000000)) != 0)
- h ^= g >> 23;
- h &= ~g;
- }
- return h;
-}
-#endif
-
-unsigned cso_construct_key(void *item, int item_size)
-{
- return hash_key((item), item_size);
-}
-
-static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type)
-{
- struct cso_hash *hash = 0;
-
- switch(type) {
- case CSO_BLEND:
- hash = sc->blend_hash;
- break;
- case CSO_SAMPLER:
- hash = sc->sampler_hash;
- break;
- case CSO_DEPTH_STENCIL_ALPHA:
- hash = sc->depth_stencil_hash;
- break;
- case CSO_RASTERIZER:
- hash = sc->rasterizer_hash;
- break;
- case CSO_FRAGMENT_SHADER:
- hash = sc->fs_hash;
- break;
- case CSO_VERTEX_SHADER:
- hash = sc->vs_hash;
- break;
- }
-
- return hash;
-}
-
-static int _cso_size_for_type(enum cso_cache_type type)
-{
- switch(type) {
- case CSO_BLEND:
- return sizeof(struct pipe_blend_state);
- case CSO_SAMPLER:
- return sizeof(struct pipe_sampler_state);
- case CSO_DEPTH_STENCIL_ALPHA:
- return sizeof(struct pipe_depth_stencil_alpha_state);
- case CSO_RASTERIZER:
- return sizeof(struct pipe_rasterizer_state);
- case CSO_FRAGMENT_SHADER:
- return sizeof(struct pipe_shader_state);
- case CSO_VERTEX_SHADER:
- return sizeof(struct pipe_shader_state);
- }
- return 0;
-}
-
-struct cso_hash_iter
-cso_insert_state(struct cso_cache *sc,
- unsigned hash_key, enum cso_cache_type type,
- void *state)
-{
- struct cso_hash *hash = _cso_hash_for_type(sc, type);
- return cso_hash_insert(hash, hash_key, state);
-}
-
-struct cso_hash_iter
-cso_find_state(struct cso_cache *sc,
- unsigned hash_key, enum cso_cache_type type)
-{
- struct cso_hash *hash = _cso_hash_for_type(sc, type);
-
- return cso_hash_find(hash, hash_key);
-}
-
-struct cso_hash_iter cso_find_state_template(struct cso_cache *sc,
- unsigned hash_key, enum cso_cache_type type,
- void *templ)
-{
- struct cso_hash_iter iter = cso_find_state(sc, hash_key, type);
- int size = _cso_size_for_type(type);
- while (!cso_hash_iter_is_null(iter)) {
- void *iter_data = cso_hash_iter_data(iter);
- if (!memcmp(iter_data, templ, size))
- return iter;
- iter = cso_hash_iter_next(iter);
- }
- return iter;
-}
-
-void * cso_take_state(struct cso_cache *sc,
- unsigned hash_key, enum cso_cache_type type)
-{
- struct cso_hash *hash = _cso_hash_for_type(sc, type);
- return cso_hash_take(hash, hash_key);
-}
-
-struct cso_cache *cso_cache_create(void)
-{
- struct cso_cache *sc = malloc(sizeof(struct cso_cache));
-
- sc->blend_hash = cso_hash_create();
- sc->sampler_hash = cso_hash_create();
- sc->depth_stencil_hash = cso_hash_create();
- sc->rasterizer_hash = cso_hash_create();
- sc->fs_hash = cso_hash_create();
- sc->vs_hash = cso_hash_create();
-
- return sc;
-}
-
-void cso_cache_delete(struct cso_cache *sc)
-{
- assert(sc);
- cso_hash_delete(sc->blend_hash);
- cso_hash_delete(sc->sampler_hash);
- cso_hash_delete(sc->depth_stencil_hash);
- cso_hash_delete(sc->rasterizer_hash);
- cso_hash_delete(sc->fs_hash);
- cso_hash_delete(sc->vs_hash);
- free(sc);
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Zack Rusin <zack@tungstengraphics.com>
- */
-
-#ifndef CSO_CACHE_H
-#define CSO_CACHE_H
-
-#include "pipe/p_context.h"
-#include "pipe/p_state.h"
-
-
-struct cso_hash;
-
-struct cso_cache {
- struct cso_hash *blend_hash;
- struct cso_hash *depth_stencil_hash;
- struct cso_hash *fs_hash;
- struct cso_hash *vs_hash;
- struct cso_hash *rasterizer_hash;
- struct cso_hash *sampler_hash;
-};
-
-struct cso_blend {
- struct pipe_blend_state state;
- void *data;
-};
-
-struct cso_depth_stencil_alpha {
- struct pipe_depth_stencil_alpha_state state;
- void *data;
-};
-
-struct cso_rasterizer {
- struct pipe_rasterizer_state state;
- void *data;
-};
-
-struct cso_fragment_shader {
- struct pipe_shader_state state;
- void *data;
-};
-
-struct cso_vertex_shader {
- struct pipe_shader_state state;
- void *data;
-};
-
-struct cso_sampler {
- struct pipe_sampler_state state;
- void *data;
-};
-
-
-enum cso_cache_type {
- CSO_BLEND,
- CSO_SAMPLER,
- CSO_DEPTH_STENCIL_ALPHA,
- CSO_RASTERIZER,
- CSO_FRAGMENT_SHADER,
- CSO_VERTEX_SHADER
-};
-
-unsigned cso_construct_key(void *item, int item_size);
-
-struct cso_cache *cso_cache_create(void);
-void cso_cache_delete(struct cso_cache *sc);
-
-struct cso_hash_iter cso_insert_state(struct cso_cache *sc,
- unsigned hash_key, enum cso_cache_type type,
- void *state);
-struct cso_hash_iter cso_find_state(struct cso_cache *sc,
- unsigned hash_key, enum cso_cache_type type);
-struct cso_hash_iter cso_find_state_template(struct cso_cache *sc,
- unsigned hash_key, enum cso_cache_type type,
- void *templ);
-void * cso_take_state(struct cso_cache *sc, unsigned hash_key,
- enum cso_cache_type type);
-
-#endif
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Zack Rusin <zack@tungstengraphics.com>
- */
-
-#include "cso_hash.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-#define MAX(a, b) ((a > b) ? (a) : (b))
-
-static const int MinNumBits = 4;
-
-static const unsigned char prime_deltas[] = {
- 0, 0, 1, 3, 1, 5, 3, 3, 1, 9, 7, 5, 3, 9, 25, 3,
- 1, 21, 3, 21, 7, 15, 9, 5, 3, 29, 15, 0, 0, 0, 0, 0
-};
-
-static int primeForNumBits(int numBits)
-{
- return (1 << numBits) + prime_deltas[numBits];
-}
-
-/*
- Returns the smallest integer n such that
- primeForNumBits(n) >= hint.
-*/
-static int countBits(int hint)
-{
- int numBits = 0;
- int bits = hint;
-
- while (bits > 1) {
- bits >>= 1;
- numBits++;
- }
-
- if (numBits >= (int)sizeof(prime_deltas)) {
- numBits = sizeof(prime_deltas) - 1;
- } else if (primeForNumBits(numBits) < hint) {
- ++numBits;
- }
- return numBits;
-}
-
-struct cso_node {
- struct cso_node *next;
- unsigned key;
- void *value;
-};
-
-struct cso_hash_data {
- struct cso_node *fakeNext;
- struct cso_node **buckets;
- int size;
- int nodeSize;
- short userNumBits;
- short numBits;
- int numBuckets;
-};
-
-struct cso_hash {
- union {
- struct cso_hash_data *d;
- struct cso_node *e;
- } data;
-};
-
-static void *cso_data_allocate_node(struct cso_hash_data *hash)
-{
- return malloc(hash->nodeSize);
-}
-
-static void cso_data_free_node(struct cso_node *node)
-{
- /* XXX still a leak here.
- * Need to cast value ptr to original cso type, then free the
- * driver-specific data hanging off of it. For example:
- struct cso_sampler *csamp = (struct cso_sampler *) node->value;
- free(csamp->data);
- */
- free(node->value);
- free(node);
-}
-
-static struct cso_node *
-cso_hash_create_node(struct cso_hash *hash,
- unsigned akey, void *avalue,
- struct cso_node **anextNode)
-{
- struct cso_node *node = cso_data_allocate_node(hash->data.d);
- node->key = akey;
- node->value = avalue;
-
- node->next = (struct cso_node*)(*anextNode);
- *anextNode = node;
- ++hash->data.d->size;
- return node;
-}
-
-static void cso_data_rehash(struct cso_hash_data *hash, int hint)
-{
- if (hint < 0) {
- hint = countBits(-hint);
- if (hint < MinNumBits)
- hint = MinNumBits;
- hash->userNumBits = hint;
- while (primeForNumBits(hint) < (hash->size >> 1))
- ++hint;
- } else if (hint < MinNumBits) {
- hint = MinNumBits;
- }
-
- if (hash->numBits != hint) {
- struct cso_node *e = (struct cso_node *)(hash);
- struct cso_node **oldBuckets = hash->buckets;
- int oldNumBuckets = hash->numBuckets;
- int i = 0;
-
- hash->numBits = hint;
- hash->numBuckets = primeForNumBits(hint);
- hash->buckets = malloc(sizeof(struct cso_node*) * hash->numBuckets);
- for (i = 0; i < hash->numBuckets; ++i)
- hash->buckets[i] = e;
-
- for (i = 0; i < oldNumBuckets; ++i) {
- struct cso_node *firstNode = oldBuckets[i];
- while (firstNode != e) {
- unsigned h = firstNode->key;
- struct cso_node *lastNode = firstNode;
- while (lastNode->next != e && lastNode->next->key == h)
- lastNode = lastNode->next;
-
- struct cso_node *afterLastNode = lastNode->next;
- struct cso_node **beforeFirstNode = &hash->buckets[h % hash->numBuckets];
- while (*beforeFirstNode != e)
- beforeFirstNode = &(*beforeFirstNode)->next;
- lastNode->next = *beforeFirstNode;
- *beforeFirstNode = firstNode;
- firstNode = afterLastNode;
- }
- }
- free(oldBuckets);
- }
-}
-
-static void cso_data_might_grow(struct cso_hash_data *hash)
-{
- if (hash->size >= hash->numBuckets)
- cso_data_rehash(hash, hash->numBits + 1);
-}
-
-static void cso_data_has_shrunk(struct cso_hash_data *hash)
-{
- if (hash->size <= (hash->numBuckets >> 3) &&
- hash->numBits > hash->userNumBits) {
- int max = MAX(hash->numBits-2, hash->userNumBits);
- cso_data_rehash(hash, max);
- }
-}
-
-static struct cso_node *cso_data_first_node(struct cso_hash_data *hash)
-{
- struct cso_node *e = (struct cso_node *)(hash);
- struct cso_node **bucket = hash->buckets;
- int n = hash->numBuckets;
- while (n--) {
- if (*bucket != e)
- return *bucket;
- ++bucket;
- }
- return e;
-}
-
-static struct cso_node **cso_hash_find_node(struct cso_hash *hash, unsigned akey)
-{
- struct cso_node **node;
-
- if (hash->data.d->numBuckets) {
- node = (struct cso_node **)(&hash->data.d->buckets[akey % hash->data.d->numBuckets]);
- assert(*node == hash->data.e || (*node)->next);
- while (*node != hash->data.e && (*node)->key != akey)
- node = &(*node)->next;
- } else {
- node = (struct cso_node **)((const struct cso_node * const *)(&hash->data.e));
- }
- return node;
-}
-
-struct cso_hash_iter cso_hash_insert(struct cso_hash *hash,
- unsigned key, void *data)
-{
- cso_data_might_grow(hash->data.d);
-
- struct cso_node **nextNode = cso_hash_find_node(hash, key);
- struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode);
- struct cso_hash_iter iter = {hash, node};
- return iter;
-}
-
-struct cso_hash * cso_hash_create(void)
-{
- struct cso_hash *hash = malloc(sizeof(struct cso_hash));
- hash->data.d = malloc(sizeof(struct cso_hash_data));
- hash->data.d->fakeNext = 0;
- hash->data.d->buckets = 0;
- hash->data.d->size = 0;
- hash->data.d->nodeSize = sizeof(struct cso_node);
- hash->data.d->userNumBits = MinNumBits;
- hash->data.d->numBits = 0;
- hash->data.d->numBuckets = 0;
-
- return hash;
-}
-
-void cso_hash_delete(struct cso_hash *hash)
-{
- struct cso_node *e_for_x = (struct cso_node *)(hash->data.d);
- struct cso_node **bucket = (struct cso_node **)(hash->data.d->buckets);
- int n = hash->data.d->numBuckets;
- while (n--) {
- struct cso_node *cur = *bucket++;
- while (cur != e_for_x) {
- struct cso_node *next = cur->next;
- cso_data_free_node(cur);
- cur = next;
- }
- }
- free(hash->data.d->buckets);
- free(hash->data.d);
- free(hash);
-}
-
-struct cso_hash_iter cso_hash_find(struct cso_hash *hash,
- unsigned key)
-{
- struct cso_node **nextNode = cso_hash_find_node(hash, key);
- struct cso_hash_iter iter = {hash, *nextNode};
- return iter;
-}
-
-unsigned cso_hash_iter_key(struct cso_hash_iter iter)
-{
- if (!iter.node || iter.hash->data.e == iter.node)
- return 0;
- return iter.node->key;
-}
-
-void * cso_hash_iter_data(struct cso_hash_iter iter)
-{
- if (!iter.node || iter.hash->data.e == iter.node)
- return 0;
- return iter.node->value;
-}
-
-static struct cso_node *cso_hash_data_next(struct cso_node *node)
-{
- union {
- struct cso_node *next;
- struct cso_node *e;
- struct cso_hash_data *d;
- } a;
- a.next = node->next;
- if (!a.next) {
- fprintf(stderr, "iterating beyond the last element\n");
- return 0;
- }
- if (a.next->next)
- return a.next;
-
- int start = (node->key % a.d->numBuckets) + 1;
- struct cso_node **bucket = a.d->buckets + start;
- int n = a.d->numBuckets - start;
- while (n--) {
- if (*bucket != a.e)
- return *bucket;
- ++bucket;
- }
- return a.e;
-}
-
-
-static struct cso_node *cso_hash_data_prev(struct cso_node *node)
-{
- union {
- struct cso_node *e;
- struct cso_hash_data *d;
- } a;
-
- a.e = node;
- while (a.e->next)
- a.e = a.e->next;
-
- int start;
- if (node == a.e)
- start = a.d->numBuckets - 1;
- else
- start = node->key % a.d->numBuckets;
-
- struct cso_node *sentinel = node;
- struct cso_node **bucket = a.d->buckets + start;
- while (start >= 0) {
- if (*bucket != sentinel) {
- struct cso_node *prev = *bucket;
- while (prev->next != sentinel)
- prev = prev->next;
- return prev;
- }
-
- sentinel = a.e;
- --bucket;
- --start;
- }
- fprintf(stderr, "iterating backward beyond first element\n");
- return a.e;
-}
-
-struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter)
-{
- struct cso_hash_iter next = {iter.hash, cso_hash_data_next(iter.node)};
- return next;
-}
-
-int cso_hash_iter_is_null(struct cso_hash_iter iter)
-{
- if (!iter.node || iter.node == iter.hash->data.e)
- return 1;
- return 0;
-}
-
-void * cso_hash_take(struct cso_hash *hash,
- unsigned akey)
-{
- struct cso_node **node = cso_hash_find_node(hash, akey);
- if (*node != hash->data.e) {
- void *t = (*node)->value;
- struct cso_node *next = (*node)->next;
- cso_data_free_node(*node);
- *node = next;
- --hash->data.d->size;
- cso_data_has_shrunk(hash->data.d);
- return t;
- }
- return 0;
-}
-
-struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter)
-{
- struct cso_hash_iter prev = {iter.hash,
- cso_hash_data_prev(iter.node)};
- return prev;
-}
-
-struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash)
-{
- struct cso_hash_iter iter = {hash, cso_data_first_node(hash->data.d)};
- return iter;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Zack Rusin <zack@tungstengraphics.com>
- */
-
-#ifndef CSO_HASH_H
-#define CSO_HASH_H
-
-struct cso_hash;
-struct cso_node;
-
-struct cso_hash_iter {
- struct cso_hash *hash;
- struct cso_node *node;
-};
-
-struct cso_hash *cso_hash_create(void);
-void cso_hash_delete(struct cso_hash *hash);
-
-struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, unsigned key,
- void *data);
-void *cso_hash_take(struct cso_hash *hash, unsigned key);
-
-struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash);
-struct cso_hash_iter cso_hash_find(struct cso_hash *hash, unsigned key);
-
-
-int cso_hash_iter_is_null(struct cso_hash_iter iter);
-unsigned cso_hash_iter_key(struct cso_hash_iter iter);
-void *cso_hash_iter_data(struct cso_hash_iter iter);
-
-struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter);
-struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter);
-
-#endif
+++ /dev/null
-default:
- cd .. ; make
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \brief Clipping stage
- *
- * \author Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-
-#include "draw_context.h"
-#include "draw_private.h"
-
-
-#ifndef IS_NEGATIVE
-#define IS_NEGATIVE(X) ((X) < 0.0)
-#endif
-
-#ifndef DIFFERENT_SIGNS
-#define DIFFERENT_SIGNS(x, y) ((x) * (y) <= 0.0F && (x) - (y) != 0.0F)
-#endif
-
-#ifndef MAX_CLIPPED_VERTICES
-#define MAX_CLIPPED_VERTICES ((2 * (6 + PIPE_MAX_CLIP_PLANES))+1)
-#endif
-
-
-
-struct clipper {
- struct draw_stage stage; /**< base class */
-
- /* Basically duplicate some of the flatshading logic here:
- */
- boolean flat;
- uint num_color_attribs;
- uint color_attribs[4]; /* front/back primary/secondary colors */
-
- float (*plane)[4];
-};
-
-
-/* This is a bit confusing:
- */
-static INLINE struct clipper *clipper_stage( struct draw_stage *stage )
-{
- return (struct clipper *)stage;
-}
-
-
-#define LINTERP(T, OUT, IN) ((OUT) + (T) * ((IN) - (OUT)))
-
-
-/* All attributes are float[4], so this is easy:
- */
-static void interp_attr( float *fdst,
- float t,
- const float *fin,
- const float *fout )
-{
- fdst[0] = LINTERP( t, fout[0], fin[0] );
- fdst[1] = LINTERP( t, fout[1], fin[1] );
- fdst[2] = LINTERP( t, fout[2], fin[2] );
- fdst[3] = LINTERP( t, fout[3], fin[3] );
-}
-
-static void copy_colors( struct draw_stage *stage,
- struct vertex_header *dst,
- const struct vertex_header *src )
-{
- const struct clipper *clipper = clipper_stage(stage);
- uint i;
- for (i = 0; i < clipper->num_color_attribs; i++) {
- const uint attr = clipper->color_attribs[i];
- COPY_4FV(dst->data[attr], src->data[attr]);
- }
-}
-
-
-
-/* Interpolate between two vertices to produce a third.
- */
-static void interp( const struct clipper *clip,
- struct vertex_header *dst,
- float t,
- const struct vertex_header *out,
- const struct vertex_header *in )
-{
- const unsigned nr_attrs = clip->stage.draw->num_vs_outputs;
- unsigned j;
-
- /* Vertex header.
- */
- {
- dst->clipmask = 0;
- dst->edgeflag = 0;
- dst->pad = 0;
- dst->vertex_id = UNDEFINED_VERTEX_ID;
- }
-
- /* Clip coordinates: interpolate normally
- */
- {
- interp_attr(dst->clip, t, in->clip, out->clip);
- }
-
- /* Do the projective divide and insert window coordinates:
- */
- {
- const float *pos = dst->clip;
- const float *scale = clip->stage.draw->viewport.scale;
- const float *trans = clip->stage.draw->viewport.translate;
- const float oow = 1.0f / pos[3];
-
- dst->data[0][0] = pos[0] * oow * scale[0] + trans[0];
- dst->data[0][1] = pos[1] * oow * scale[1] + trans[1];
- dst->data[0][2] = pos[2] * oow * scale[2] + trans[2];
- dst->data[0][3] = oow;
- }
-
- /* Other attributes
- * Note: start at 1 to skip winpos (data[0]) since we just computed
- * it above.
- */
- for (j = 1; j < nr_attrs; j++) {
- interp_attr(dst->data[j], t, in->data[j], out->data[j]);
- }
-}
-
-
-static void emit_poly( struct draw_stage *stage,
- struct vertex_header **inlist,
- unsigned n,
- const struct prim_header *origPrim)
-{
- struct prim_header header;
- unsigned i;
-
- /* later stages may need the determinant, but only the sign matters */
- header.det = origPrim->det;
-
- for (i = 2; i < n; i++) {
- header.v[0] = inlist[i-1];
- header.v[1] = inlist[i];
- header.v[2] = inlist[0]; /* keep in v[2] for flatshading */
-
- {
- unsigned tmp1 = header.v[1]->edgeflag;
- unsigned tmp2 = header.v[2]->edgeflag;
-
- if (i != n-1) header.v[1]->edgeflag = 0;
- if (i != 2) header.v[2]->edgeflag = 0;
-
- header.edgeflags = ((header.v[0]->edgeflag << 0) |
- (header.v[1]->edgeflag << 1) |
- (header.v[2]->edgeflag << 2));
-
- stage->next->tri( stage->next, &header );
-
- header.v[1]->edgeflag = tmp1;
- header.v[2]->edgeflag = tmp2;
- }
- }
-}
-
-
-
-
-/* Clip a triangle against the viewport and user clip planes.
- */
-static void
-do_clip_tri( struct draw_stage *stage,
- struct prim_header *header,
- unsigned clipmask )
-{
- struct clipper *clipper = clipper_stage( stage );
- struct vertex_header *a[MAX_CLIPPED_VERTICES];
- struct vertex_header *b[MAX_CLIPPED_VERTICES];
- struct vertex_header **inlist = a;
- struct vertex_header **outlist = b;
- unsigned tmpnr = 0;
- unsigned n = 3;
- unsigned i;
-
- inlist[0] = header->v[0];
- inlist[1] = header->v[1];
- inlist[2] = header->v[2];
-
- while (clipmask && n >= 3) {
- const unsigned plane_idx = ffs(clipmask)-1;
- const float *plane = clipper->plane[plane_idx];
- struct vertex_header *vert_prev = inlist[0];
- float dp_prev = dot4( vert_prev->clip, plane );
- unsigned outcount = 0;
-
- clipmask &= ~(1<<plane_idx);
-
- inlist[n] = inlist[0]; /* prevent rotation of vertices */
-
- for (i = 1; i <= n; i++) {
- struct vertex_header *vert = inlist[i];
-
- float dp = dot4( vert->clip, plane );
-
- if (!IS_NEGATIVE(dp_prev)) {
- outlist[outcount++] = vert_prev;
- }
-
- if (DIFFERENT_SIGNS(dp, dp_prev)) {
- struct vertex_header *new_vert = clipper->stage.tmp[tmpnr++];
- outlist[outcount++] = new_vert;
-
- if (IS_NEGATIVE(dp)) {
- /* Going out of bounds. Avoid division by zero as we
- * know dp != dp_prev from DIFFERENT_SIGNS, above.
- */
- float t = dp / (dp - dp_prev);
- interp( clipper, new_vert, t, vert, vert_prev );
-
- /* Force edgeflag true in this case:
- */
- new_vert->edgeflag = 1;
- } else {
- /* Coming back in.
- */
- float t = dp_prev / (dp_prev - dp);
- interp( clipper, new_vert, t, vert_prev, vert );
-
- /* Copy starting vert's edgeflag:
- */
- new_vert->edgeflag = vert_prev->edgeflag;
- }
- }
-
- vert_prev = vert;
- dp_prev = dp;
- }
-
- {
- struct vertex_header **tmp = inlist;
- inlist = outlist;
- outlist = tmp;
- n = outcount;
- }
- }
-
- /* If flat-shading, copy color to new provoking vertex.
- */
- if (clipper->flat && inlist[0] != header->v[2]) {
- if (1) {
- inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
- }
-
- copy_colors(stage, inlist[0], header->v[2]);
- }
-
-
-
- /* Emit the polygon as triangles to the setup stage:
- */
- if (n >= 3)
- emit_poly( stage, inlist, n, header );
-}
-
-
-/* Clip a line against the viewport and user clip planes.
- */
-static void
-do_clip_line( struct draw_stage *stage,
- struct prim_header *header,
- unsigned clipmask )
-{
- const struct clipper *clipper = clipper_stage( stage );
- struct vertex_header *v0 = header->v[0];
- struct vertex_header *v1 = header->v[1];
- const float *pos0 = v0->clip;
- const float *pos1 = v1->clip;
- float t0 = 0.0F;
- float t1 = 0.0F;
- struct prim_header newprim;
-
- while (clipmask) {
- const unsigned plane_idx = ffs(clipmask)-1;
- const float *plane = clipper->plane[plane_idx];
- const float dp0 = dot4( pos0, plane );
- const float dp1 = dot4( pos1, plane );
-
- if (dp1 < 0.0F) {
- float t = dp1 / (dp1 - dp0);
- t1 = MAX2(t1, t);
- }
-
- if (dp0 < 0.0F) {
- float t = dp0 / (dp0 - dp1);
- t0 = MAX2(t0, t);
- }
-
- if (t0 + t1 >= 1.0F)
- return; /* discard */
-
- clipmask &= ~(1 << plane_idx); /* turn off this plane's bit */
- }
-
- if (v0->clipmask) {
- interp( clipper, stage->tmp[0], t0, v0, v1 );
-
- if (clipper->flat)
- copy_colors(stage, stage->tmp[0], v0);
-
- newprim.v[0] = stage->tmp[0];
- }
- else {
- newprim.v[0] = v0;
- }
-
- if (v1->clipmask) {
- interp( clipper, stage->tmp[1], t1, v1, v0 );
- newprim.v[1] = stage->tmp[1];
- }
- else {
- newprim.v[1] = v1;
- }
-
- stage->next->line( stage->next, &newprim );
-}
-
-
-static void
-clip_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- if (header->v[0]->clipmask == 0)
- stage->next->point( stage->next, header );
-}
-
-
-static void
-clip_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- unsigned clipmask = (header->v[0]->clipmask |
- header->v[1]->clipmask);
-
- if (clipmask == 0) {
- /* no clipping needed */
- stage->next->line( stage->next, header );
- }
- else if ((header->v[0]->clipmask &
- header->v[1]->clipmask) == 0) {
- do_clip_line(stage, header, clipmask);
- }
- /* else, totally clipped */
-}
-
-
-static void
-clip_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- unsigned clipmask = (header->v[0]->clipmask |
- header->v[1]->clipmask |
- header->v[2]->clipmask);
-
- if (clipmask == 0) {
- /* no clipping needed */
- stage->next->tri( stage->next, header );
- }
- else if ((header->v[0]->clipmask &
- header->v[1]->clipmask &
- header->v[2]->clipmask) == 0) {
- do_clip_tri(stage, header, clipmask);
- }
-}
-
-/* Update state. Could further delay this until we hit the first
- * primitive that really requires clipping.
- */
-static void
-clip_init_state( struct draw_stage *stage )
-{
- struct clipper *clipper = clipper_stage( stage );
-
- clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE;
-
- if (clipper->flat) {
- const struct pipe_shader_state *vs = stage->draw->vertex_shader->state;
- uint i;
-
- clipper->num_color_attribs = 0;
- for (i = 0; i < vs->num_outputs; i++) {
- if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR ||
- vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) {
- clipper->color_attribs[clipper->num_color_attribs++] = i;
- }
- }
- }
-
- stage->tri = clip_tri;
- stage->line = clip_line;
-}
-
-
-
-static void clip_first_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- clip_init_state( stage );
- stage->tri( stage, header );
-}
-
-static void clip_first_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- clip_init_state( stage );
- stage->line( stage, header );
-}
-
-
-static void clip_flush( struct draw_stage *stage,
- unsigned flags )
-{
- stage->tri = clip_first_tri;
- stage->line = clip_first_line;
- stage->next->flush( stage->next, flags );
-}
-
-
-static void clip_reset_stipple_counter( struct draw_stage *stage )
-{
- stage->next->reset_stipple_counter( stage->next );
-}
-
-
-static void clip_destroy( struct draw_stage *stage )
-{
- draw_free_temp_verts( stage );
- FREE( stage );
-}
-
-
-/**
- * Allocate a new clipper stage.
- * \return pointer to new stage object
- */
-struct draw_stage *draw_clip_stage( struct draw_context *draw )
-{
- struct clipper *clipper = CALLOC_STRUCT(clipper);
-
- draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 );
-
- clipper->stage.draw = draw;
- clipper->stage.point = clip_point;
- clipper->stage.line = clip_first_line;
- clipper->stage.tri = clip_first_tri;
- clipper->stage.flush = clip_flush;
- clipper->stage.reset_stipple_counter = clip_reset_stipple_counter;
- clipper->stage.destroy = clip_destroy;
-
- clipper->plane = draw->plane;
-
- return &clipper->stage;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
-#include "pipe/p_util.h"
-#include "draw_context.h"
-#include "draw_private.h"
-
-
-
-struct draw_context *draw_create( void )
-{
- struct draw_context *draw = CALLOC_STRUCT( draw_context );
-
-#if defined(__i386__) || defined(__386__)
- draw->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL;
-#else
- draw->use_sse = FALSE;
-#endif
-
- /* create pipeline stages */
- draw->pipeline.wide = draw_wide_stage( draw );
- draw->pipeline.stipple = draw_stipple_stage( draw );
- draw->pipeline.unfilled = draw_unfilled_stage( draw );
- draw->pipeline.twoside = draw_twoside_stage( draw );
- draw->pipeline.offset = draw_offset_stage( draw );
- draw->pipeline.clip = draw_clip_stage( draw );
- draw->pipeline.flatshade = draw_flatshade_stage( draw );
- draw->pipeline.cull = draw_cull_stage( draw );
- draw->pipeline.validate = draw_validate_stage( draw );
- draw->pipeline.first = draw->pipeline.validate;
-
- ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 );
- ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 );
- ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 );
- ASSIGN_4V( draw->plane[3], 0, 1, 0, 1 );
- ASSIGN_4V( draw->plane[4], 0, 0, 1, 1 ); /* yes these are correct */
- ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */
- draw->nr_planes = 6;
-
- /* Statically allocate maximum sized vertices for the cache - could be cleverer...
- */
- {
- uint i;
- const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f;
- char *tmp = align_malloc(Elements(draw->vcache.vertex) * size, 16);
-
- for (i = 0; i < Elements(draw->vcache.vertex); i++)
- draw->vcache.vertex[i] = (struct vertex_header *)(tmp + i * size);
- }
-
- draw->shader_queue_flush = draw_vertex_shader_queue_flush;
-
- draw->convert_wide_points = TRUE;
- draw->convert_wide_lines = TRUE;
-
- draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
-
- draw_vertex_cache_invalidate( draw );
- draw_set_mapped_element_buffer( draw, 0, NULL );
-
- return draw;
-}
-
-
-void draw_destroy( struct draw_context *draw )
-{
- draw->pipeline.wide->destroy( draw->pipeline.wide );
- draw->pipeline.stipple->destroy( draw->pipeline.stipple );
- draw->pipeline.unfilled->destroy( draw->pipeline.unfilled );
- draw->pipeline.twoside->destroy( draw->pipeline.twoside );
- draw->pipeline.offset->destroy( draw->pipeline.offset );
- draw->pipeline.clip->destroy( draw->pipeline.clip );
- draw->pipeline.flatshade->destroy( draw->pipeline.flatshade );
- draw->pipeline.cull->destroy( draw->pipeline.cull );
- draw->pipeline.validate->destroy( draw->pipeline.validate );
- if (draw->pipeline.rasterize)
- draw->pipeline.rasterize->destroy( draw->pipeline.rasterize );
- tgsi_exec_machine_free_data(&draw->machine);
- align_free( draw->vcache.vertex[0] ); /* Frees all the vertices. */
- FREE( draw );
-}
-
-
-
-void draw_flush( struct draw_context *draw )
-{
- draw_do_flush( draw, DRAW_FLUSH_BACKEND );
-}
-
-
-
-/**
- * Register new primitive rasterization/rendering state.
- * This causes the drawing pipeline to be rebuilt.
- */
-void draw_set_rasterizer_state( struct draw_context *draw,
- const struct pipe_rasterizer_state *raster )
-{
- draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
-
- draw->rasterizer = raster;
-}
-
-
-/**
- * Plug in the primitive rendering/rasterization stage (which is the last
- * stage in the drawing pipeline).
- * This is provided by the device driver.
- */
-void draw_set_rasterize_stage( struct draw_context *draw,
- struct draw_stage *stage )
-{
- draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
-
- draw->pipeline.rasterize = stage;
-}
-
-
-/**
- * Set the draw module's clipping state.
- */
-void draw_set_clip_state( struct draw_context *draw,
- const struct pipe_clip_state *clip )
-{
- draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
-
- assert(clip->nr <= PIPE_MAX_CLIP_PLANES);
- memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0]));
- draw->nr_planes = 6 + clip->nr;
-}
-
-
-/**
- * Set the draw module's viewport state.
- */
-void draw_set_viewport_state( struct draw_context *draw,
- const struct pipe_viewport_state *viewport )
-{
- draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->viewport = *viewport; /* struct copy */
-}
-
-
-
-void
-draw_set_vertex_buffer(struct draw_context *draw,
- unsigned attr,
- const struct pipe_vertex_buffer *buffer)
-{
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
- assert(attr < PIPE_ATTRIB_MAX);
- draw->vertex_buffer[attr] = *buffer;
-}
-
-
-void
-draw_set_vertex_element(struct draw_context *draw,
- unsigned attr,
- const struct pipe_vertex_element *element)
-{
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
- assert(attr < PIPE_ATTRIB_MAX);
- draw->vertex_element[attr] = *element;
-}
-
-
-/**
- * Tell drawing context where to find mapped vertex buffers.
- */
-void
-draw_set_mapped_vertex_buffer(struct draw_context *draw,
- unsigned attr, const void *buffer)
-{
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
- draw->user.vbuffer[attr] = buffer;
-}
-
-
-void
-draw_set_mapped_constant_buffer(struct draw_context *draw,
- const void *buffer)
-{
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
- draw->user.constants = buffer;
-}
-
-
-/**
- * Tells the draw module whether to convert wide points (size != 1)
- * into triangles.
- */
-void
-draw_convert_wide_points(struct draw_context *draw, boolean enable)
-{
- draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->convert_wide_points = enable;
-}
-
-
-/**
- * Tells the draw module whether to convert wide lines (width != 1)
- * into triangles.
- */
-void
-draw_convert_wide_lines(struct draw_context *draw, boolean enable)
-{
- draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->convert_wide_lines = enable;
-}
-
-
-/**
- * Allocate space for temporary post-transform vertices, such as for clipping.
- */
-void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr )
-{
- assert(!stage->tmp);
-
- stage->nr_tmps = nr;
-
- if (nr) {
- ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr );
- unsigned i;
-
- stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr );
-
- for (i = 0; i < nr; i++)
- stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE);
- }
-}
-
-
-void draw_free_temp_verts( struct draw_stage *stage )
-{
- if (stage->tmp) {
- FREE( stage->tmp[0] );
- FREE( stage->tmp );
- stage->tmp = NULL;
- }
-}
-
-
-boolean draw_use_sse(struct draw_context *draw)
-{
- return (boolean) draw->use_sse;
-}
-
-
-void draw_reset_vertex_ids(struct draw_context *draw)
-{
- struct draw_stage *stage = draw->pipeline.first;
-
- while (stage) {
- unsigned i;
-
- for (i = 0; i < stage->nr_tmps; i++)
- stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID;
-
- stage = stage->next;
- }
-
- draw_vertex_cache_reset_vertex_ids(draw);
-}
+++ /dev/null
-
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \brief Public interface into the drawing module.
- */
-
-/* Authors: Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
-#ifndef DRAW_CONTEXT_H
-#define DRAW_CONTEXT_H
-
-
-#include "pipe/p_state.h"
-
-
-struct vertex_buffer;
-struct vertex_info;
-struct draw_context;
-struct draw_stage;
-struct draw_vertex_shader;
-
-
-/**
- * Clipmask flags
- */
-/*@{*/
-#define CLIP_RIGHT_BIT 0x01
-#define CLIP_LEFT_BIT 0x02
-#define CLIP_TOP_BIT 0x04
-#define CLIP_BOTTOM_BIT 0x08
-#define CLIP_NEAR_BIT 0x10
-#define CLIP_FAR_BIT 0x20
-/*@}*/
-
-/**
- * Bitshift for each clip flag
- */
-/*@{*/
-#define CLIP_RIGHT_SHIFT 0
-#define CLIP_LEFT_SHIFT 1
-#define CLIP_TOP_SHIFT 2
-#define CLIP_BOTTOM_SHIFT 3
-#define CLIP_NEAR_SHIFT 4
-#define CLIP_FAR_SHIFT 5
-/*@}*/
-
-
-struct draw_context *draw_create( void );
-
-void draw_destroy( struct draw_context *draw );
-
-void draw_set_viewport_state( struct draw_context *draw,
- const struct pipe_viewport_state *viewport );
-
-void draw_set_clip_state( struct draw_context *pipe,
- const struct pipe_clip_state *clip );
-
-void draw_set_rasterizer_state( struct draw_context *draw,
- const struct pipe_rasterizer_state *raster );
-
-void draw_set_rasterize_stage( struct draw_context *draw,
- struct draw_stage *stage );
-
-void draw_convert_wide_points(struct draw_context *draw, boolean enable);
-
-void draw_convert_wide_lines(struct draw_context *draw, boolean enable);
-
-
-struct draw_vertex_shader *
-draw_create_vertex_shader(struct draw_context *draw,
- const struct pipe_shader_state *shader);
-void draw_bind_vertex_shader(struct draw_context *draw,
- struct draw_vertex_shader *dvs);
-void draw_delete_vertex_shader(struct draw_context *draw,
- struct draw_vertex_shader *dvs);
-
-boolean draw_use_sse(struct draw_context *draw);
-
-void draw_set_vertex_buffer(struct draw_context *draw,
- unsigned attr,
- const struct pipe_vertex_buffer *buffer);
-
-void draw_set_vertex_element(struct draw_context *draw,
- unsigned attr,
- const struct pipe_vertex_element *element);
-
-void draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize, void *elements );
-
-void draw_set_mapped_vertex_buffer(struct draw_context *draw,
- unsigned attr, const void *buffer);
-
-void draw_set_mapped_constant_buffer(struct draw_context *draw,
- const void *buffer);
-
-
-/***********************************************************************
- * draw_prim.c
- */
-
-void draw_arrays(struct draw_context *draw, unsigned prim,
- unsigned start, unsigned count);
-
-void draw_flush(struct draw_context *draw);
-
-/***********************************************************************
- * draw_debug.c
- */
-boolean draw_validate_prim( unsigned prim, unsigned length );
-unsigned draw_trim_prim( unsigned mode, unsigned count );
-
-
-
-#endif /* DRAW_CONTEXT_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \brief Drawing stage for polygon culling
- */
-
-/* Authors: Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
-#include "pipe/p_util.h"
-#include "pipe/p_defines.h"
-#include "draw_private.h"
-
-
-struct cull_stage {
- struct draw_stage stage;
- unsigned winding; /**< which winding(s) to cull (one of PIPE_WINDING_x) */
-};
-
-
-static INLINE struct cull_stage *cull_stage( struct draw_stage *stage )
-{
- return (struct cull_stage *)stage;
-}
-
-
-
-
-static void cull_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- /* Window coords: */
- const float *v0 = header->v[0]->data[0];
- const float *v1 = header->v[1]->data[0];
- const float *v2 = header->v[2]->data[0];
-
- /* edge vectors e = v0 - v2, f = v1 - v2 */
- const float ex = v0[0] - v2[0];
- const float ey = v0[1] - v2[1];
- const float fx = v1[0] - v2[0];
- const float fy = v1[1] - v2[1];
-
- /* det = cross(e,f).z */
- header->det = ex * fy - ey * fx;
-
- if (header->det != 0) {
- /* if (det < 0 then Z points toward camera and triangle is
- * counter-clockwise winding.
- */
- unsigned winding = (header->det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
-
- if ((winding & cull_stage(stage)->winding) == 0) {
- /* triangle is not culled, pass to next stage */
- stage->next->tri( stage->next, header );
- }
- }
-}
-
-static void cull_first_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct cull_stage *cull = cull_stage(stage);
-
- cull->winding = stage->draw->rasterizer->cull_mode;
-
- stage->tri = cull_tri;
- stage->tri( stage, header );
-}
-
-
-
-static void cull_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->line( stage->next, header );
-}
-
-
-static void cull_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
-
-static void cull_flush( struct draw_stage *stage, unsigned flags )
-{
- stage->tri = cull_first_tri;
- stage->next->flush( stage->next, flags );
-}
-
-static void cull_reset_stipple_counter( struct draw_stage *stage )
-{
- stage->next->reset_stipple_counter( stage->next );
-}
-
-
-static void cull_destroy( struct draw_stage *stage )
-{
- draw_free_temp_verts( stage );
- FREE( stage );
-}
-
-
-/**
- * Create a new polygon culling stage.
- */
-struct draw_stage *draw_cull_stage( struct draw_context *draw )
-{
- struct cull_stage *cull = CALLOC_STRUCT(cull_stage);
-
- draw_alloc_temp_verts( &cull->stage, 0 );
-
- cull->stage.draw = draw;
- cull->stage.next = NULL;
- cull->stage.point = cull_point;
- cull->stage.line = cull_line;
- cull->stage.tri = cull_first_tri;
- cull->stage.flush = cull_flush;
- cull->stage.reset_stipple_counter = cull_reset_stipple_counter;
- cull->stage.destroy = cull_destroy;
-
- return &cull->stage;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "draw_private.h"
-#include "draw_context.h"
-
-
-
-static void
-draw_prim_info(unsigned prim, unsigned *first, unsigned *incr)
-{
- assert(prim >= PIPE_PRIM_POINTS);
- assert(prim <= PIPE_PRIM_POLYGON);
-
- switch (prim) {
- case PIPE_PRIM_POINTS:
- *first = 1;
- *incr = 1;
- break;
- case PIPE_PRIM_LINES:
- *first = 2;
- *incr = 2;
- break;
- case PIPE_PRIM_LINE_STRIP:
- *first = 2;
- *incr = 1;
- break;
- case PIPE_PRIM_LINE_LOOP:
- *first = 2;
- *incr = 1;
- break;
- case PIPE_PRIM_TRIANGLES:
- *first = 3;
- *incr = 3;
- break;
- case PIPE_PRIM_TRIANGLE_STRIP:
- *first = 3;
- *incr = 1;
- break;
- case PIPE_PRIM_TRIANGLE_FAN:
- case PIPE_PRIM_POLYGON:
- *first = 3;
- *incr = 1;
- break;
- case PIPE_PRIM_QUADS:
- *first = 4;
- *incr = 4;
- break;
- case PIPE_PRIM_QUAD_STRIP:
- *first = 4;
- *incr = 2;
- break;
- default:
- assert(0);
- *first = 1;
- *incr = 1;
- break;
- }
-}
-
-
-unsigned
-draw_trim_prim( unsigned mode, unsigned count )
-{
- unsigned length, first, incr;
-
- draw_prim_info( mode, &first, &incr );
-
- if (count < first)
- length = 0;
- else
- length = count - (count - first) % incr;
-
- return length;
-}
-
-
-boolean
-draw_validate_prim( unsigned mode, unsigned count )
-{
- return (count > 0 &&
- count == draw_trim_prim( mode, count ));
-}
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* Authors: Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
-
-
-/** subclass of draw_stage */
-struct flat_stage
-{
- struct draw_stage stage;
-
- uint num_color_attribs;
- uint color_attribs[4]; /* front/back primary/secondary colors */
-};
-
-
-static INLINE struct flat_stage *
-flat_stage(struct draw_stage *stage)
-{
- return (struct flat_stage *) stage;
-}
-
-
-/** Copy all the color attributes from 'src' vertex to 'dst' vertex */
-static INLINE void copy_colors( struct draw_stage *stage,
- struct vertex_header *dst,
- const struct vertex_header *src )
-{
- const struct flat_stage *flat = flat_stage(stage);
- uint i;
- for (i = 0; i < flat->num_color_attribs; i++) {
- const uint attr = flat->color_attribs[i];
- COPY_4FV(dst->data[attr], src->data[attr]);
- }
-}
-
-
-/** Copy all the color attributes from src vertex to dst0 & dst1 vertices */
-static INLINE void copy_colors2( struct draw_stage *stage,
- struct vertex_header *dst0,
- struct vertex_header *dst1,
- const struct vertex_header *src )
-{
- const struct flat_stage *flat = flat_stage(stage);
- uint i;
- for (i = 0; i < flat->num_color_attribs; i++) {
- const uint attr = flat->color_attribs[i];
- COPY_4FV(dst0->data[attr], src->data[attr]);
- COPY_4FV(dst1->data[attr], src->data[attr]);
- }
-}
-
-
-/**
- * Flatshade tri. Required for clipping and when unfilled tris are
- * active, otherwise handled by hardware.
- */
-static void flatshade_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct prim_header tmp;
-
- tmp.det = header->det;
- tmp.edgeflags = header->edgeflags;
- tmp.v[0] = dup_vert(stage, header->v[0], 0);
- tmp.v[1] = dup_vert(stage, header->v[1], 1);
- tmp.v[2] = header->v[2];
-
- copy_colors2(stage, tmp.v[0], tmp.v[1], tmp.v[2]);
-
- stage->next->tri( stage->next, &tmp );
-}
-
-
-/**
- * Flatshade line. Required for clipping.
- */
-static void flatshade_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct prim_header tmp;
-
- tmp.v[0] = dup_vert(stage, header->v[0], 0);
- tmp.v[1] = header->v[1];
-
- copy_colors(stage, tmp.v[0], tmp.v[1]);
-
- stage->next->line( stage->next, &tmp );
-}
-
-
-static void flatshade_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
-
-static void flatshade_init_state( struct draw_stage *stage )
-{
- struct flat_stage *flat = flat_stage(stage);
- const struct pipe_shader_state *vs = stage->draw->vertex_shader->state;
- uint i;
-
- /* Find which vertex shader outputs are colors, make a list */
- flat->num_color_attribs = 0;
- for (i = 0; i < vs->num_outputs; i++) {
- if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR ||
- vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) {
- flat->color_attribs[flat->num_color_attribs++] = i;
- }
- }
-
- stage->line = flatshade_line;
- stage->tri = flatshade_tri;
-}
-
-static void flatshade_first_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- flatshade_init_state( stage );
- stage->tri( stage, header );
-}
-
-static void flatshade_first_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- flatshade_init_state( stage );
- stage->line( stage, header );
-}
-
-
-static void flatshade_flush( struct draw_stage *stage,
- unsigned flags )
-{
- stage->tri = flatshade_first_tri;
- stage->line = flatshade_first_line;
- stage->next->flush( stage->next, flags );
-}
-
-
-static void flatshade_reset_stipple_counter( struct draw_stage *stage )
-{
- stage->next->reset_stipple_counter( stage->next );
-}
-
-
-static void flatshade_destroy( struct draw_stage *stage )
-{
- draw_free_temp_verts( stage );
- FREE( stage );
-}
-
-
-/**
- * Create flatshading drawing stage.
- */
-struct draw_stage *draw_flatshade_stage( struct draw_context *draw )
-{
- struct flat_stage *flatshade = CALLOC_STRUCT(flat_stage);
-
- draw_alloc_temp_verts( &flatshade->stage, 2 );
-
- flatshade->stage.draw = draw;
- flatshade->stage.next = NULL;
- flatshade->stage.point = flatshade_point;
- flatshade->stage.line = flatshade_first_line;
- flatshade->stage.tri = flatshade_first_tri;
- flatshade->stage.flush = flatshade_flush;
- flatshade->stage.reset_stipple_counter = flatshade_reset_stipple_counter;
- flatshade->stage.destroy = flatshade_destroy;
-
- return &flatshade->stage;
-}
-
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \brief polygon offset state
- *
- * \author Keith Whitwell <keith@tungstengraphics.com>
- * \author Brian Paul
- */
-
-#include "pipe/p_util.h"
-#include "draw_private.h"
-
-
-
-struct offset_stage {
- struct draw_stage stage;
-
- float scale;
- float units;
-};
-
-
-
-static INLINE struct offset_stage *offset_stage( struct draw_stage *stage )
-{
- return (struct offset_stage *) stage;
-}
-
-
-
-
-
-/**
- * Offset tri Z. Some hardware can handle this, but not usually when
- * doing unfilled rendering.
- */
-static void do_offset_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct offset_stage *offset = offset_stage(stage);
- float inv_det = 1.0f / header->det;
-
- /* Window coords:
- */
- float *v0 = header->v[0]->data[0];
- float *v1 = header->v[1]->data[0];
- float *v2 = header->v[2]->data[0];
-
- /* edge vectors e = v0 - v2, f = v1 - v2 */
- float ex = v0[0] - v2[0];
- float ey = v0[1] - v2[1];
- float ez = v0[2] - v2[2];
- float fx = v1[0] - v2[0];
- float fy = v1[1] - v2[1];
- float fz = v1[2] - v2[2];
-
- /* (a,b) = cross(e,f).xy */
- float a = ey*fz - ez*fy;
- float b = ez*fx - ex*fz;
-
- float dzdx = FABSF(a * inv_det);
- float dzdy = FABSF(b * inv_det);
-
- float zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale;
-
- /*
- * Note: we're applying the offset and clamping per-vertex.
- * Ideally, the offset is applied per-fragment prior to fragment shading.
- */
- v0[2] = CLAMP(v0[2] + zoffset, 0.0f, 1.0f);
- v1[2] = CLAMP(v1[2] + zoffset, 0.0f, 1.0f);
- v2[2] = CLAMP(v2[2] + zoffset, 0.0f, 1.0f);
-
- stage->next->tri( stage->next, header );
-}
-
-
-static void offset_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct prim_header tmp;
-
- tmp.det = header->det;
- tmp.edgeflags = header->edgeflags;
- tmp.v[0] = dup_vert(stage, header->v[0], 0);
- tmp.v[1] = dup_vert(stage, header->v[1], 1);
- tmp.v[2] = dup_vert(stage, header->v[2], 2);
-
- do_offset_tri( stage, &tmp );
-}
-
-
-static void offset_first_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct offset_stage *offset = offset_stage(stage);
- float mrd = 1.0f / 65535.0f; /* XXX this depends on depthbuffer bits! */
-
- offset->units = stage->draw->rasterizer->offset_units * mrd;
- offset->scale = stage->draw->rasterizer->offset_scale;
-
- stage->tri = offset_tri;
- stage->tri( stage, header );
-}
-
-
-static void offset_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->line( stage->next, header );
-}
-
-
-static void offset_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
-
-static void offset_flush( struct draw_stage *stage,
- unsigned flags )
-{
- stage->tri = offset_first_tri;
- stage->next->flush( stage->next, flags );
-}
-
-
-static void offset_reset_stipple_counter( struct draw_stage *stage )
-{
- stage->next->reset_stipple_counter( stage->next );
-}
-
-
-static void offset_destroy( struct draw_stage *stage )
-{
- draw_free_temp_verts( stage );
- FREE( stage );
-}
-
-
-/**
- * Create polygon offset drawing stage.
- */
-struct draw_stage *draw_offset_stage( struct draw_context *draw )
-{
- struct offset_stage *offset = CALLOC_STRUCT(offset_stage);
-
- draw_alloc_temp_verts( &offset->stage, 3 );
-
- offset->stage.draw = draw;
- offset->stage.next = NULL;
- offset->stage.point = offset_point;
- offset->stage.line = offset_line;
- offset->stage.tri = offset_first_tri;
- offset->stage.flush = offset_flush;
- offset->stage.reset_stipple_counter = offset_reset_stipple_counter;
- offset->stage.destroy = offset_destroy;
-
- return &offset->stage;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "pipe/p_debug.h"
-
-#include "draw_private.h"
-#include "draw_context.h"
-
-
-
-#define RP_NONE 0
-#define RP_POINT 1
-#define RP_LINE 2
-#define RP_TRI 3
-
-
-static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
- RP_POINT,
- RP_LINE,
- RP_LINE,
- RP_LINE,
- RP_TRI,
- RP_TRI,
- RP_TRI,
- RP_TRI,
- RP_TRI,
- RP_TRI
-};
-
-
-static void draw_prim_queue_flush( struct draw_context *draw )
-{
- unsigned i;
-
- if (0)
- debug_printf("Flushing with %d prims, %d verts\n",
- draw->pq.queue_nr, draw->vs.queue_nr);
-
- assert (draw->pq.queue_nr != 0);
-
- /* NOTE: we cannot save draw->pipeline->first in a local var because
- * draw->pipeline->first is often changed by the first call to tri(),
- * line(), etc.
- */
- if (draw->rasterizer->line_stipple_enable) {
- switch (draw->reduced_prim) {
- case RP_TRI:
- for (i = 0; i < draw->pq.queue_nr; i++) {
- if (draw->pq.queue[i].reset_line_stipple)
- draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
-
- draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] );
- }
- break;
- case RP_LINE:
- for (i = 0; i < draw->pq.queue_nr; i++) {
- if (draw->pq.queue[i].reset_line_stipple)
- draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
-
- draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] );
- }
- break;
- case RP_POINT:
- draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
- for (i = 0; i < draw->pq.queue_nr; i++)
- draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] );
- break;
- }
- }
- else {
- switch (draw->reduced_prim) {
- case RP_TRI:
- for (i = 0; i < draw->pq.queue_nr; i++)
- draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] );
- break;
- case RP_LINE:
- for (i = 0; i < draw->pq.queue_nr; i++)
- draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] );
- break;
- case RP_POINT:
- for (i = 0; i < draw->pq.queue_nr; i++)
- draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] );
- break;
- }
- }
-
- draw->pq.queue_nr = 0;
- draw_vertex_cache_unreference( draw );
-}
-
-
-
-void draw_do_flush( struct draw_context *draw, unsigned flags )
-{
- if (0)
- debug_printf("Flushing with %d verts, %d prims\n",
- draw->vs.queue_nr,
- draw->pq.queue_nr );
-
-
- if (flags >= DRAW_FLUSH_SHADER_QUEUE) {
- if (draw->vs.queue_nr)
- (*draw->shader_queue_flush)(draw);
-
- if (flags >= DRAW_FLUSH_PRIM_QUEUE) {
- if (draw->pq.queue_nr)
- draw_prim_queue_flush(draw);
-
- if (flags >= DRAW_FLUSH_VERTEX_CACHE) {
- draw_vertex_cache_invalidate(draw);
-
- if (flags >= DRAW_FLUSH_STATE_CHANGE) {
- draw->pipeline.first->flush( draw->pipeline.first, flags );
- draw->pipeline.first = draw->pipeline.validate;
- draw->reduced_prim = ~0;
- }
- }
- }
- }
-}
-
-
-
-/* Return a pointer to a freshly queued primitive header. Ensure that
- * there is room in the vertex cache for a maximum of "nr_verts" new
- * vertices. Flush primitive and/or vertex queues if necessary to
- * make space.
- */
-static struct prim_header *get_queued_prim( struct draw_context *draw,
- unsigned nr_verts )
-{
- if (!draw_vertex_cache_check_space( draw, nr_verts )) {
-// debug_printf("v");
- draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE );
- }
- else if (draw->pq.queue_nr == PRIM_QUEUE_LENGTH) {
-// debug_printf("p");
- draw_do_flush( draw, DRAW_FLUSH_PRIM_QUEUE );
- }
-
- assert(draw->pq.queue_nr < PRIM_QUEUE_LENGTH);
-
- return &draw->pq.queue[draw->pq.queue_nr++];
-}
-
-
-
-/**
- * Add a point to the primitive queue.
- * \param i0 index into user's vertex arrays
- */
-static void do_point( struct draw_context *draw,
- unsigned i0 )
-{
- struct prim_header *prim = get_queued_prim( draw, 1 );
-
- prim->reset_line_stipple = 0;
- prim->edgeflags = 1;
- prim->pad = 0;
- prim->v[0] = draw->vcache.get_vertex( draw, i0 );
-}
-
-
-/**
- * Add a line to the primitive queue.
- * \param i0 index into user's vertex arrays
- * \param i1 index into user's vertex arrays
- */
-static void do_line( struct draw_context *draw,
- boolean reset_stipple,
- unsigned i0,
- unsigned i1 )
-{
- struct prim_header *prim = get_queued_prim( draw, 2 );
-
- prim->reset_line_stipple = reset_stipple;
- prim->edgeflags = 1;
- prim->pad = 0;
- prim->v[0] = draw->vcache.get_vertex( draw, i0 );
- prim->v[1] = draw->vcache.get_vertex( draw, i1 );
-}
-
-/**
- * Add a triangle to the primitive queue.
- */
-static void do_triangle( struct draw_context *draw,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- struct prim_header *prim = get_queued_prim( draw, 3 );
-
- prim->reset_line_stipple = 1;
- prim->edgeflags = ~0;
- prim->pad = 0;
- prim->v[0] = draw->vcache.get_vertex( draw, i0 );
- prim->v[1] = draw->vcache.get_vertex( draw, i1 );
- prim->v[2] = draw->vcache.get_vertex( draw, i2 );
-}
-
-static void do_ef_triangle( struct draw_context *draw,
- boolean reset_stipple,
- unsigned ef_mask,
- unsigned i0,
- unsigned i1,
- unsigned i2 )
-{
- struct prim_header *prim = get_queued_prim( draw, 3 );
- struct vertex_header *v0 = draw->vcache.get_vertex( draw, i0 );
- struct vertex_header *v1 = draw->vcache.get_vertex( draw, i1 );
- struct vertex_header *v2 = draw->vcache.get_vertex( draw, i2 );
-
- prim->reset_line_stipple = reset_stipple;
-
- prim->edgeflags = ef_mask & ((v0->edgeflag << 0) |
- (v1->edgeflag << 1) |
- (v2->edgeflag << 2));
- prim->pad = 0;
- prim->v[0] = v0;
- prim->v[1] = v1;
- prim->v[2] = v2;
-}
-
-
-static void do_ef_quad( struct draw_context *draw,
- unsigned v0,
- unsigned v1,
- unsigned v2,
- unsigned v3 )
-{
- const unsigned omitEdge2 = ~(1 << 1);
- const unsigned omitEdge3 = ~(1 << 2);
- do_ef_triangle( draw, 1, omitEdge2, v0, v1, v3 );
- do_ef_triangle( draw, 0, omitEdge3, v1, v2, v3 );
-}
-
-static void do_quad( struct draw_context *draw,
- unsigned v0,
- unsigned v1,
- unsigned v2,
- unsigned v3 )
-{
- do_triangle( draw, v0, v1, v3 );
- do_triangle( draw, v1, v2, v3 );
-}
-
-
-/**
- * Main entrypoint to draw some number of points/lines/triangles
- */
-static void
-draw_prim( struct draw_context *draw,
- unsigned prim, unsigned start, unsigned count )
-{
- unsigned i;
- boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
- draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL);
-
-// debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count );
-
- switch (prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i ++) {
- do_point( draw,
- start + i );
- }
- break;
-
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2) {
- do_line( draw,
- TRUE,
- start + i + 0,
- start + i + 1);
- }
- break;
-
- case PIPE_PRIM_LINE_LOOP:
- if (count >= 2) {
- for (i = 1; i < count; i++) {
- do_line( draw,
- i == 1, /* XXX: only if vb not split */
- start + i - 1,
- start + i );
- }
-
- do_line( draw,
- 0,
- start + count - 1,
- start + 0 );
- }
- break;
-
- case PIPE_PRIM_LINE_STRIP:
- for (i = 1; i < count; i++) {
- do_line( draw,
- i == 1,
- start + i - 1,
- start + i );
- }
- break;
-
- case PIPE_PRIM_TRIANGLES:
- if (unfilled) {
- for (i = 0; i+2 < count; i += 3) {
- do_ef_triangle( draw,
- 1,
- ~0,
- start + i + 0,
- start + i + 1,
- start + i + 2 );
- }
- }
- else {
- for (i = 0; i+2 < count; i += 3) {
- do_triangle( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2 );
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_STRIP:
- for (i = 0; i+2 < count; i++) {
- if (i & 1) {
- do_triangle( draw,
- start + i + 1,
- start + i + 0,
- start + i + 2 );
- }
- else {
- do_triangle( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2 );
- }
- }
- break;
-
- case PIPE_PRIM_TRIANGLE_FAN:
- if (count >= 3) {
- for (i = 0; i+2 < count; i++) {
- do_triangle( draw,
- start + 0,
- start + i + 1,
- start + i + 2 );
- }
- }
- break;
-
-
- case PIPE_PRIM_QUADS:
- if (unfilled) {
- for (i = 0; i+3 < count; i += 4) {
- do_ef_quad( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2,
- start + i + 3);
- }
- }
- else {
- for (i = 0; i+3 < count; i += 4) {
- do_quad( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2,
- start + i + 3);
- }
- }
- break;
-
- case PIPE_PRIM_QUAD_STRIP:
- if (unfilled) {
- for (i = 0; i+3 < count; i += 2) {
- do_ef_quad( draw,
- start + i + 2,
- start + i + 0,
- start + i + 1,
- start + i + 3);
- }
- }
- else {
- for (i = 0; i+3 < count; i += 2) {
- do_quad( draw,
- start + i + 2,
- start + i + 0,
- start + i + 1,
- start + i + 3);
- }
- }
- break;
-
- case PIPE_PRIM_POLYGON:
- if (unfilled) {
- unsigned ef_mask = (1<<2) | (1<<0);
-
- for (i = 0; i+2 < count; i++) {
-
- if (i + 3 >= count)
- ef_mask |= (1<<1);
-
- do_ef_triangle( draw,
- i == 0,
- ef_mask,
- start + i + 1,
- start + i + 2,
- start + 0);
-
- ef_mask &= ~(1<<2);
- }
- }
- else {
- for (i = 0; i+2 < count; i++) {
- do_triangle( draw,
- start + i + 1,
- start + i + 2,
- start + 0);
- }
- }
- break;
-
- default:
- assert(0);
- break;
- }
-}
-
-
-
-
-/**
- * Draw vertex arrays
- * This is the main entrypoint into the drawing module.
- * \param prim one of PIPE_PRIM_x
- * \param start index of first vertex to draw
- * \param count number of vertices to draw
- */
-void
-draw_arrays(struct draw_context *draw, unsigned prim,
- unsigned start, unsigned count)
-{
- if (reduced_prim[prim] != draw->reduced_prim) {
- draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
- draw->reduced_prim = reduced_prim[prim];
- }
-
- /* drawing done here: */
- draw_prim(draw, prim, start, count);
-}
-
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Private data structures, etc for the draw module.
- */
-
-
-/**
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- * Brian Paul
- */
-
-
-#ifndef DRAW_PRIVATE_H
-#define DRAW_PRIVATE_H
-
-
-#include "pipe/p_state.h"
-#include "pipe/p_defines.h"
-
-#include "x86/rtasm/x86sse.h"
-#include "tgsi/exec/tgsi_exec.h"
-
-
-struct gallivm_prog;
-struct gallivm_cpu_engine;
-
-/**
- * Basic vertex info.
- * Carry some useful information around with the vertices in the prim pipe.
- */
-struct vertex_header {
- unsigned clipmask:12;
- unsigned edgeflag:1;
- unsigned pad:3;
- unsigned vertex_id:16;
-
- float clip[4];
-
- float data[][4]; /* Note variable size */
-};
-
-/* NOTE: It should match vertex_id size above */
-#define UNDEFINED_VERTEX_ID 0xffff
-
-/* XXX This is too large */
-#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float))
-
-
-
-/**
- * Basic info for a point/line/triangle primitive.
- */
-struct prim_header {
- float det; /**< front/back face determinant */
- unsigned reset_line_stipple:1;
- unsigned edgeflags:3;
- unsigned pad:28;
- struct vertex_header *v[3]; /**< 1 to 3 vertex pointers */
-};
-
-
-
-struct draw_context;
-
-/**
- * Base class for all primitive drawing stages.
- */
-struct draw_stage
-{
- struct draw_context *draw; /**< parent context */
-
- struct draw_stage *next; /**< next stage in pipeline */
-
- struct vertex_header **tmp; /**< temp vert storage, such as for clipping */
- unsigned nr_tmps;
-
- void (*point)( struct draw_stage *,
- struct prim_header * );
-
- void (*line)( struct draw_stage *,
- struct prim_header * );
-
- void (*tri)( struct draw_stage *,
- struct prim_header * );
-
- void (*flush)( struct draw_stage *,
- unsigned flags );
-
- void (*reset_stipple_counter)( struct draw_stage * );
-
- void (*destroy)( struct draw_stage * );
-};
-
-
-#define PRIM_QUEUE_LENGTH 16
-#define VCACHE_SIZE 32
-#define VCACHE_OVERFLOW 4
-#define VS_QUEUE_LENGTH (VCACHE_SIZE + VCACHE_OVERFLOW + 1) /* can never fill up */
-
-/**
- * Private version of the compiled vertex_shader
- */
-struct draw_vertex_shader {
- const struct pipe_shader_state *state;
-#if defined(__i386__) || defined(__386__)
- struct x86_function sse2_program;
-#endif
-#ifdef MESA_LLVM
- struct gallivm_prog *llvm_prog;
-#endif
-};
-
-
-/* Internal function for vertex fetch.
- */
-typedef void (*fetch_func)(const void *ptr, float *attrib);
-typedef void (*full_fetch_func)( struct draw_context *draw,
- struct tgsi_exec_machine *machine,
- const unsigned *elts,
- unsigned count );
-
-
-
-/**
- * Private context for the drawing module.
- */
-struct draw_context
-{
- /** Drawing/primitive pipeline stages */
- struct {
- struct draw_stage *first; /**< one of the following */
-
- struct draw_stage *validate;
-
- /* stages (in logical order) */
- struct draw_stage *flatshade;
- struct draw_stage *clip;
- struct draw_stage *cull;
- struct draw_stage *twoside;
- struct draw_stage *offset;
- struct draw_stage *unfilled;
- struct draw_stage *stipple;
- struct draw_stage *wide;
- struct draw_stage *rasterize;
- } pipeline;
-
- /* pipe state that we need: */
- const struct pipe_rasterizer_state *rasterizer;
- struct pipe_viewport_state viewport;
- struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX];
- struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX];
- const struct draw_vertex_shader *vertex_shader;
-
- uint num_vs_outputs; /**< convenience, from vertex_shader */
-
- /* user-space vertex data, buffers */
- struct {
- /** vertex element/index buffer (ex: glDrawElements) */
- const void *elts;
- /** bytes per index (0, 1, 2 or 4) */
- unsigned eltSize;
-
- /** vertex arrays */
- const void *vbuffer[PIPE_ATTRIB_MAX];
-
- /** constant buffer (for vertex shader) */
- const void *constants;
- } user;
-
- /* Clip derived state:
- */
- float plane[12][4];
- unsigned nr_planes;
-
- boolean convert_wide_points; /**< convert wide points to tris? */
- boolean convert_wide_lines; /**< convert side lines to tris? */
-
- unsigned reduced_prim;
-
- /** TGSI program interpreter runtime state */
- struct tgsi_exec_machine machine;
-
- /* Vertex fetch internal state
- */
- struct {
- const ubyte *src_ptr[PIPE_ATTRIB_MAX];
- unsigned pitch[PIPE_ATTRIB_MAX];
- fetch_func fetch[PIPE_ATTRIB_MAX];
- unsigned nr_attrs;
- full_fetch_func fetch_func;
- } vertex_fetch;
-
- /* Post-tnl vertex cache:
- */
- struct {
- unsigned referenced; /**< bitfield */
- unsigned idx[VCACHE_SIZE + VCACHE_OVERFLOW];
- struct vertex_header *vertex[VCACHE_SIZE + VCACHE_OVERFLOW];
- unsigned overflow;
-
- /** To find space in the vertex cache: */
- struct vertex_header *(*get_vertex)( struct draw_context *draw,
- unsigned i );
- } vcache;
-
- /* Vertex shader queue:
- */
- struct {
- struct {
- unsigned elt; /**< index into the user's vertex arrays */
- struct vertex_header *dest; /**< points into vcache.vertex[] array */
- } queue[VS_QUEUE_LENGTH];
- unsigned queue_nr;
- } vs;
-
- /**
- * Run the vertex shader on all vertices in the vertex queue.
- */
- void (*shader_queue_flush)(struct draw_context *draw);
-
- /* Prim pipeline queue:
- */
- struct {
- /* Need to queue up primitives until their vertices have been
- * transformed by a vs queue flush.
- */
- struct prim_header queue[PRIM_QUEUE_LENGTH];
- unsigned queue_nr;
- } pq;
-
- int use_sse : 1;
-#ifdef MESA_LLVM
- struct gallivm_cpu_engine *engine;
-#endif
-
- void *driver_private;
-};
-
-
-
-extern struct draw_stage *draw_unfilled_stage( struct draw_context *context );
-extern struct draw_stage *draw_twoside_stage( struct draw_context *context );
-extern struct draw_stage *draw_offset_stage( struct draw_context *context );
-extern struct draw_stage *draw_clip_stage( struct draw_context *context );
-extern struct draw_stage *draw_flatshade_stage( struct draw_context *context );
-extern struct draw_stage *draw_cull_stage( struct draw_context *context );
-extern struct draw_stage *draw_stipple_stage( struct draw_context *context );
-extern struct draw_stage *draw_wide_stage( struct draw_context *context );
-extern struct draw_stage *draw_validate_stage( struct draw_context *context );
-
-
-extern void draw_free_temp_verts( struct draw_stage *stage );
-
-extern void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr );
-
-extern void draw_reset_vertex_ids( struct draw_context *draw );
-
-
-extern int draw_vertex_cache_check_space( struct draw_context *draw,
- unsigned nr_verts );
-
-extern void draw_vertex_cache_invalidate( struct draw_context *draw );
-extern void draw_vertex_cache_unreference( struct draw_context *draw );
-extern void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw );
-
-
-extern void draw_vertex_shader_queue_flush( struct draw_context *draw );
-#ifdef MESA_LLVM
-extern void draw_vertex_shader_queue_flush_llvm( struct draw_context *draw );
-#endif
-
-struct tgsi_exec_machine;
-
-extern void draw_update_vertex_fetch( struct draw_context *draw );
-
-
-#define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */
-#define DRAW_FLUSH_PRIM_QUEUE 0x2
-#define DRAW_FLUSH_VERTEX_CACHE 0x4
-#define DRAW_FLUSH_STATE_CHANGE 0x8
-#define DRAW_FLUSH_BACKEND 0x10
-
-
-void draw_do_flush( struct draw_context *draw, unsigned flags );
-
-
-
-/**
- * Get a writeable copy of a vertex.
- * \param stage drawing stage info
- * \param vert the vertex to copy (source)
- * \param idx index into stage's tmp[] array to put the copy (dest)
- * \return pointer to the copied vertex
- */
-static INLINE struct vertex_header *
-dup_vert( struct draw_stage *stage,
- const struct vertex_header *vert,
- unsigned idx )
-{
- struct vertex_header *tmp = stage->tmp[idx];
- const uint vsize = sizeof(struct vertex_header)
- + stage->draw->num_vs_outputs * 4 * sizeof(float);
- memcpy(tmp, vert, vsize);
- tmp->vertex_id = UNDEFINED_VERTEX_ID;
- return tmp;
-}
-
-static INLINE float
-dot4(const float *a, const float *b)
-{
- float result = (a[0]*b[0] +
- a[1]*b[1] +
- a[2]*b[2] +
- a[3]*b[3]);
-
- return result;
-}
-
-#endif /* DRAW_PRIVATE_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* Authors: Keith Whitwell <keith@tungstengraphics.com>
- */
-
-/* Implement line stipple by cutting lines up into smaller lines.
- * There are hundreds of ways to implement line stipple, this is one
- * choice that should work in all situations, requires no state
- * manipulations, but with a penalty in terms of large amounts of
- * generated geometry.
- */
-
-
-#include "pipe/p_util.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
-
-
-/** Subclass of draw_stage */
-struct stipple_stage {
- struct draw_stage stage;
- float counter;
- uint pattern;
- uint factor;
-};
-
-
-static INLINE struct stipple_stage *
-stipple_stage(struct draw_stage *stage)
-{
- return (struct stipple_stage *) stage;
-}
-
-
-/**
- * Compute interpolated vertex attributes for 'dst' at position 't'
- * between 'v0' and 'v1'.
- * XXX using linear interpolation for all attribs at this time.
- */
-static void
-screen_interp( struct draw_context *draw,
- struct vertex_header *dst,
- float t,
- const struct vertex_header *v0,
- const struct vertex_header *v1 )
-{
- uint attr;
- for (attr = 0; attr < draw->num_vs_outputs; attr++) {
- const float *val0 = v0->data[attr];
- const float *val1 = v1->data[attr];
- float *newv = dst->data[attr];
- uint i;
- for (i = 0; i < 4; i++) {
- newv[i] = val0[i] + t * (val1[i] - val0[i]);
- }
- }
-}
-
-
-static void
-emit_segment(struct draw_stage *stage, struct prim_header *header,
- float t0, float t1)
-{
- struct vertex_header *v0new = dup_vert(stage, header->v[0], 0);
- struct vertex_header *v1new = dup_vert(stage, header->v[1], 1);
- struct prim_header newprim = *header;
-
- if (t0 > 0.0) {
- screen_interp( stage->draw, v0new, t0, header->v[0], header->v[1] );
- newprim.v[0] = v0new;
- }
-
- if (t1 < 1.0) {
- screen_interp( stage->draw, v1new, t1, header->v[0], header->v[1] );
- newprim.v[1] = v1new;
- }
-
- stage->next->line( stage->next, &newprim );
-}
-
-
-static INLINE unsigned
-stipple_test(int counter, ushort pattern, int factor)
-{
- int b = (counter / factor) & 0xf;
- return (1 << b) & pattern;
-}
-
-
-static void
-stipple_line(struct draw_stage *stage, struct prim_header *header)
-{
- struct stipple_stage *stipple = stipple_stage(stage);
- struct vertex_header *v0 = header->v[0];
- struct vertex_header *v1 = header->v[1];
- const float *pos0 = v0->data[0];
- const float *pos1 = v1->data[0];
- float start = 0;
- int state = 0;
-
- float x0 = pos0[0];
- float x1 = pos1[0];
- float y0 = pos0[1];
- float y1 = pos1[1];
-
- float dx = x0 > x1 ? x0 - x1 : x1 - x0;
- float dy = y0 > y1 ? y0 - y1 : y1 - y0;
-
- float length = MAX2(dx, dy);
- int i;
-
- /* XXX ToDo: intead of iterating pixel-by-pixel, use a look-up table.
- */
- for (i = 0; i < length; i++) {
- int result = stipple_test( (int) stipple->counter+i,
- (ushort) stipple->pattern, stipple->factor );
- if (result != state) {
- /* changing from "off" to "on" or vice versa */
- if (state) {
- if (start != i) {
- /* finishing an "on" segment */
- emit_segment( stage, header, start / length, i / length );
- }
- }
- else {
- /* starting an "on" segment */
- start = (float) i;
- }
- state = result;
- }
- }
-
- if (state && start < length)
- emit_segment( stage, header, start / length, 1.0 );
-
- stipple->counter += length;
-}
-
-
-static void
-reset_stipple_counter(struct draw_stage *stage)
-{
- struct stipple_stage *stipple = stipple_stage(stage);
- stipple->counter = 0;
- stage->next->reset_stipple_counter( stage->next );
-}
-
-
-static void
-stipple_first_line(struct draw_stage *stage,
- struct prim_header *header)
-{
- struct stipple_stage *stipple = stipple_stage(stage);
- struct draw_context *draw = stage->draw;
-
- stipple->pattern = draw->rasterizer->line_stipple_pattern;
- stipple->factor = draw->rasterizer->line_stipple_factor + 1;
-
- stage->line = stipple_line;
- stage->line( stage, header );
-}
-
-
-static void
-stipple_flush(struct draw_stage *stage, unsigned flags)
-{
- stage->line = stipple_first_line;
- stage->next->flush( stage->next, flags );
-}
-
-
-static void
-passthrough_point(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->point( stage->next, header );
-}
-
-
-static void
-passthrough_tri(struct draw_stage *stage, struct prim_header *header)
-{
- stage->next->tri(stage->next, header);
-}
-
-
-static void
-stipple_destroy( struct draw_stage *stage )
-{
- draw_free_temp_verts( stage );
- FREE( stage );
-}
-
-
-/**
- * Create line stippler stage
- */
-struct draw_stage *draw_stipple_stage( struct draw_context *draw )
-{
- struct stipple_stage *stipple = CALLOC_STRUCT(stipple_stage);
-
- draw_alloc_temp_verts( &stipple->stage, 2 );
-
- stipple->stage.draw = draw;
- stipple->stage.next = NULL;
- stipple->stage.point = passthrough_point;
- stipple->stage.line = stipple_first_line;
- stipple->stage.tri = passthrough_tri;
- stipple->stage.reset_stipple_counter = reset_stipple_counter;
- stipple->stage.flush = stipple_flush;
- stipple->stage.destroy = stipple_destroy;
-
- return &stipple->stage;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* Authors: Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "pipe/p_util.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
-
-
-struct twoside_stage {
- struct draw_stage stage;
- float sign; /**< +1 or -1 */
- uint attrib_front0, attrib_back0;
- uint attrib_front1, attrib_back1;
-};
-
-
-static INLINE struct twoside_stage *twoside_stage( struct draw_stage *stage )
-{
- return (struct twoside_stage *)stage;
-}
-
-
-
-
-/**
- * Copy back color(s) to front color(s).
- */
-static INLINE struct vertex_header *
-copy_bfc( struct twoside_stage *twoside,
- const struct vertex_header *v,
- unsigned idx )
-{
- struct vertex_header *tmp = dup_vert( &twoside->stage, v, idx );
-
- if (twoside->attrib_back0) {
- COPY_4FV(tmp->data[twoside->attrib_front0],
- tmp->data[twoside->attrib_back0]);
- }
- if (twoside->attrib_back1) {
- COPY_4FV(tmp->data[twoside->attrib_front1],
- tmp->data[twoside->attrib_back1]);
- }
-
- return tmp;
-}
-
-
-/* Twoside tri:
- */
-static void twoside_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct twoside_stage *twoside = twoside_stage(stage);
-
- if (header->det * twoside->sign < 0.0) {
- /* this is a back-facing triangle */
- struct prim_header tmp;
-
- tmp.det = header->det;
- tmp.edgeflags = header->edgeflags;
- /* copy back attribs to front attribs */
- tmp.v[0] = copy_bfc(twoside, header->v[0], 0);
- tmp.v[1] = copy_bfc(twoside, header->v[1], 1);
- tmp.v[2] = copy_bfc(twoside, header->v[2], 2);
-
- stage->next->tri( stage->next, &tmp );
- }
- else {
- stage->next->tri( stage->next, header );
- }
-}
-
-
-static void twoside_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- /* pass-through */
- stage->next->line( stage->next, header );
-}
-
-
-static void twoside_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- /* pass-through */
- stage->next->point( stage->next, header );
-}
-
-
-static void twoside_first_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct twoside_stage *twoside = twoside_stage(stage);
- const struct pipe_shader_state *vs = stage->draw->vertex_shader->state;
- uint i;
-
- twoside->attrib_front0 = 0;
- twoside->attrib_front1 = 0;
- twoside->attrib_back0 = 0;
- twoside->attrib_back1 = 0;
-
- /* Find which vertex shader outputs are front/back colors */
- for (i = 0; i < vs->num_outputs; i++) {
- if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
- if (vs->output_semantic_index[i] == 0)
- twoside->attrib_front0 = i;
- else
- twoside->attrib_front1 = i;
- }
- if (vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) {
- if (vs->output_semantic_index[i] == 0)
- twoside->attrib_back0 = i;
- else
- twoside->attrib_back1 = i;
- }
- }
-
- if (!twoside->attrib_back0)
- twoside->attrib_front0 = 0;
-
- if (!twoside->attrib_back1)
- twoside->attrib_front1 = 0;
-
- /*
- * We'll multiply the primitive's determinant by this sign to determine
- * if the triangle is back-facing (negative).
- * sign = -1 for CCW, +1 for CW
- */
- twoside->sign = (stage->draw->rasterizer->front_winding == PIPE_WINDING_CCW) ? -1.0f : 1.0f;
-
- stage->tri = twoside_tri;
- stage->tri( stage, header );
-}
-
-
-static void twoside_flush( struct draw_stage *stage, unsigned flags )
-{
- stage->tri = twoside_first_tri;
- stage->next->flush( stage->next, flags );
-}
-
-
-static void twoside_reset_stipple_counter( struct draw_stage *stage )
-{
- stage->next->reset_stipple_counter( stage->next );
-}
-
-
-static void twoside_destroy( struct draw_stage *stage )
-{
- draw_free_temp_verts( stage );
- FREE( stage );
-}
-
-
-/**
- * Create twoside pipeline stage.
- */
-struct draw_stage *draw_twoside_stage( struct draw_context *draw )
-{
- struct twoside_stage *twoside = CALLOC_STRUCT(twoside_stage);
-
- draw_alloc_temp_verts( &twoside->stage, 3 );
-
- twoside->stage.draw = draw;
- twoside->stage.next = NULL;
- twoside->stage.point = twoside_point;
- twoside->stage.line = twoside_line;
- twoside->stage.tri = twoside_first_tri;
- twoside->stage.flush = twoside_flush;
- twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter;
- twoside->stage.destroy = twoside_destroy;
-
- return &twoside->stage;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \brief Drawing stage for handling glPolygonMode(line/point).
- * Convert triangles to points or lines as needed.
- */
-
-/* Authors: Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "pipe/p_util.h"
-#include "pipe/p_defines.h"
-#include "draw_private.h"
-
-
-struct unfilled_stage {
- struct draw_stage stage;
-
- /** [0] = front face, [1] = back face.
- * legal values: PIPE_POLYGON_MODE_FILL, PIPE_POLYGON_MODE_LINE,
- * and PIPE_POLYGON_MODE_POINT,
- */
- unsigned mode[2];
-};
-
-
-static INLINE struct unfilled_stage *unfilled_stage( struct draw_stage *stage )
-{
- return (struct unfilled_stage *)stage;
-}
-
-
-
-static void point( struct draw_stage *stage,
- struct vertex_header *v0 )
-{
- struct prim_header tmp;
- tmp.v[0] = v0;
- stage->next->point( stage->next, &tmp );
-}
-
-static void line( struct draw_stage *stage,
- struct vertex_header *v0,
- struct vertex_header *v1 )
-{
- struct prim_header tmp;
- tmp.v[0] = v0;
- tmp.v[1] = v1;
- stage->next->line( stage->next, &tmp );
-}
-
-
-static void points( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct vertex_header *v0 = header->v[0];
- struct vertex_header *v1 = header->v[1];
- struct vertex_header *v2 = header->v[2];
-
- if (header->edgeflags & 0x1) point( stage, v0 );
- if (header->edgeflags & 0x2) point( stage, v1 );
- if (header->edgeflags & 0x4) point( stage, v2 );
-}
-
-
-static void lines( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct vertex_header *v0 = header->v[0];
- struct vertex_header *v1 = header->v[1];
- struct vertex_header *v2 = header->v[2];
-
-#if 0
- assert(((header->edgeflags & 0x1) >> 0) == header->v[0]->edgeflag);
- assert(((header->edgeflags & 0x2) >> 1) == header->v[1]->edgeflag);
- assert(((header->edgeflags & 0x4) >> 2) == header->v[2]->edgeflag);
-#endif
-
- if (header->edgeflags & 0x1) line( stage, v0, v1 );
- if (header->edgeflags & 0x2) line( stage, v1, v2 );
- if (header->edgeflags & 0x4) line( stage, v2, v0 );
-}
-
-
-/* Unfilled tri:
- *
- * Note edgeflags in the vertex struct is not sufficient as we will
- * need to manipulate them when decomposing primitives???
- */
-static void unfilled_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct unfilled_stage *unfilled = unfilled_stage(stage);
- unsigned mode = unfilled->mode[header->det >= 0.0];
-
- switch (mode) {
- case PIPE_POLYGON_MODE_FILL:
- stage->next->tri( stage->next, header );
- break;
- case PIPE_POLYGON_MODE_LINE:
- lines( stage, header );
- break;
- case PIPE_POLYGON_MODE_POINT:
- points( stage, header );
- break;
- default:
- abort();
- }
-}
-
-
-static void unfilled_first_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct unfilled_stage *unfilled = unfilled_stage(stage);
-
- unfilled->mode[0] = stage->draw->rasterizer->fill_ccw; /* front */
- unfilled->mode[1] = stage->draw->rasterizer->fill_cw; /* back */
-
- stage->tri = unfilled_tri;
- stage->tri( stage, header );
-}
-
-
-static void unfilled_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->line( stage->next, header );
-}
-
-
-static void unfilled_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
-
-static void unfilled_flush( struct draw_stage *stage,
- unsigned flags )
-{
- stage->next->flush( stage->next, flags );
-
- stage->tri = unfilled_first_tri;
-}
-
-
-static void unfilled_reset_stipple_counter( struct draw_stage *stage )
-{
- stage->next->reset_stipple_counter( stage->next );
-}
-
-
-static void unfilled_destroy( struct draw_stage *stage )
-{
- draw_free_temp_verts( stage );
- FREE( stage );
-}
-
-
-/**
- * Create unfilled triangle stage.
- */
-struct draw_stage *draw_unfilled_stage( struct draw_context *draw )
-{
- struct unfilled_stage *unfilled = CALLOC_STRUCT(unfilled_stage);
-
- draw_alloc_temp_verts( &unfilled->stage, 0 );
-
- unfilled->stage.draw = draw;
- unfilled->stage.next = NULL;
- unfilled->stage.tmp = NULL;
- unfilled->stage.point = unfilled_point;
- unfilled->stage.line = unfilled_line;
- unfilled->stage.tri = unfilled_first_tri;
- unfilled->stage.flush = unfilled_flush;
- unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter;
- unfilled->stage.destroy = unfilled_destroy;
-
- return &unfilled->stage;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* Authors: Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "pipe/p_util.h"
-#include "pipe/p_defines.h"
-#include "draw_private.h"
-
-
-
-
-
-/**
- * Rebuild the rendering pipeline.
- */
-static struct draw_stage *validate_pipeline( struct draw_stage *stage )
-{
- struct draw_context *draw = stage->draw;
- struct draw_stage *next = draw->pipeline.rasterize;
- int need_det = 0;
- int precalc_flat = 0;
-
- /* Set the validate's next stage to the rasterize stage, so that it
- * can be found later if needed for flushing.
- */
- stage->next = next;
-
- /*
- * NOTE: we build up the pipeline in end-to-start order.
- *
- * TODO: make the current primitive part of the state and build
- * shorter pipelines for lines & points.
- */
-
- if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines) ||
- (draw->rasterizer->point_size != 1.0 && draw->convert_wide_points) ||
- draw->rasterizer->point_sprite) {
- draw->pipeline.wide->next = next;
- next = draw->pipeline.wide;
- }
-
- if (draw->rasterizer->line_stipple_enable) {
- draw->pipeline.stipple->next = next;
- next = draw->pipeline.stipple;
- precalc_flat = 1; /* only needed for lines really */
- }
-
- if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
- draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) {
- draw->pipeline.unfilled->next = next;
- next = draw->pipeline.unfilled;
- precalc_flat = 1; /* only needed for triangles really */
- need_det = 1;
- }
-
- if (draw->rasterizer->flatshade && precalc_flat) {
- draw->pipeline.flatshade->next = next;
- next = draw->pipeline.flatshade;
- }
-
- if (draw->rasterizer->offset_cw ||
- draw->rasterizer->offset_ccw) {
- draw->pipeline.offset->next = next;
- next = draw->pipeline.offset;
- need_det = 1;
- }
-
- if (draw->rasterizer->light_twoside) {
- draw->pipeline.twoside->next = next;
- next = draw->pipeline.twoside;
- need_det = 1;
- }
-
- /* Always run the cull stage as we calculate determinant there
- * also.
- *
- * This can actually be a win as culling out the triangles can lead
- * to less work emitting vertices, smaller vertex buffers, etc.
- * It's difficult to say whether this will be true in general.
- */
- if (need_det || draw->rasterizer->cull_mode) {
- draw->pipeline.cull->next = next;
- next = draw->pipeline.cull;
- }
-
- /* Clip stage
- */
- if (!draw->rasterizer->bypass_clipping)
- {
- draw->pipeline.clip->next = next;
- next = draw->pipeline.clip;
- }
-
-
- draw->pipeline.first = next;
- return next;
-}
-
-static void validate_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct draw_stage *pipeline = validate_pipeline( stage );
- pipeline->tri( pipeline, header );
-}
-
-static void validate_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct draw_stage *pipeline = validate_pipeline( stage );
- pipeline->line( pipeline, header );
-}
-
-static void validate_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct draw_stage *pipeline = validate_pipeline( stage );
- pipeline->point( pipeline, header );
-}
-
-static void validate_reset_stipple_counter( struct draw_stage *stage )
-{
- struct draw_stage *pipeline = validate_pipeline( stage );
- pipeline->reset_stipple_counter( pipeline );
-}
-
-static void validate_flush( struct draw_stage *stage,
- unsigned flags )
-{
- /* May need to pass a backend flush on to the rasterize stage.
- */
- if (stage->next)
- stage->next->flush( stage->next, flags );
-}
-
-
-static void validate_destroy( struct draw_stage *stage )
-{
- FREE( stage );
-}
-
-
-/**
- * Create validate pipeline stage.
- */
-struct draw_stage *draw_validate_stage( struct draw_context *draw )
-{
- struct draw_stage *stage = CALLOC_STRUCT(draw_stage);
-
- stage->draw = draw;
- stage->next = NULL;
- stage->point = validate_point;
- stage->line = validate_line;
- stage->tri = validate_tri;
- stage->flush = validate_flush;
- stage->reset_stipple_counter = validate_reset_stipple_counter;
- stage->destroy = validate_destroy;
-
- return stage;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \file
- * Vertex buffer drawing stage.
- *
- * \author José Fonseca <jrfonsec@tungstengraphics.com>
- * \author Keith Whitwell <keith@tungstengraphics.com>
- */
-
-
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-
-#include "draw_vbuf.h"
-#include "draw_private.h"
-#include "draw_vertex.h"
-#include "draw_vf.h"
-
-
-/**
- * Vertex buffer emit stage.
- */
-struct vbuf_stage {
- struct draw_stage stage; /**< This must be first (base class) */
-
- struct vbuf_render *render;
-
- const struct vertex_info *vinfo;
-
- /** Vertex size in bytes */
- unsigned vertex_size;
-
- struct draw_vertex_fetch *vf;
-
- /* FIXME: we have no guarantee that 'unsigned' is 32bit */
-
- /** Vertices in hardware format */
- unsigned *vertices;
- unsigned *vertex_ptr;
- unsigned max_vertices;
- unsigned nr_vertices;
-
- /** Indices */
- ushort *indices;
- unsigned max_indices;
- unsigned nr_indices;
-
- /** Pipe primitive */
- unsigned prim;
-};
-
-
-/**
- * Basically a cast wrapper.
- */
-static INLINE struct vbuf_stage *
-vbuf_stage( struct draw_stage *stage )
-{
- assert(stage);
- return (struct vbuf_stage *)stage;
-}
-
-
-static void vbuf_flush_indices( struct vbuf_stage *vbuf );
-static void vbuf_flush_vertices( struct vbuf_stage *vbuf );
-static void vbuf_alloc_vertices( struct vbuf_stage *vbuf );
-
-
-static INLINE boolean
-overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz )
-{
- unsigned long used = (unsigned long) ((char *)ptr - (char *)map);
- return (used + bytes) > bufsz;
-}
-
-
-static INLINE void
-check_space( struct vbuf_stage *vbuf, unsigned nr )
-{
- if (vbuf->nr_vertices + nr > vbuf->max_vertices ) {
- vbuf_flush_vertices(vbuf);
- vbuf_alloc_vertices(vbuf);
- }
-
- if (vbuf->nr_indices + nr > vbuf->max_indices )
- vbuf_flush_indices(vbuf);
-}
-
-
-#if 0
-static INLINE void
-dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data)
-{
- assert(vinfo == vbuf->render->get_vertex_info(vbuf->render));
- unsigned i, j, k;
-
- for (i = 0; i < vinfo->num_attribs; i++) {
- j = vinfo->src_index[i];
- switch (vinfo->emit[i]) {
- case EMIT_OMIT:
- debug_printf("EMIT_OMIT:");
- break;
- case EMIT_ALL:
- assert(i == 0);
- assert(j == 0);
- debug_printf("EMIT_ALL:\t");
- for(k = 0; k < vinfo->size*4; ++k)
- debug_printf("%02x ", *data++);
- break;
- case EMIT_1F:
- debug_printf("EMIT_1F:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- break;
- case EMIT_1F_PSIZE:
- debug_printf("EMIT_1F_PSIZE:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- break;
- case EMIT_2F:
- debug_printf("EMIT_2F:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- break;
- case EMIT_3F:
- debug_printf("EMIT_3F:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- data += sizeof(float);
- break;
- case EMIT_4F:
- debug_printf("EMIT_4F:\t");
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- debug_printf("%f ", *(float *)data); data += sizeof(float);
- break;
- case EMIT_4UB:
- debug_printf("EMIT_4UB:\t");
- debug_printf("%u ", *data++);
- debug_printf("%u ", *data++);
- debug_printf("%u ", *data++);
- debug_printf("%u ", *data++);
- break;
- default:
- assert(0);
- }
- debug_printf("\n");
- }
- debug_printf("\n");
-}
-#endif
-
-
-/**
- * Extract the needed fields from post-transformed vertex and emit
- * a hardware(driver) vertex.
- * Recall that the vertices are constructed by the 'draw' module and
- * have a couple of slots at the beginning (1-dword header, 4-dword
- * clip pos) that we ignore here. We only use the vertex->data[] fields.
- */
-static INLINE void
-emit_vertex( struct vbuf_stage *vbuf,
- struct vertex_header *vertex )
-{
-#if 0
- debug_printf("emit vertex %d to %p\n",
- vbuf->nr_vertices, vbuf->vertex_ptr);
-#endif
-
- if(vertex->vertex_id != UNDEFINED_VERTEX_ID) {
- if(vertex->vertex_id < vbuf->nr_vertices)
- return;
- else
- debug_printf("Bad vertex id 0x%04x (>= 0x%04x)\n",
- vertex->vertex_id, vbuf->nr_vertices);
- return;
- }
-
- vertex->vertex_id = vbuf->nr_vertices++;
-
- if(!vbuf->vf) {
- const struct vertex_info *vinfo = vbuf->vinfo;
- uint i;
- uint count = 0; /* for debug/sanity */
-
- assert(vinfo == vbuf->render->get_vertex_info(vbuf->render));
-
- for (i = 0; i < vinfo->num_attribs; i++) {
- uint j = vinfo->src_index[i];
- switch (vinfo->emit[i]) {
- case EMIT_OMIT:
- /* no-op */
- break;
- case EMIT_ALL:
- /* just copy the whole vertex as-is to the vbuf */
- assert(i == 0);
- assert(j == 0);
- memcpy(vbuf->vertex_ptr, vertex, vinfo->size * 4);
- vbuf->vertex_ptr += vinfo->size;
- count += vinfo->size;
- break;
- case EMIT_1F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- count++;
- break;
- case EMIT_1F_PSIZE:
- *vbuf->vertex_ptr++ = fui(vbuf->stage.draw->rasterizer->point_size);
- count++;
- break;
- case EMIT_2F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
- count += 2;
- break;
- case EMIT_3F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][2]);
- count += 3;
- break;
- case EMIT_4F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][2]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][3]);
- count += 4;
- break;
- case EMIT_4UB:
- *vbuf->vertex_ptr++ = pack_ub4(float_to_ubyte( vertex->data[j][2] ),
- float_to_ubyte( vertex->data[j][1] ),
- float_to_ubyte( vertex->data[j][0] ),
- float_to_ubyte( vertex->data[j][3] ));
- count += 1;
- break;
- default:
- assert(0);
- }
- }
- assert(count == vinfo->size);
-#if 0
- {
- static float data[256];
- draw_vf_emit_vertex(vbuf->vf, vertex, data);
- if(memcmp((uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size, data, vbuf->vertex_size)) {
- debug_printf("With VF:\n");
- dump_emitted_vertex(vbuf->vinfo, (uint8_t *)data);
- debug_printf("Without VF:\n");
- dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size);
- assert(0);
- }
- }
-#endif
- }
- else {
- draw_vf_emit_vertex(vbuf->vf, vertex, vbuf->vertex_ptr);
-
- vbuf->vertex_ptr += vbuf->vertex_size/4;
- }
-}
-
-
-static void
-vbuf_tri( struct draw_stage *stage,
- struct prim_header *prim )
-{
- struct vbuf_stage *vbuf = vbuf_stage( stage );
- unsigned i;
-
- check_space( vbuf, 3 );
-
- for (i = 0; i < 3; i++) {
- emit_vertex( vbuf, prim->v[i] );
-
- vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id;
- }
-}
-
-
-static void
-vbuf_line( struct draw_stage *stage,
- struct prim_header *prim )
-{
- struct vbuf_stage *vbuf = vbuf_stage( stage );
- unsigned i;
-
- check_space( vbuf, 2 );
-
- for (i = 0; i < 2; i++) {
- emit_vertex( vbuf, prim->v[i] );
-
- vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id;
- }
-}
-
-
-static void
-vbuf_point( struct draw_stage *stage,
- struct prim_header *prim )
-{
- struct vbuf_stage *vbuf = vbuf_stage( stage );
-
- check_space( vbuf, 1 );
-
- emit_vertex( vbuf, prim->v[0] );
-
- vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[0]->vertex_id;
-}
-
-
-/**
- * Set the prim type for subsequent vertices.
- * This may result in a new vertex size. The existing vbuffer (if any)
- * will be flushed if needed and a new one allocated.
- */
-static void
-vbuf_set_prim( struct vbuf_stage *vbuf, uint newprim )
-{
- const struct vertex_info *vinfo;
- unsigned vertex_size;
-
- assert(newprim == PIPE_PRIM_POINTS ||
- newprim == PIPE_PRIM_LINES ||
- newprim == PIPE_PRIM_TRIANGLES);
-
- vbuf->prim = newprim;
- vbuf->render->set_primitive(vbuf->render, newprim);
-
- vinfo = vbuf->render->get_vertex_info(vbuf->render);
- vertex_size = vinfo->size * sizeof(float);
-
- if (vertex_size != vbuf->vertex_size)
- vbuf_flush_vertices(vbuf);
-
- vbuf->vinfo = vinfo;
- vbuf->vertex_size = vertex_size;
- if(vbuf->vf)
- draw_vf_set_vertex_info(vbuf->vf,
- vbuf->vinfo,
- vbuf->stage.draw->rasterizer->point_size);
-
- if (!vbuf->vertices)
- vbuf_alloc_vertices(vbuf);
-}
-
-
-static void
-vbuf_first_tri( struct draw_stage *stage,
- struct prim_header *prim )
-{
- struct vbuf_stage *vbuf = vbuf_stage( stage );
-
- vbuf_flush_indices( vbuf );
- stage->tri = vbuf_tri;
- vbuf_set_prim(vbuf, PIPE_PRIM_TRIANGLES);
- stage->tri( stage, prim );
-}
-
-
-static void
-vbuf_first_line( struct draw_stage *stage,
- struct prim_header *prim )
-{
- struct vbuf_stage *vbuf = vbuf_stage( stage );
-
- vbuf_flush_indices( vbuf );
- stage->line = vbuf_line;
- vbuf_set_prim(vbuf, PIPE_PRIM_LINES);
- stage->line( stage, prim );
-}
-
-
-static void
-vbuf_first_point( struct draw_stage *stage,
- struct prim_header *prim )
-{
- struct vbuf_stage *vbuf = vbuf_stage( stage );
-
- vbuf_flush_indices( vbuf );
- stage->point = vbuf_point;
- vbuf_set_prim(vbuf, PIPE_PRIM_POINTS);
- stage->point( stage, prim );
-}
-
-
-static void
-vbuf_flush_indices( struct vbuf_stage *vbuf )
-{
- if(!vbuf->nr_indices)
- return;
-
- assert((uint) (vbuf->vertex_ptr - vbuf->vertices) ==
- vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned));
-
- switch(vbuf->prim) {
- case PIPE_PRIM_POINTS:
- break;
- case PIPE_PRIM_LINES:
- assert(vbuf->nr_indices % 2 == 0);
- break;
- case PIPE_PRIM_TRIANGLES:
- assert(vbuf->nr_indices % 3 == 0);
- break;
- default:
- assert(0);
- }
-
- vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices);
-
- vbuf->nr_indices = 0;
-
- /* don't need to reset point/line/tri functions */
-#if 0
- stage->point = vbuf_first_point;
- stage->line = vbuf_first_line;
- stage->tri = vbuf_first_tri;
-#endif
-}
-
-
-/**
- * Flush existing vertex buffer and allocate a new one.
- *
- * XXX: We separate flush-on-index-full and flush-on-vb-full, but may
- * raise issues uploading vertices if the hardware wants to flush when
- * we flush.
- */
-static void
-vbuf_flush_vertices( struct vbuf_stage *vbuf )
-{
- if(vbuf->vertices) {
- vbuf_flush_indices(vbuf);
-
- /* Reset temporary vertices ids */
- if(vbuf->nr_vertices)
- draw_reset_vertex_ids( vbuf->stage.draw );
-
- /* Free the vertex buffer */
- vbuf->render->release_vertices(vbuf->render,
- vbuf->vertices,
- vbuf->vertex_size,
- vbuf->nr_vertices);
- vbuf->max_vertices = vbuf->nr_vertices = 0;
- vbuf->vertex_ptr = vbuf->vertices = NULL;
-
- }
-}
-
-
-static void
-vbuf_alloc_vertices( struct vbuf_stage *vbuf )
-{
- assert(!vbuf->nr_indices);
- assert(!vbuf->vertices);
-
- /* Allocate a new vertex buffer */
- vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
- vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render,
- (ushort) vbuf->vertex_size,
- (ushort) vbuf->max_vertices);
- vbuf->vertex_ptr = vbuf->vertices;
-}
-
-
-
-static void
-vbuf_flush( struct draw_stage *stage, unsigned flags )
-{
- struct vbuf_stage *vbuf = vbuf_stage( stage );
-
- vbuf_flush_indices( vbuf );
-
- stage->point = vbuf_first_point;
- stage->line = vbuf_first_line;
- stage->tri = vbuf_first_tri;
-
- if (flags & DRAW_FLUSH_BACKEND)
- vbuf_flush_vertices( vbuf );
-}
-
-
-static void
-vbuf_reset_stipple_counter( struct draw_stage *stage )
-{
- /* XXX: Need to do something here for hardware with linestipple.
- */
- (void) stage;
-}
-
-
-static void vbuf_destroy( struct draw_stage *stage )
-{
- struct vbuf_stage *vbuf = vbuf_stage( stage );
-
- if(vbuf->indices)
- align_free( vbuf->indices );
-
- if(vbuf->vf)
- draw_vf_destroy( vbuf->vf );
-
- if (vbuf->render)
- vbuf->render->destroy( vbuf->render );
-
- FREE( stage );
-}
-
-
-/**
- * Create a new primitive vbuf/render stage.
- */
-struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
- struct vbuf_render *render )
-{
- struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage);
-
- if(!vbuf)
- return NULL;
-
- vbuf->stage.draw = draw;
- vbuf->stage.point = vbuf_first_point;
- vbuf->stage.line = vbuf_first_line;
- vbuf->stage.tri = vbuf_first_tri;
- vbuf->stage.flush = vbuf_flush;
- vbuf->stage.reset_stipple_counter = vbuf_reset_stipple_counter;
- vbuf->stage.destroy = vbuf_destroy;
-
- vbuf->render = render;
-
- assert(render->max_indices < UNDEFINED_VERTEX_ID);
- vbuf->max_indices = render->max_indices;
- vbuf->indices = (ushort *)
- align_malloc( vbuf->max_indices * sizeof(vbuf->indices[0]), 16 );
- if(!vbuf->indices)
- vbuf_destroy(&vbuf->stage);
-
- vbuf->vertices = NULL;
- vbuf->vertex_ptr = vbuf->vertices;
-
- vbuf->prim = ~0;
-
- if(!GETENV("GALLIUM_NOVF"))
- vbuf->vf = draw_vf_create();
-
- return &vbuf->stage;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \file
- * Vertex buffer drawing stage.
- *
- * \author Keith Whitwell <keith@tungstengraphics.com>
- * \author José Fonseca <jrfonsec@tungstengraphics.com>
- */
-
-#ifndef DRAW_VBUF_H_
-#define DRAW_VBUF_H_
-
-
-#include "pipe/p_util.h"
-
-
-struct draw_context;
-struct vertex_info;
-
-
-/**
- * Interface for hardware vertex buffer rendering.
- */
-struct vbuf_render {
-
- /**
- * Driver limits. May be tuned lower to improve cache hits on
- * index list.
- */
- unsigned max_indices;
- unsigned max_vertex_buffer_bytes;
-
- /**
- * Get the hardware vertex format.
- *
- * XXX: have this in draw_context instead?
- */
- const struct vertex_info *(*get_vertex_info)( struct vbuf_render * );
-
- /**
- * Request a destination for vertices.
- * Hardware renderers will use ttm memory, others will just malloc
- * something.
- */
- void *(*allocate_vertices)( struct vbuf_render *,
- ushort vertex_size,
- ushort nr_vertices );
-
- /**
- * Notify the renderer of the current primitive when it changes.
- * Prim is restricted to TRIANGLES, LINES and POINTS.
- */
- void (*set_primitive)( struct vbuf_render *, unsigned prim );
-
- /**
- * DrawElements, note indices are ushort:
- */
- void (*draw)( struct vbuf_render *,
- const ushort *indices,
- uint nr_indices );
-
- /**
- * Called when vbuf is done with this set of vertices:
- */
- void (*release_vertices)( struct vbuf_render *,
- void *vertices,
- unsigned vertex_size,
- unsigned vertices_used );
-
- void (*destroy)( struct vbuf_render * );
-};
-
-
-
-struct draw_stage *
-draw_vbuf_stage( struct draw_context *draw,
- struct vbuf_render *render );
-
-
-#endif /*DRAW_VBUF_H_*/
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/*
- * Functions for specifying the post-transformation vertex layout.
- *
- * Author:
- * Brian Paul
- * Keith Whitwell
- */
-
-
-#include "draw/draw_private.h"
-#include "draw/draw_vertex.h"
-
-
-/**
- * Compute the size of a vertex, in dwords/floats, to update the
- * vinfo->size field.
- */
-void
-draw_compute_vertex_size(struct vertex_info *vinfo)
-{
- uint i;
-
- vinfo->size = 0;
- for (i = 0; i < vinfo->num_attribs; i++) {
- switch (vinfo->emit[i]) {
- case EMIT_OMIT:
- break;
- case EMIT_4UB:
- /* fall-through */
- case EMIT_1F_PSIZE:
- /* fall-through */
- case EMIT_1F:
- vinfo->size += 1;
- break;
- case EMIT_2F:
- vinfo->size += 2;
- break;
- case EMIT_3F:
- vinfo->size += 3;
- break;
- case EMIT_4F:
- vinfo->size += 4;
- break;
- case EMIT_ALL:
- /* fall-through */
- default:
- assert(0);
- }
- }
-
- assert(vinfo->size * 4 <= MAX_VERTEX_SIZE);
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Post-transform vertex format info. The vertex_info struct is used by
- * the draw_vbuf code to emit hardware-specific vertex layouts into hw
- * vertex buffers.
- *
- * Author:
- * Brian Paul
- */
-
-
-#ifndef DRAW_VERTEX_H
-#define DRAW_VERTEX_H
-
-
-#include "pipe/p_state.h"
-
-
-/**
- * Vertex attribute emit modes
- */
-enum attrib_emit {
- EMIT_OMIT, /**< don't emit the attribute */
- EMIT_ALL, /**< emit whole post-xform vertex, w/ header */
- EMIT_1F,
- EMIT_1F_PSIZE, /**< insert constant point size */
- EMIT_2F,
- EMIT_3F,
- EMIT_4F,
- EMIT_4UB /**< XXX may need variations for RGBA vs BGRA, etc */
-};
-
-
-/**
- * Attribute interpolation mode
- */
-enum interp_mode {
- INTERP_NONE, /**< never interpolate vertex header info */
- INTERP_POS, /**< special case for frag position */
- INTERP_CONSTANT,
- INTERP_LINEAR,
- INTERP_PERSPECTIVE
-};
-
-
-/**
- * Information about hardware/rasterization vertex layout.
- */
-struct vertex_info
-{
- uint num_attribs;
- uint hwfmt[4]; /**< hardware format info for this format */
- enum interp_mode interp_mode[PIPE_MAX_SHADER_INPUTS];
- enum attrib_emit emit[PIPE_MAX_SHADER_INPUTS]; /**< EMIT_x */
- uint src_index[PIPE_MAX_SHADER_INPUTS]; /**< map to post-xform attribs */
- uint size; /**< total vertex size in dwords */
-};
-
-
-
-/**
- * Add another attribute to the given vertex_info object.
- * \param src_index indicates which post-transformed vertex attrib slot
- * corresponds to this attribute.
- * \return slot in which the attribute was added
- */
-static INLINE uint
-draw_emit_vertex_attr(struct vertex_info *vinfo,
- enum attrib_emit emit, enum interp_mode interp,
- uint src_index)
-{
- const uint n = vinfo->num_attribs;
- assert(n < PIPE_MAX_SHADER_INPUTS);
- vinfo->emit[n] = emit;
- vinfo->interp_mode[n] = interp;
- vinfo->src_index[n] = src_index;
- vinfo->num_attribs++;
- return n;
-}
-
-
-extern void draw_compute_vertex_size(struct vertex_info *vinfo);
-
-
-#endif /* DRAW_VERTEX_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "pipe/p_util.h"
-#include "draw_private.h"
-#include "draw_context.h"
-
-
-void draw_vertex_cache_invalidate( struct draw_context *draw )
-{
- assert(draw->pq.queue_nr == 0);
- assert(draw->vs.queue_nr == 0);
- assert(draw->vcache.referenced == 0);
-
- memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx));
-}
-
-
-/**
- * Check if vertex is in cache, otherwise add it. It won't go through
- * VS yet, not until there is a flush operation or the VS queue fills up.
- *
- * Note that cache entries are basically just two pointers: the first
- * an index into the user's vertex arrays, the second a location in
- * the vertex shader cache for the post-transformed vertex.
- *
- * \return pointer to location of (post-transformed) vertex header in the cache
- */
-static struct vertex_header *get_vertex( struct draw_context *draw,
- unsigned i )
-{
- unsigned slot = (i + (i>>5)) % VCACHE_SIZE;
-
- assert(slot < 32); /* so we don't exceed the bitfield size below */
-
- /* Cache miss?
- */
- if (draw->vcache.idx[slot] != i) {
-
- /* If slot is in use, use the overflow area:
- */
- if (draw->vcache.referenced & (1 << slot)) {
- slot = VCACHE_SIZE + draw->vcache.overflow++;
- }
-
- assert(slot < Elements(draw->vcache.idx));
-
- draw->vcache.idx[slot] = i;
-
- /* Add to vertex shader queue:
- */
- assert(draw->vs.queue_nr < VS_QUEUE_LENGTH);
- draw->vs.queue[draw->vs.queue_nr].dest = draw->vcache.vertex[slot];
- draw->vs.queue[draw->vs.queue_nr].elt = i;
- draw->vs.queue_nr++;
-
- /* Need to set the vertex's edge flag here. If we're being called
- * by do_ef_triangle(), that function needs edge flag info!
- */
- draw->vcache.vertex[slot]->clipmask = 0;
- draw->vcache.vertex[slot]->edgeflag = 1; /*XXX use user's edge flag! */
- draw->vcache.vertex[slot]->pad = 0;
- draw->vcache.vertex[slot]->vertex_id = UNDEFINED_VERTEX_ID;
- }
-
-
- /* primitive flushing may have cleared the bitfield but did not
- * clear the idx[] array values. Set the bit now. This fixes a
- * bug found when drawing long triangle fans.
- */
- draw->vcache.referenced |= (1 << slot);
- return draw->vcache.vertex[slot];
-}
-
-
-static struct vertex_header *get_uint_elt_vertex( struct draw_context *draw,
- unsigned i )
-{
- const unsigned *elts = (const unsigned *) draw->user.elts;
- return get_vertex( draw, elts[i] );
-}
-
-
-static struct vertex_header *get_ushort_elt_vertex( struct draw_context *draw,
- unsigned i )
-{
- const ushort *elts = (const ushort *) draw->user.elts;
- return get_vertex( draw, elts[i] );
-}
-
-
-static struct vertex_header *get_ubyte_elt_vertex( struct draw_context *draw,
- unsigned i )
-{
- const ubyte *elts = (const ubyte *) draw->user.elts;
- return get_vertex( draw, elts[i] );
-}
-
-
-void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw )
-{
- unsigned i;
-
- for (i = 0; i < Elements(draw->vcache.vertex); i++)
- draw->vcache.vertex[i]->vertex_id = UNDEFINED_VERTEX_ID;
-}
-
-
-void draw_vertex_cache_unreference( struct draw_context *draw )
-{
- draw->vcache.referenced = 0;
- draw->vcache.overflow = 0;
-}
-
-
-int draw_vertex_cache_check_space( struct draw_context *draw,
- unsigned nr_verts )
-{
- if (draw->vcache.overflow + nr_verts < VCACHE_OVERFLOW) {
- /* The vs queue is sized so that this can never happen:
- */
- assert(draw->vs.queue_nr + nr_verts < VS_QUEUE_LENGTH);
- return TRUE;
- }
- else
- return FALSE;
-}
-
-
-
-/**
- * Tell the drawing context about the index/element buffer to use
- * (ala glDrawElements)
- * If no element buffer is to be used (i.e. glDrawArrays) then this
- * should be called with eltSize=0 and elements=NULL.
- *
- * \param draw the drawing context
- * \param eltSize size of each element (1, 2 or 4 bytes)
- * \param elements the element buffer ptr
- */
-void
-draw_set_mapped_element_buffer( struct draw_context *draw,
- unsigned eltSize, void *elements )
-{
-// draw_statechange( draw );
-
- /* choose the get_vertex() function to use */
- switch (eltSize) {
- case 0:
- draw->vcache.get_vertex = get_vertex;
- break;
- case 1:
- draw->vcache.get_vertex = get_ubyte_elt_vertex;
- break;
- case 2:
- draw->vcache.get_vertex = get_ushort_elt_vertex;
- break;
- case 4:
- draw->vcache.get_vertex = get_uint_elt_vertex;
- break;
- default:
- assert(0);
- }
- draw->user.elts = elements;
- draw->user.eltSize = eltSize;
-}
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
-#include "draw_context.h"
-
-
-#define DRAW_DBG 0
-
-
-/**
- * Fetch a float[4] vertex attribute from memory, doing format/type
- * conversion as needed.
- *
- * This is probably needed/dupliocated elsewhere, eg format
- * conversion, texture sampling etc.
- */
-#define FETCH_ATTRIB( NAME, SZ, CVT ) \
-static void \
-fetch_##NAME(const void *ptr, float *attrib) \
-{ \
- static const float defaults[4] = { 0,0,0,1 }; \
- int i; \
- \
- for (i = 0; i < SZ; i++) { \
- attrib[i] = CVT; \
- } \
- \
- for (; i < 4; i++) { \
- attrib[i] = defaults[i]; \
- } \
-}
-
-#define CVT_64_FLOAT (float) ((double *) ptr)[i]
-#define CVT_32_FLOAT ((float *) ptr)[i]
-
-#define CVT_8_USCALED (float) ((unsigned char *) ptr)[i]
-#define CVT_16_USCALED (float) ((unsigned short *) ptr)[i]
-#define CVT_32_USCALED (float) ((unsigned int *) ptr)[i]
-
-#define CVT_8_SSCALED (float) ((char *) ptr)[i]
-#define CVT_16_SSCALED (float) ((short *) ptr)[i]
-#define CVT_32_SSCALED (float) ((int *) ptr)[i]
-
-#define CVT_8_UNORM (float) ((unsigned char *) ptr)[i] / 255.0f
-#define CVT_16_UNORM (float) ((unsigned short *) ptr)[i] / 65535.0f
-#define CVT_32_UNORM (float) ((unsigned int *) ptr)[i] / 4294967295.0f
-
-#define CVT_8_SNORM (float) ((char *) ptr)[i] / 127.0f
-#define CVT_16_SNORM (float) ((short *) ptr)[i] / 32767.0f
-#define CVT_32_SNORM (float) ((int *) ptr)[i] / 2147483647.0f
-
-FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT )
-FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT )
-FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT )
-FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT )
-
-FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT )
-FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT )
-FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT )
-FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT )
-
-FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED )
-FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED )
-FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED )
-FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED )
-
-FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED )
-FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED )
-FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED )
-FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED )
-
-FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM )
-FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM )
-FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM )
-FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM )
-
-FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM )
-FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM )
-FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM )
-FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM )
-
-FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED )
-FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED )
-FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED )
-FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED )
-
-FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED )
-FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED )
-FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED )
-FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED )
-
-FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM )
-FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM )
-FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM )
-FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM )
-
-FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM )
-FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM )
-FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM )
-FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM )
-
-FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED )
-FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED )
-FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED )
-FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED )
-
-FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED )
-FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED )
-FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED )
-FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED )
-
-FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
-FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM )
-FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM )
-FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM )
-
-FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM )
-FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM )
-FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM )
-FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM )
-
-FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM )
-//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
-
-
-
-static fetch_func get_fetch_func( enum pipe_format format )
-{
-#if 0
- {
- char tmp[80];
- pf_sprint_name(tmp, format);
- debug_printf("%s: %s\n", __FUNCTION__, tmp);
- }
-#endif
-
- switch (format) {
- case PIPE_FORMAT_R64_FLOAT:
- return fetch_R64_FLOAT;
- case PIPE_FORMAT_R64G64_FLOAT:
- return fetch_R64G64_FLOAT;
- case PIPE_FORMAT_R64G64B64_FLOAT:
- return fetch_R64G64B64_FLOAT;
- case PIPE_FORMAT_R64G64B64A64_FLOAT:
- return fetch_R64G64B64A64_FLOAT;
-
- case PIPE_FORMAT_R32_FLOAT:
- return fetch_R32_FLOAT;
- case PIPE_FORMAT_R32G32_FLOAT:
- return fetch_R32G32_FLOAT;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- return fetch_R32G32B32_FLOAT;
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- return fetch_R32G32B32A32_FLOAT;
-
- case PIPE_FORMAT_R32_UNORM:
- return fetch_R32_UNORM;
- case PIPE_FORMAT_R32G32_UNORM:
- return fetch_R32G32_UNORM;
- case PIPE_FORMAT_R32G32B32_UNORM:
- return fetch_R32G32B32_UNORM;
- case PIPE_FORMAT_R32G32B32A32_UNORM:
- return fetch_R32G32B32A32_UNORM;
-
- case PIPE_FORMAT_R32_USCALED:
- return fetch_R32_USCALED;
- case PIPE_FORMAT_R32G32_USCALED:
- return fetch_R32G32_USCALED;
- case PIPE_FORMAT_R32G32B32_USCALED:
- return fetch_R32G32B32_USCALED;
- case PIPE_FORMAT_R32G32B32A32_USCALED:
- return fetch_R32G32B32A32_USCALED;
-
- case PIPE_FORMAT_R32_SNORM:
- return fetch_R32_SNORM;
- case PIPE_FORMAT_R32G32_SNORM:
- return fetch_R32G32_SNORM;
- case PIPE_FORMAT_R32G32B32_SNORM:
- return fetch_R32G32B32_SNORM;
- case PIPE_FORMAT_R32G32B32A32_SNORM:
- return fetch_R32G32B32A32_SNORM;
-
- case PIPE_FORMAT_R32_SSCALED:
- return fetch_R32_SSCALED;
- case PIPE_FORMAT_R32G32_SSCALED:
- return fetch_R32G32_SSCALED;
- case PIPE_FORMAT_R32G32B32_SSCALED:
- return fetch_R32G32B32_SSCALED;
- case PIPE_FORMAT_R32G32B32A32_SSCALED:
- return fetch_R32G32B32A32_SSCALED;
-
- case PIPE_FORMAT_R16_UNORM:
- return fetch_R16_UNORM;
- case PIPE_FORMAT_R16G16_UNORM:
- return fetch_R16G16_UNORM;
- case PIPE_FORMAT_R16G16B16_UNORM:
- return fetch_R16G16B16_UNORM;
- case PIPE_FORMAT_R16G16B16A16_UNORM:
- return fetch_R16G16B16A16_UNORM;
-
- case PIPE_FORMAT_R16_USCALED:
- return fetch_R16_USCALED;
- case PIPE_FORMAT_R16G16_USCALED:
- return fetch_R16G16_USCALED;
- case PIPE_FORMAT_R16G16B16_USCALED:
- return fetch_R16G16B16_USCALED;
- case PIPE_FORMAT_R16G16B16A16_USCALED:
- return fetch_R16G16B16A16_USCALED;
-
- case PIPE_FORMAT_R16_SNORM:
- return fetch_R16_SNORM;
- case PIPE_FORMAT_R16G16_SNORM:
- return fetch_R16G16_SNORM;
- case PIPE_FORMAT_R16G16B16_SNORM:
- return fetch_R16G16B16_SNORM;
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- return fetch_R16G16B16A16_SNORM;
-
- case PIPE_FORMAT_R16_SSCALED:
- return fetch_R16_SSCALED;
- case PIPE_FORMAT_R16G16_SSCALED:
- return fetch_R16G16_SSCALED;
- case PIPE_FORMAT_R16G16B16_SSCALED:
- return fetch_R16G16B16_SSCALED;
- case PIPE_FORMAT_R16G16B16A16_SSCALED:
- return fetch_R16G16B16A16_SSCALED;
-
- case PIPE_FORMAT_R8_UNORM:
- return fetch_R8_UNORM;
- case PIPE_FORMAT_R8G8_UNORM:
- return fetch_R8G8_UNORM;
- case PIPE_FORMAT_R8G8B8_UNORM:
- return fetch_R8G8B8_UNORM;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- return fetch_R8G8B8A8_UNORM;
-
- case PIPE_FORMAT_R8_USCALED:
- return fetch_R8_USCALED;
- case PIPE_FORMAT_R8G8_USCALED:
- return fetch_R8G8_USCALED;
- case PIPE_FORMAT_R8G8B8_USCALED:
- return fetch_R8G8B8_USCALED;
- case PIPE_FORMAT_R8G8B8A8_USCALED:
- return fetch_R8G8B8A8_USCALED;
-
- case PIPE_FORMAT_R8_SNORM:
- return fetch_R8_SNORM;
- case PIPE_FORMAT_R8G8_SNORM:
- return fetch_R8G8_SNORM;
- case PIPE_FORMAT_R8G8B8_SNORM:
- return fetch_R8G8B8_SNORM;
- case PIPE_FORMAT_R8G8B8A8_SNORM:
- return fetch_R8G8B8A8_SNORM;
-
- case PIPE_FORMAT_R8_SSCALED:
- return fetch_R8_SSCALED;
- case PIPE_FORMAT_R8G8_SSCALED:
- return fetch_R8G8_SSCALED;
- case PIPE_FORMAT_R8G8B8_SSCALED:
- return fetch_R8G8B8_SSCALED;
- case PIPE_FORMAT_R8G8B8A8_SSCALED:
- return fetch_R8G8B8A8_SSCALED;
-
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- return fetch_A8R8G8B8_UNORM;
-
- case 0:
- return NULL; /* not sure why this is needed */
-
- default:
- assert(0);
- return NULL;
- }
-}
-
-
-static void
-transpose_4x4( float *out, const float *in )
-{
- /* This can be achieved in 12 sse instructions, plus the final
- * stores I guess. This is probably a bit more than that - maybe
- * 32 or so?
- */
- out[0] = in[0]; out[1] = in[4]; out[2] = in[8]; out[3] = in[12];
- out[4] = in[1]; out[5] = in[5]; out[6] = in[9]; out[7] = in[13];
- out[8] = in[2]; out[9] = in[6]; out[10] = in[10]; out[11] = in[14];
- out[12] = in[3]; out[13] = in[7]; out[14] = in[11]; out[15] = in[15];
-}
-
-
-
-static void fetch_xyz_rgb( struct draw_context *draw,
- struct tgsi_exec_machine *machine,
- const unsigned *elts,
- unsigned count )
-{
- const unsigned *pitch = draw->vertex_fetch.pitch;
- const ubyte **src = draw->vertex_fetch.src_ptr;
- int i;
-
- assert(count <= 4);
-
-// debug_printf("%s\n", __FUNCTION__);
-
- /* loop over vertex attributes (vertex shader inputs)
- */
-
- for (i = 0; i < 4; i++) {
- {
- const float *in = (const float *)(src[0] + elts[i] * pitch[0]);
- float *out = &machine->Inputs[0].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = in[2];
- out[12] = 1.0f;
- }
-
- {
- const float *in = (const float *)(src[1] + elts[i] * pitch[1]);
- float *out = &machine->Inputs[1].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = in[2];
- out[12] = 1.0f;
- }
- }
-}
-
-
-
-
-static void fetch_xyz_rgb_st( struct draw_context *draw,
- struct tgsi_exec_machine *machine,
- const unsigned *elts,
- unsigned count )
-{
- const unsigned *pitch = draw->vertex_fetch.pitch;
- const ubyte **src = draw->vertex_fetch.src_ptr;
- int i;
-
- assert(count <= 4);
-
- /* loop over vertex attributes (vertex shader inputs)
- */
-
- for (i = 0; i < 4; i++) {
- {
- const float *in = (const float *)(src[0] + elts[i] * pitch[0]);
- float *out = &machine->Inputs[0].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = in[2];
- out[12] = 1.0f;
- }
-
- {
- const float *in = (const float *)(src[1] + elts[i] * pitch[1]);
- float *out = &machine->Inputs[1].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = in[2];
- out[12] = 1.0f;
- }
-
- {
- const float *in = (const float *)(src[2] + elts[i] * pitch[2]);
- float *out = &machine->Inputs[2].xyzw[0].f[i];
- out[0] = in[0];
- out[4] = in[1];
- out[8] = 0.0f;
- out[12] = 1.0f;
- }
- }
-}
-
-
-
-
-/**
- * Fetch vertex attributes for 'count' vertices.
- */
-static void generic_vertex_fetch( struct draw_context *draw,
- struct tgsi_exec_machine *machine,
- const unsigned *elts,
- unsigned count )
-{
- unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
- unsigned attr;
-
- assert(count <= 4);
-
-// debug_printf("%s %d\n", __FUNCTION__, count);
-
- /* loop over vertex attributes (vertex shader inputs)
- */
- for (attr = 0; attr < nr_attrs; attr++) {
-
- const unsigned pitch = draw->vertex_fetch.pitch[attr];
- const ubyte *src = draw->vertex_fetch.src_ptr[attr];
- const fetch_func fetch = draw->vertex_fetch.fetch[attr];
- unsigned i;
- float p[4][4];
-
-
- /* Fetch four attributes for four vertices.
- *
- * Could fetch directly into AOS format, but this is meant to be
- * a prototype for an sse implementation, which would have
- * difficulties doing that.
- */
- for (i = 0; i < count; i++)
- fetch( src + elts[i] * pitch, p[i] );
-
- /* Be nice and zero out any missing vertices:
- */
- for ( ; i < 4; i++)
- p[i][0] = p[i][1] = p[i][2] = p[i][3] = 0;
-
- /* Transpose/swizzle into sse-friendly format. Currently
- * assuming that all vertex shader inputs are float[4], but this
- * isn't true -- if the vertex shader only wants tex0.xy, we
- * could optimize for that.
- *
- * To do so fully without codegen would probably require an
- * excessive number of fetch functions, but we could at least
- * minimize the transpose step:
- */
- transpose_4x4( (float *)&machine->Inputs[attr].xyzw[0].f[0], (float *)p );
- }
-}
-
-
-
-void draw_update_vertex_fetch( struct draw_context *draw )
-{
- unsigned nr_attrs, i;
-
-// debug_printf("%s\n", __FUNCTION__);
-
- /* this may happend during context init */
- if (!draw->vertex_shader)
- return;
-
- nr_attrs = draw->vertex_shader->state->num_inputs;
-
- for (i = 0; i < nr_attrs; i++) {
- unsigned buf = draw->vertex_element[i].vertex_buffer_index;
- enum pipe_format format = draw->vertex_element[i].src_format;
-
- draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] +
- draw->vertex_buffer[buf].buffer_offset +
- draw->vertex_element[i].src_offset;
-
- draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch;
- draw->vertex_fetch.fetch[i] = get_fetch_func( format );
- }
-
- draw->vertex_fetch.nr_attrs = nr_attrs;
-
- draw->vertex_fetch.fetch_func = generic_vertex_fetch;
-
- switch (nr_attrs) {
- case 2:
- if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
- draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT)
- draw->vertex_fetch.fetch_func = fetch_xyz_rgb;
- break;
- case 3:
- if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
- draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
- draw->vertex_element[2].src_format == PIPE_FORMAT_R32G32_FLOAT)
- draw->vertex_fetch.fetch_func = fetch_xyz_rgb_st;
- break;
- default:
- break;
- }
-
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Keith Whitwell <keith@tungstengraphics.com>
- * Brian Paul
- */
-
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#if defined(__i386__) || defined(__386__)
-#include "tgsi/exec/tgsi_sse2.h"
-#endif
-#include "draw_private.h"
-#include "draw_context.h"
-
-#include "x86/rtasm/x86sse.h"
-#include "llvm/gallivm.h"
-
-
-#define DBG_VS 0
-
-
-static INLINE unsigned
-compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
-{
- unsigned mask = 0;
- unsigned i;
-
- /* Do the hardwired planes first:
- */
- if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
- if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
- if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
- if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
- if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
- if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
-
- /* Followed by any remaining ones:
- */
- for (i = 6; i < nr; i++) {
- if (dot4(clip, plane[i]) < 0)
- mask |= (1<<i);
- }
-
- return mask;
-}
-
-
-typedef void (XSTDCALL *codegen_function) (
- const struct tgsi_exec_vector *input,
- struct tgsi_exec_vector *output,
- float (*constant)[4],
- struct tgsi_exec_vector *temporary );
-
-
-/**
- * Transform vertices with the current vertex program/shader
- * Up to four vertices can be shaded at a time.
- * \param vbuffer the input vertex data
- * \param elts indexes of four input vertices
- * \param count number of vertices to shade [1..4]
- * \param vOut array of pointers to four output vertices
- */
-static void
-run_vertex_program(struct draw_context *draw,
- unsigned elts[4], unsigned count,
- struct vertex_header *vOut[])
-{
- struct tgsi_exec_machine *machine = &draw->machine;
- unsigned int j;
-
- ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
- ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);
- const float *scale = draw->viewport.scale;
- const float *trans = draw->viewport.translate;
-
- assert(count <= 4);
- assert(draw->vertex_shader->state->output_semantic_name[0]
- == TGSI_SEMANTIC_POSITION);
-
- /* Consts does not require 16 byte alignment. */
- machine->Consts = (float (*)[4]) draw->user.constants;
-
- machine->Inputs = ALIGN16_ASSIGN(inputs);
- machine->Outputs = ALIGN16_ASSIGN(outputs);
-
- draw->vertex_fetch.fetch_func( draw, machine, elts, count );
-
- /* run shader */
-#ifdef MESA_LLVM
- if (1) {
- struct gallivm_prog *prog = draw->vertex_shader->llvm_prog;
- gallivm_cpu_vs_exec(prog,
- machine->Inputs,
- machine->Outputs,
- machine->Consts,
- machine->Temps);
- } else
-#elif defined(__i386__) || defined(__386__)
- if (draw->use_sse) {
- /* SSE */
- /* cast away const */
- struct draw_vertex_shader *shader
- = (struct draw_vertex_shader *)draw->vertex_shader;
- codegen_function func
- = (codegen_function) x86_get_func( &shader->sse2_program );
-
- if (func)
- func(
- machine->Inputs,
- machine->Outputs,
- machine->Consts,
- machine->Temps );
- else
- /* interpreter */
- tgsi_exec_machine_run( machine );
- }
- else
-#endif
- {
- /* interpreter */
- tgsi_exec_machine_run( machine );
- }
-
- /* store machine results */
- for (j = 0; j < count; j++) {
- unsigned slot;
- float x, y, z, w;
-
- /* Handle attr[0] (position) specially:
- *
- * XXX: Computing the clipmask should be done in the vertex
- * program as a set of DP4 instructions appended to the
- * user-provided code.
- */
- x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
- y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
- z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
- w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
-
- vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
- vOut[j]->edgeflag = 1;
-
- /* divide by w */
- w = 1.0f / w;
- x *= w;
- y *= w;
- z *= w;
-
- /* Viewport mapping */
- vOut[j]->data[0][0] = x * scale[0] + trans[0];
- vOut[j]->data[0][1] = y * scale[1] + trans[1];
- vOut[j]->data[0][2] = z * scale[2] + trans[2];
- vOut[j]->data[0][3] = w;
-
-#if DBG_VS
- debug_printf("output[%d]win: %f %f %f %f\n", j,
- vOut[j]->data[0][0],
- vOut[j]->data[0][1],
- vOut[j]->data[0][2],
- vOut[j]->data[0][3]);
-#endif
- /* Remaining attributes are packed into sequential post-transform
- * vertex attrib slots.
- */
- for (slot = 1; slot < draw->num_vs_outputs; slot++) {
- vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
-#if DBG_VS
- debug_printf("output[%d][%d]: %f %f %f %f\n", j, slot,
- vOut[j]->data[slot][0],
- vOut[j]->data[slot][1],
- vOut[j]->data[slot][2],
- vOut[j]->data[slot][3]);
-#endif
- }
- } /* loop over vertices */
-}
-
-
-/**
- * Run the vertex shader on all vertices in the vertex queue.
- * Called by the draw module when the vertx cache needs to be flushed.
- */
-void
-draw_vertex_shader_queue_flush(struct draw_context *draw)
-{
- unsigned i;
-
- assert(draw->vs.queue_nr != 0);
-
- /* XXX: do this on statechange:
- */
- draw_update_vertex_fetch( draw );
-
-// fprintf(stderr, " q(%d) ", draw->vs.queue_nr );
-
- /* run vertex shader on vertex cache entries, four per invokation */
- for (i = 0; i < draw->vs.queue_nr; i += 4) {
- struct vertex_header *dests[4];
- unsigned elts[4];
- int j, n = MIN2(4, draw->vs.queue_nr - i);
-
- for (j = 0; j < n; j++) {
- elts[j] = draw->vs.queue[i + j].elt;
- dests[j] = draw->vs.queue[i + j].dest;
- }
-
- for ( ; j < 4; j++) {
- elts[j] = elts[0];
- dests[j] = dests[0];
- }
-
- assert(n > 0);
- assert(n <= 4);
-
- run_vertex_program(draw, elts, n, dests);
- }
-
- draw->vs.queue_nr = 0;
-}
-
-
-struct draw_vertex_shader *
-draw_create_vertex_shader(struct draw_context *draw,
- const struct pipe_shader_state *shader)
-{
- struct draw_vertex_shader *vs;
-
- vs = CALLOC_STRUCT( draw_vertex_shader );
- if (vs == NULL) {
- return NULL;
- }
-
- vs->state = shader;
-
-#ifdef MESA_LLVM
- struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS);
- gallivm_ir_set_layout(ir, GALLIVM_SOA);
- gallivm_ir_set_components(ir, 4);
- gallivm_ir_fill_from_tgsi(ir, shader->tokens);
- vs->llvm_prog = gallivm_ir_compile(ir);
- gallivm_ir_delete(ir);
-
- draw->engine = gallivm_global_cpu_engine();
- if (!draw->engine) {
- draw->engine = gallivm_cpu_engine_create(vs->llvm_prog);
- }
- else {
- gallivm_cpu_jit_compile(draw->engine, vs->llvm_prog);
- }
-#elif defined(__i386__) || defined(__386__)
- if (draw->use_sse) {
- /* cast-away const */
- struct pipe_shader_state *sh = (struct pipe_shader_state *) shader;
-
- x86_init_func( &vs->sse2_program );
- if (!tgsi_emit_sse2( (struct tgsi_token *) sh->tokens,
- &vs->sse2_program )) {
- x86_release_func( (struct x86_function *) &vs->sse2_program );
- fprintf(stdout /*err*/,
- "tgsi_emit_sse2() failed, falling back to interpreter\n");
- }
- }
-#endif
-
- return vs;
-}
-
-
-void
-draw_bind_vertex_shader(struct draw_context *draw,
- struct draw_vertex_shader *dvs)
-{
- draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
-
- draw->vertex_shader = dvs;
- draw->num_vs_outputs = dvs->state->num_outputs;
-
- /* specify the vertex program to interpret/execute */
- tgsi_exec_machine_init(&draw->machine,
- draw->vertex_shader->state->tokens,
- PIPE_MAX_SAMPLERS,
- NULL /*samplers*/ );
-}
-
-
-void
-draw_delete_vertex_shader(struct draw_context *draw,
- struct draw_vertex_shader *dvs)
-{
-#if defined(__i386__) || defined(__386__)
- x86_release_func( (struct x86_function *) &dvs->sse2_program );
-#endif
-
- FREE( dvs );
-}
+++ /dev/null
-/*
- * Copyright 2003 Tungsten Graphics, inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Keith Whitwell <keithw@tungstengraphics.com>
- */
-
-
-#include <stddef.h>
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_util.h"
-
-#include "draw_vf.h"
-
-
-#define DRAW_VF_DBG 0
-
-
-/* TODO: remove this */
-extern void
-_mesa_exec_free( void *addr );
-
-
-static boolean match_fastpath( struct draw_vertex_fetch *vf,
- const struct draw_vf_fastpath *fp)
-{
- unsigned j;
-
- if (vf->attr_count != fp->attr_count)
- return FALSE;
-
- for (j = 0; j < vf->attr_count; j++)
- if (vf->attr[j].format != fp->attr[j].format ||
- vf->attr[j].inputsize != fp->attr[j].size ||
- vf->attr[j].vertoffset != fp->attr[j].offset)
- return FALSE;
-
- if (fp->match_strides) {
- if (vf->vertex_stride != fp->vertex_stride)
- return FALSE;
-
- for (j = 0; j < vf->attr_count; j++)
- if (vf->attr[j].inputstride != fp->attr[j].stride)
- return FALSE;
- }
-
- return TRUE;
-}
-
-static boolean search_fastpath_emit( struct draw_vertex_fetch *vf )
-{
- struct draw_vf_fastpath *fp = vf->fastpath;
-
- for ( ; fp ; fp = fp->next) {
- if (match_fastpath(vf, fp)) {
- vf->emit = fp->func;
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
-void draw_vf_register_fastpath( struct draw_vertex_fetch *vf,
- boolean match_strides )
-{
- struct draw_vf_fastpath *fastpath = CALLOC_STRUCT(draw_vf_fastpath);
- unsigned i;
-
- fastpath->vertex_stride = vf->vertex_stride;
- fastpath->attr_count = vf->attr_count;
- fastpath->match_strides = match_strides;
- fastpath->func = vf->emit;
- fastpath->attr = (struct draw_vf_attr_type *)
- MALLOC(vf->attr_count * sizeof(fastpath->attr[0]));
-
- for (i = 0; i < vf->attr_count; i++) {
- fastpath->attr[i].format = vf->attr[i].format;
- fastpath->attr[i].stride = vf->attr[i].inputstride;
- fastpath->attr[i].size = vf->attr[i].inputsize;
- fastpath->attr[i].offset = vf->attr[i].vertoffset;
- }
-
- fastpath->next = vf->fastpath;
- vf->fastpath = fastpath;
-}
-
-
-
-
-/***********************************************************************
- * Build codegen functions or return generic ones:
- */
-static void choose_emit_func( struct draw_vertex_fetch *vf,
- unsigned count,
- uint8_t *dest)
-{
- vf->emit = NULL;
-
- /* Does this match an existing (hardwired, codegen or known-bad)
- * fastpath?
- */
- if (search_fastpath_emit(vf)) {
- /* Use this result. If it is null, then it is already known
- * that the current state will fail for codegen and there is no
- * point trying again.
- */
- }
- else if (vf->codegen_emit) {
- vf->codegen_emit( vf );
- }
-
- if (!vf->emit) {
- draw_vf_generate_hardwired_emit(vf);
- }
-
- /* Otherwise use the generic version:
- */
- if (!vf->emit)
- vf->emit = draw_vf_generic_emit;
-
- vf->emit( vf, count, dest );
-}
-
-
-
-
-
-/***********************************************************************
- * Public entrypoints, mostly dispatch to the above:
- */
-
-
-
-static unsigned
-draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf,
- const struct draw_vf_attr_map *map,
- unsigned nr,
- unsigned vertex_stride )
-{
- unsigned offset = 0;
- unsigned i, j;
-
- assert(nr < PIPE_ATTRIB_MAX);
-
- for (j = 0, i = 0; i < nr; i++) {
- const unsigned format = map[i].format;
- if (format == DRAW_EMIT_PAD) {
-#if (DRAW_VF_DBG)
- debug_printf("%d: pad %d, offset %d\n", i,
- map[i].offset, offset);
-#endif
-
- offset += map[i].offset;
-
- }
- else {
- vf->attr[j].attrib = map[i].attrib;
- vf->attr[j].format = format;
- vf->attr[j].insert = draw_vf_format_info[format].insert;
- vf->attr[j].vertattrsize = draw_vf_format_info[format].attrsize;
- vf->attr[j].vertoffset = offset;
- vf->attr[j].isconst = draw_vf_format_info[format].isconst;
- if(vf->attr[j].isconst)
- memcpy(vf->attr[j].data, &map[i].data, vf->attr[j].vertattrsize);
-
-#if (DRAW_VF_DBG)
- debug_printf("%d: %s, offset %d\n", i,
- draw_vf_format_info[format].name,
- vf->attr[j].vertoffset);
-#endif
-
- offset += draw_vf_format_info[format].attrsize;
- j++;
- }
- }
-
- vf->attr_count = j;
- vf->vertex_stride = vertex_stride ? vertex_stride : offset;
- vf->emit = choose_emit_func;
-
- assert(vf->vertex_stride >= offset);
- return vf->vertex_stride;
-}
-
-
-void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf,
- const struct vertex_info *vinfo,
- float point_size )
-{
- unsigned i, j, k;
- struct draw_vf_attr *a = vf->attr;
- struct draw_vf_attr_map attrs[PIPE_MAX_SHADER_INPUTS];
- unsigned count = 0; /* for debug/sanity */
- unsigned nr_attrs = 0;
-
- for (i = 0; i < vinfo->num_attribs; i++) {
- j = vinfo->src_index[i];
- switch (vinfo->emit[i]) {
- case EMIT_OMIT:
- /* no-op */
- break;
- case EMIT_ALL: {
- /* just copy the whole vertex as-is to the vbuf */
- unsigned s = vinfo->size;
- assert(i == 0);
- assert(j == 0);
- /* copy the vertex header */
- /* XXX: we actually don't copy the header, just pad it */
- attrs[nr_attrs].attrib = 0;
- attrs[nr_attrs].format = DRAW_EMIT_PAD;
- attrs[nr_attrs].offset = offsetof(struct vertex_header, data);
- s -= offsetof(struct vertex_header, data)/4;
- count += offsetof(struct vertex_header, data)/4;
- nr_attrs++;
- /* copy the vertex data */
- for(k = 0; k < (s & ~0x3); k += 4) {
- attrs[nr_attrs].attrib = k/4;
- attrs[nr_attrs].format = DRAW_EMIT_4F;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count += 4;
- }
- /* tail */
- /* XXX: actually, this shouldn't be needed */
- attrs[nr_attrs].attrib = k/4;
- attrs[nr_attrs].offset = 0;
- switch(s & 0x3) {
- case 0:
- break;
- case 1:
- attrs[nr_attrs].format = DRAW_EMIT_1F;
- nr_attrs++;
- count += 1;
- break;
- case 2:
- attrs[nr_attrs].format = DRAW_EMIT_2F;
- nr_attrs++;
- count += 2;
- break;
- case 3:
- attrs[nr_attrs].format = DRAW_EMIT_3F;
- nr_attrs++;
- count += 3;
- break;
- }
- break;
- }
- case EMIT_1F:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_1F;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count++;
- break;
- case EMIT_1F_PSIZE:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_1F_CONST;
- attrs[nr_attrs].offset = 0;
- attrs[nr_attrs].data.f[0] = point_size;
- nr_attrs++;
- count++;
- break;
- case EMIT_2F:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_2F;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count += 2;
- break;
- case EMIT_3F:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_3F;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count += 3;
- break;
- case EMIT_4F:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_4F;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count += 4;
- break;
- case EMIT_4UB:
- attrs[nr_attrs].attrib = j;
- attrs[nr_attrs].format = DRAW_EMIT_4UB_4F_BGRA;
- attrs[nr_attrs].offset = 0;
- nr_attrs++;
- count += 1;
- break;
- default:
- assert(0);
- }
- }
-
- assert(count == vinfo->size);
-
- draw_vf_set_vertex_attributes(vf,
- attrs,
- nr_attrs,
- vinfo->size * sizeof(float) );
-
- for (j = 0; j < vf->attr_count; j++) {
- a[j].inputsize = 4;
- a[j].do_insert = a[j].insert[4 - 1];
- if(a[j].isconst) {
- a[j].inputptr = a[j].data;
- a[j].inputstride = 0;
- }
- }
-}
-
-
-#if 0
-/* Set attribute pointers, adjusted for start position:
- */
-void draw_vf_set_sources( struct draw_vertex_fetch *vf,
- GLvector4f * const sources[],
- unsigned start )
-{
- struct draw_vf_attr *a = vf->attr;
- unsigned j;
-
- for (j = 0; j < vf->attr_count; j++) {
- const GLvector4f *vptr = sources[a[j].attrib];
-
- if ((a[j].inputstride != vptr->stride) ||
- (a[j].inputsize != vptr->size))
- vf->emit = choose_emit_func;
-
- a[j].inputstride = vptr->stride;
- a[j].inputsize = vptr->size;
- a[j].do_insert = a[j].insert[vptr->size - 1];
- a[j].inputptr = ((uint8_t *)vptr->data) + start * vptr->stride;
- }
-}
-#endif
-
-
-/**
- * Emit a vertex to dest.
- */
-void draw_vf_emit_vertex( struct draw_vertex_fetch *vf,
- struct vertex_header *vertex,
- void *dest )
-{
- struct draw_vf_attr *a = vf->attr;
- unsigned j;
-
- for (j = 0; j < vf->attr_count; j++) {
- if (!a[j].isconst) {
- a[j].inputptr = (uint8_t *)&vertex->data[a[j].attrib][0];
- a[j].inputstride = 0; /* XXX: one-vertex-max ATM */
- }
- }
-
- vf->emit( vf, 1, (uint8_t*) dest );
-}
-
-
-
-struct draw_vertex_fetch *draw_vf_create( void )
-{
- struct draw_vertex_fetch *vf = CALLOC_STRUCT(draw_vertex_fetch);
- unsigned i;
-
- for (i = 0; i < PIPE_ATTRIB_MAX; i++)
- vf->attr[i].vf = vf;
-
- vf->identity[0] = 0.0;
- vf->identity[1] = 0.0;
- vf->identity[2] = 0.0;
- vf->identity[3] = 1.0;
-
- vf->codegen_emit = NULL;
-
-#ifdef USE_SSE_ASM
- if (!GETENV("GALLIUM_NO_CODEGEN"))
- vf->codegen_emit = draw_vf_generate_sse_emit;
-#endif
-
- return vf;
-}
-
-
-void draw_vf_destroy( struct draw_vertex_fetch *vf )
-{
- struct draw_vf_fastpath *fp, *tmp;
-
- for (fp = vf->fastpath ; fp ; fp = tmp) {
- tmp = fp->next;
- FREE(fp->attr);
-
- /* KW: At the moment, fp->func is constrained to be allocated by
- * _mesa_exec_alloc(), as the hardwired fastpaths in
- * t_vertex_generic.c are handled specially. It would be nice
- * to unify them, but this probably won't change until this
- * module gets another overhaul.
- */
- //_mesa_exec_free((void *) fp->func);
- FREE(fp);
- }
-
- vf->fastpath = NULL;
- FREE(vf);
-}
+++ /dev/null
-/*
- * Copyright 2008 Tungsten Graphics, inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-/**
- * Vertex fetch/store/convert code. This functionality is used in two places:
- * 1. Vertex fetch/convert - to grab vertex data from incoming vertex
- * arrays and convert to format needed by vertex shaders.
- * 2. Vertex store/emit - to convert simple float[][4] vertex attributes
- * (which is the organization used throughout the draw/prim pipeline) to
- * hardware-specific formats and emit into hardware vertex buffers.
- *
- *
- * Authors:
- * Keith Whitwell <keithw@tungstengraphics.com>
- */
-
-#ifndef DRAW_VF_H
-#define DRAW_VF_H
-
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_state.h"
-
-#include "draw_vertex.h"
-#include "draw_private.h" /* for vertex_header */
-
-
-enum draw_vf_attr_format {
- DRAW_EMIT_1F,
- DRAW_EMIT_2F,
- DRAW_EMIT_3F,
- DRAW_EMIT_4F,
- DRAW_EMIT_3F_XYW, /**< for projective texture */
- DRAW_EMIT_1UB_1F, /**< for fog coordinate */
- DRAW_EMIT_3UB_3F_RGB, /**< for specular color */
- DRAW_EMIT_3UB_3F_BGR, /**< for specular color */
- DRAW_EMIT_4UB_4F_RGBA, /**< for color */
- DRAW_EMIT_4UB_4F_BGRA, /**< for color */
- DRAW_EMIT_4UB_4F_ARGB, /**< for color */
- DRAW_EMIT_4UB_4F_ABGR, /**< for color */
- DRAW_EMIT_1F_CONST,
- DRAW_EMIT_2F_CONST,
- DRAW_EMIT_3F_CONST,
- DRAW_EMIT_4F_CONST,
- DRAW_EMIT_PAD, /**< leave a hole of 'offset' bytes */
- DRAW_EMIT_MAX
-};
-
-struct draw_vf_attr_map
-{
- /** Input attribute number */
- unsigned attrib;
-
- enum draw_vf_attr_format format;
-
- unsigned offset;
-
- /**
- * Constant data for DRAW_EMIT_*_CONST
- */
- union {
- uint8_t ub[4];
- float f[4];
- } data;
-};
-
-struct draw_vertex_fetch;
-
-
-
-#if 0
-unsigned
-draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf,
- const struct draw_vf_attr_map *map,
- unsigned nr,
- unsigned vertex_stride );
-#endif
-
-void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf,
- const struct vertex_info *vinfo,
- float point_size );
-
-#if 0
-void
-draw_vf_set_sources( struct draw_vertex_fetch *vf,
- GLvector4f * const attrib[],
- unsigned start );
-#endif
-
-void
-draw_vf_emit_vertex( struct draw_vertex_fetch *vf,
- struct vertex_header *vertex,
- void *dest );
-
-struct draw_vertex_fetch *
-draw_vf_create( void );
-
-void
-draw_vf_destroy( struct draw_vertex_fetch *vf );
-
-
-
-/***********************************************************************
- * Internal functions and structs:
- */
-
-struct draw_vf_attr;
-
-typedef void (*draw_vf_extract_func)( const struct draw_vf_attr *a,
- float *out,
- const uint8_t *v );
-
-typedef void (*draw_vf_insert_func)( const struct draw_vf_attr *a,
- uint8_t *v,
- const float *in );
-
-typedef void (*draw_vf_emit_func)( struct draw_vertex_fetch *vf,
- unsigned count,
- uint8_t *dest );
-
-
-
-/**
- * Describes how to convert/move a vertex attribute from a vertex
- * array to a vertex structure.
- */
-struct draw_vf_attr
-{
- struct draw_vertex_fetch *vf;
-
- unsigned format;
- unsigned inputsize;
- unsigned inputstride;
- unsigned vertoffset; /**< position of the attrib in the vertex struct */
-
- boolean isconst; /**< read from const data below */
- uint8_t data[16];
-
- unsigned attrib; /**< which vertex attrib (0=position, etc) */
- unsigned vertattrsize; /**< size of the attribute in bytes */
-
- uint8_t *inputptr;
- const draw_vf_insert_func *insert;
- draw_vf_insert_func do_insert;
- draw_vf_extract_func extract;
-};
-
-struct draw_vertex_fetch
-{
- struct draw_vf_attr attr[PIPE_ATTRIB_MAX];
- unsigned attr_count;
- unsigned vertex_stride;
-
- draw_vf_emit_func emit;
-
- /* Parameters and constants for codegen:
- */
- float identity[4];
-
- struct draw_vf_fastpath *fastpath;
-
- void (*codegen_emit)( struct draw_vertex_fetch *vf );
-};
-
-
-struct draw_vf_attr_type {
- unsigned format;
- unsigned size;
- unsigned stride;
- unsigned offset;
-};
-
-/** XXX this could be moved into draw_vf.c */
-struct draw_vf_fastpath {
- unsigned vertex_stride;
- unsigned attr_count;
- boolean match_strides;
-
- struct draw_vf_attr_type *attr;
-
- draw_vf_emit_func func;
- struct draw_vf_fastpath *next;
-};
-
-
-void
-draw_vf_register_fastpath( struct draw_vertex_fetch *vtx,
- boolean match_strides );
-
-void
-draw_vf_generic_emit( struct draw_vertex_fetch *vf,
- unsigned count,
- uint8_t *v );
-
-void
-draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf );
-
-void
-draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf );
-
-
-/** XXX this type and function could probably be moved into draw_vf.c */
-struct draw_vf_format_info {
- const char *name;
- draw_vf_insert_func insert[4];
- const unsigned attrsize;
- const boolean isconst;
-};
-
-extern const struct draw_vf_format_info
-draw_vf_format_info[DRAW_EMIT_MAX];
-
-
-#endif
+++ /dev/null
-
-/*
- * Copyright 2003 Tungsten Graphics, inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Keith Whitwell <keithw@tungstengraphics.com>
- */
-
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-
-#include "draw_vf.h"
-
-
-
-static INLINE void insert_4f_4( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
- out[3] = in[3];
-}
-
-static INLINE void insert_4f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
- out[3] = 1;
-}
-
-static INLINE void insert_4f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = 0;
- out[3] = 1;
-}
-
-static INLINE void insert_4f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = 0;
- out[2] = 0;
- out[3] = 1;
-}
-
-static INLINE void insert_3f_xyw_4( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[3];
-}
-
-static INLINE void insert_3f_xyw_err( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- (void) a; (void) v; (void) in;
- assert(0);
-}
-
-static INLINE void insert_3f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = in[2];
-}
-
-static INLINE void insert_3f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
- out[2] = 0;
-}
-
-static INLINE void insert_3f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = 0;
- out[2] = 0;
-}
-
-
-static INLINE void insert_2f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = in[1];
-}
-
-static INLINE void insert_2f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
- out[1] = 0;
-}
-
-static INLINE void insert_1f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- float *out = (float *)(v);
- (void) a;
-
- out[0] = in[0];
-}
-
-static INLINE void insert_null( const struct draw_vf_attr *a, uint8_t *v, const float *in )
-{
- (void) a; (void) v; (void) in;
-}
-
-static INLINE void insert_4ub_4f_rgba_4( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]);
-}
-
-static INLINE void insert_4ub_4f_rgba_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_rgba_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- v[2] = 0;
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_rgba_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- v[1] = 0;
- v[2] = 0;
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_bgra_4( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]);
-}
-
-static INLINE void insert_4ub_4f_bgra_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_bgra_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- v[0] = 0;
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_bgra_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- v[1] = 0;
- v[0] = 0;
- v[3] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_argb_4( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]);
-}
-
-static INLINE void insert_4ub_4f_argb_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]);
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_argb_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- v[3] = 0x00;
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_argb_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
- v[2] = 0x00;
- v[3] = 0x00;
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_abgr_4( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]);
-}
-
-static INLINE void insert_4ub_4f_abgr_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]);
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_abgr_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
- v[1] = 0x00;
- v[0] = 0xff;
-}
-
-static INLINE void insert_4ub_4f_abgr_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
- v[2] = 0x00;
- v[1] = 0x00;
- v[0] = 0xff;
-}
-
-static INLINE void insert_3ub_3f_rgb_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
-}
-
-static INLINE void insert_3ub_3f_rgb_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- v[2] = 0;
-}
-
-static INLINE void insert_3ub_3f_rgb_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
- v[1] = 0;
- v[2] = 0;
-}
-
-static INLINE void insert_3ub_3f_bgr_3( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
-}
-
-static INLINE void insert_3ub_3f_bgr_2( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
- v[0] = 0;
-}
-
-static INLINE void insert_3ub_3f_bgr_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
- v[1] = 0;
- v[0] = 0;
-}
-
-
-static INLINE void insert_1ub_1f_1( const struct draw_vf_attr *a, uint8_t *v,
- const float *in )
-{
- (void) a;
- UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
-}
-
-
-const struct draw_vf_format_info draw_vf_format_info[DRAW_EMIT_MAX] =
-{
- { "1f",
- { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 },
- sizeof(float), FALSE },
-
- { "2f",
- { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 },
- 2 * sizeof(float), FALSE },
-
- { "3f",
- { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 },
- 3 * sizeof(float), FALSE },
-
- { "4f",
- { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 },
- 4 * sizeof(float), FALSE },
-
- { "3f_xyw",
- { insert_3f_xyw_err, insert_3f_xyw_err, insert_3f_xyw_err,
- insert_3f_xyw_4 },
- 3 * sizeof(float), FALSE },
-
- { "1ub_1f",
- { insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1 },
- sizeof(uint8_t), FALSE },
-
- { "3ub_3f_rgb",
- { insert_3ub_3f_rgb_1, insert_3ub_3f_rgb_2, insert_3ub_3f_rgb_3,
- insert_3ub_3f_rgb_3 },
- 3 * sizeof(uint8_t), FALSE },
-
- { "3ub_3f_bgr",
- { insert_3ub_3f_bgr_1, insert_3ub_3f_bgr_2, insert_3ub_3f_bgr_3,
- insert_3ub_3f_bgr_3 },
- 3 * sizeof(uint8_t), FALSE },
-
- { "4ub_4f_rgba",
- { insert_4ub_4f_rgba_1, insert_4ub_4f_rgba_2, insert_4ub_4f_rgba_3,
- insert_4ub_4f_rgba_4 },
- 4 * sizeof(uint8_t), FALSE },
-
- { "4ub_4f_bgra",
- { insert_4ub_4f_bgra_1, insert_4ub_4f_bgra_2, insert_4ub_4f_bgra_3,
- insert_4ub_4f_bgra_4 },
- 4 * sizeof(uint8_t), FALSE },
-
- { "4ub_4f_argb",
- { insert_4ub_4f_argb_1, insert_4ub_4f_argb_2, insert_4ub_4f_argb_3,
- insert_4ub_4f_argb_4 },
- 4 * sizeof(uint8_t), FALSE },
-
- { "4ub_4f_abgr",
- { insert_4ub_4f_abgr_1, insert_4ub_4f_abgr_2, insert_4ub_4f_abgr_3,
- insert_4ub_4f_abgr_4 },
- 4 * sizeof(uint8_t), FALSE },
-
- { "1f_const",
- { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 },
- sizeof(float), TRUE },
-
- { "2f_const",
- { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 },
- 2 * sizeof(float), TRUE },
-
- { "3f_const",
- { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 },
- 3 * sizeof(float), TRUE },
-
- { "4f_const",
- { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 },
- 4 * sizeof(float), TRUE },
-
- { "pad",
- { NULL, NULL, NULL, NULL },
- 0, FALSE },
-
-};
-
-
-
-
-/***********************************************************************
- * Hardwired fastpaths for emitting whole vertices or groups of
- * vertices
- */
-#define EMIT5(NR, F0, F1, F2, F3, F4, NAME) \
-static void NAME( struct draw_vertex_fetch *vf, \
- unsigned count, \
- uint8_t *v ) \
-{ \
- struct draw_vf_attr *a = vf->attr; \
- unsigned i; \
- \
- for (i = 0 ; i < count ; i++, v += vf->vertex_stride) { \
- if (NR > 0) { \
- F0( &a[0], v + a[0].vertoffset, (float *)a[0].inputptr ); \
- a[0].inputptr += a[0].inputstride; \
- } \
- \
- if (NR > 1) { \
- F1( &a[1], v + a[1].vertoffset, (float *)a[1].inputptr ); \
- a[1].inputptr += a[1].inputstride; \
- } \
- \
- if (NR > 2) { \
- F2( &a[2], v + a[2].vertoffset, (float *)a[2].inputptr ); \
- a[2].inputptr += a[2].inputstride; \
- } \
- \
- if (NR > 3) { \
- F3( &a[3], v + a[3].vertoffset, (float *)a[3].inputptr ); \
- a[3].inputptr += a[3].inputstride; \
- } \
- \
- if (NR > 4) { \
- F4( &a[4], v + a[4].vertoffset, (float *)a[4].inputptr ); \
- a[4].inputptr += a[4].inputstride; \
- } \
- } \
-}
-
-
-#define EMIT2(F0, F1, NAME) EMIT5(2, F0, F1, insert_null, \
- insert_null, insert_null, NAME)
-
-#define EMIT3(F0, F1, F2, NAME) EMIT5(3, F0, F1, F2, insert_null, \
- insert_null, NAME)
-
-#define EMIT4(F0, F1, F2, F3, NAME) EMIT5(4, F0, F1, F2, F3, \
- insert_null, NAME)
-
-
-EMIT2(insert_3f_3, insert_4ub_4f_rgba_4, emit_xyz3_rgba4)
-
-EMIT3(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, emit_xyzw4_rgba4_st2)
-
-EMIT4(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, insert_2f_2, emit_xyzw4_rgba4_st2_st2)
-
-
-/* Use the codegen paths to select one of a number of hardwired
- * fastpaths.
- */
-void draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf )
-{
- draw_vf_emit_func func = NULL;
-
- /* Does it fit a hardwired fastpath? Help! this is growing out of
- * control!
- */
- switch (vf->attr_count) {
- case 2:
- if (vf->attr[0].do_insert == insert_3f_3 &&
- vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
- func = emit_xyz3_rgba4;
- }
- break;
- case 3:
- if (vf->attr[2].do_insert == insert_2f_2) {
- if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
- if (vf->attr[0].do_insert == insert_4f_4)
- func = emit_xyzw4_rgba4_st2;
- }
- }
- break;
- case 4:
- if (vf->attr[2].do_insert == insert_2f_2 &&
- vf->attr[3].do_insert == insert_2f_2) {
- if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
- if (vf->attr[0].do_insert == insert_4f_4)
- func = emit_xyzw4_rgba4_st2_st2;
- }
- }
- break;
- }
-
- vf->emit = func;
-}
-
-/***********************************************************************
- * Generic (non-codegen) functions for whole vertices or groups of
- * vertices
- */
-
-void draw_vf_generic_emit( struct draw_vertex_fetch *vf,
- unsigned count,
- uint8_t *v )
-{
- struct draw_vf_attr *a = vf->attr;
- const unsigned attr_count = vf->attr_count;
- const unsigned stride = vf->vertex_stride;
- unsigned i, j;
-
- for (i = 0 ; i < count ; i++, v += stride) {
- for (j = 0; j < attr_count; j++) {
- float *in = (float *)a[j].inputptr;
- a[j].inputptr += a[j].inputstride;
- a[j].do_insert( &a[j], v + a[j].vertoffset, in );
- }
- }
-}
-
-
+++ /dev/null
-/*
- * Copyright 2003 Tungsten Graphics, inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Keith Whitwell <keithw@tungstengraphics.com>
- */
-
-
-#include "simple_list.h"
-
-#include "pipe/p_compiler.h"
-
-#include "draw_vf.h"
-
-
-#if defined(USE_SSE_ASM)
-
-#include "x86/rtasm/x86sse.h"
-#include "x86/common_x86_asm.h"
-
-
-#define X 0
-#define Y 1
-#define Z 2
-#define W 3
-
-
-struct x86_program {
- struct x86_function func;
-
- struct draw_vertex_fetch *vf;
- boolean inputs_safe;
- boolean outputs_safe;
- boolean have_sse2;
-
- struct x86_reg identity;
- struct x86_reg chan0;
-};
-
-
-static struct x86_reg get_identity( struct x86_program *p )
-{
- return p->identity;
-}
-
-static void emit_load4f_4( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movups(&p->func, dest, arg0);
-}
-
-static void emit_load4f_3( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- /* Have to jump through some hoops:
- *
- * c 0 0 0
- * c 0 0 1
- * 0 0 c 1
- * a b c 1
- */
- sse_movss(&p->func, dest, x86_make_disp(arg0, 8));
- sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) );
- sse_shufps(&p->func, dest, dest, SHUF(Y,Z,X,W) );
- sse_movlps(&p->func, dest, arg0);
-}
-
-static void emit_load4f_2( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- /* Initialize from identity, then pull in low two words:
- */
- sse_movups(&p->func, dest, get_identity(p));
- sse_movlps(&p->func, dest, arg0);
-}
-
-static void emit_load4f_1( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- /* Pull in low word, then swizzle in identity */
- sse_movss(&p->func, dest, arg0);
- sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) );
-}
-
-
-
-static void emit_load3f_3( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- /* Over-reads by 1 dword - potential SEGV if input is a vertex
- * array.
- */
- if (p->inputs_safe) {
- sse_movups(&p->func, dest, arg0);
- }
- else {
- /* c 0 0 0
- * c c c c
- * a b c c
- */
- sse_movss(&p->func, dest, x86_make_disp(arg0, 8));
- sse_shufps(&p->func, dest, dest, SHUF(X,X,X,X));
- sse_movlps(&p->func, dest, arg0);
- }
-}
-
-static void emit_load3f_2( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- emit_load4f_2(p, dest, arg0);
-}
-
-static void emit_load3f_1( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- emit_load4f_1(p, dest, arg0);
-}
-
-static void emit_load2f_2( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movlps(&p->func, dest, arg0);
-}
-
-static void emit_load2f_1( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- emit_load4f_1(p, dest, arg0);
-}
-
-static void emit_load1f_1( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movss(&p->func, dest, arg0);
-}
-
-static void (*load[4][4])( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 ) = {
- { emit_load1f_1,
- emit_load1f_1,
- emit_load1f_1,
- emit_load1f_1 },
-
- { emit_load2f_1,
- emit_load2f_2,
- emit_load2f_2,
- emit_load2f_2 },
-
- { emit_load3f_1,
- emit_load3f_2,
- emit_load3f_3,
- emit_load3f_3 },
-
- { emit_load4f_1,
- emit_load4f_2,
- emit_load4f_3,
- emit_load4f_4 }
-};
-
-static void emit_load( struct x86_program *p,
- struct x86_reg dest,
- unsigned sz,
- struct x86_reg src,
- unsigned src_sz)
-{
- load[sz-1][src_sz-1](p, dest, src);
-}
-
-static void emit_store4f( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movups(&p->func, dest, arg0);
-}
-
-static void emit_store3f( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- if (p->outputs_safe) {
- /* Emit the extra dword anyway. This may hurt writecombining,
- * may cause other problems.
- */
- sse_movups(&p->func, dest, arg0);
- }
- else {
- /* Alternate strategy - emit two, shuffle, emit one.
- */
- sse_movlps(&p->func, dest, arg0);
- sse_shufps(&p->func, arg0, arg0, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
- sse_movss(&p->func, x86_make_disp(dest,8), arg0);
- }
-}
-
-static void emit_store2f( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movlps(&p->func, dest, arg0);
-}
-
-static void emit_store1f( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 )
-{
- sse_movss(&p->func, dest, arg0);
-}
-
-
-static void (*store[4])( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg arg0 ) =
-{
- emit_store1f,
- emit_store2f,
- emit_store3f,
- emit_store4f
-};
-
-static void emit_store( struct x86_program *p,
- struct x86_reg dest,
- unsigned sz,
- struct x86_reg temp )
-
-{
- store[sz-1](p, dest, temp);
-}
-
-static void emit_pack_store_4ub( struct x86_program *p,
- struct x86_reg dest,
- struct x86_reg temp )
-{
- /* Scale by 255.0
- */
- sse_mulps(&p->func, temp, p->chan0);
-
- if (p->have_sse2) {
- sse2_cvtps2dq(&p->func, temp, temp);
- sse2_packssdw(&p->func, temp, temp);
- sse2_packuswb(&p->func, temp, temp);
- sse_movss(&p->func, dest, temp);
- }
- else {
- struct x86_reg mmx0 = x86_make_reg(file_MMX, 0);
- struct x86_reg mmx1 = x86_make_reg(file_MMX, 1);
- sse_cvtps2pi(&p->func, mmx0, temp);
- sse_movhlps(&p->func, temp, temp);
- sse_cvtps2pi(&p->func, mmx1, temp);
- mmx_packssdw(&p->func, mmx0, mmx1);
- mmx_packuswb(&p->func, mmx0, mmx0);
- mmx_movd(&p->func, dest, mmx0);
- }
-}
-
-static int get_offset( const void *a, const void *b )
-{
- return (const char *)b - (const char *)a;
-}
-
-/* Not much happens here. Eventually use this function to try and
- * avoid saving/reloading the source pointers each vertex (if some of
- * them can fit in registers).
- */
-static void get_src_ptr( struct x86_program *p,
- struct x86_reg srcREG,
- struct x86_reg vfREG,
- struct draw_vf_attr *a )
-{
- struct draw_vertex_fetch *vf = p->vf;
- struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr));
-
- /* Load current a[j].inputptr
- */
- x86_mov(&p->func, srcREG, ptr_to_src);
-}
-
-static void update_src_ptr( struct x86_program *p,
- struct x86_reg srcREG,
- struct x86_reg vfREG,
- struct draw_vf_attr *a )
-{
- if (a->inputstride) {
- struct draw_vertex_fetch *vf = p->vf;
- struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr));
-
- /* add a[j].inputstride (hardcoded value - could just as easily
- * pull the stride value from memory each time).
- */
- x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride));
-
- /* save new value of a[j].inputptr
- */
- x86_mov(&p->func, ptr_to_src, srcREG);
- }
-}
-
-
-/* Lots of hardcoding
- *
- * EAX -- pointer to current output vertex
- * ECX -- pointer to current attribute
- *
- */
-static boolean build_vertex_emit( struct x86_program *p )
-{
- struct draw_vertex_fetch *vf = p->vf;
- unsigned j = 0;
-
- struct x86_reg vertexEAX = x86_make_reg(file_REG32, reg_AX);
- struct x86_reg srcECX = x86_make_reg(file_REG32, reg_CX);
- struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP);
- struct x86_reg vfESI = x86_make_reg(file_REG32, reg_SI);
- struct x86_reg temp = x86_make_reg(file_XMM, 0);
- uint8_t *fixup, *label;
-
- /* Push a few regs?
- */
- x86_push(&p->func, countEBP);
- x86_push(&p->func, vfESI);
-
-
- /* Get vertex count, compare to zero
- */
- x86_xor(&p->func, srcECX, srcECX);
- x86_mov(&p->func, countEBP, x86_fn_arg(&p->func, 2));
- x86_cmp(&p->func, countEBP, srcECX);
- fixup = x86_jcc_forward(&p->func, cc_E);
-
- /* Initialize destination register.
- */
- x86_mov(&p->func, vertexEAX, x86_fn_arg(&p->func, 3));
-
- /* Move argument 1 (vf) into a reg:
- */
- x86_mov(&p->func, vfESI, x86_fn_arg(&p->func, 1));
-
-
- /* always load, needed or not:
- */
- sse_movups(&p->func, p->identity, x86_make_disp(vfESI, get_offset(vf, &vf->identity[0])));
-
- /* Note address for loop jump */
- label = x86_get_label(&p->func);
-
- /* Emit code for each of the attributes. Currently routes
- * everything through SSE registers, even when it might be more
- * efficient to stick with regular old x86. No optimization or
- * other tricks - enough new ground to cover here just getting
- * things working.
- */
- while (j < vf->attr_count) {
- struct draw_vf_attr *a = &vf->attr[j];
- struct x86_reg dest = x86_make_disp(vertexEAX, a->vertoffset);
-
- /* Now, load an XMM reg from src, perhaps transform, then save.
- * Could be shortcircuited in specific cases:
- */
- switch (a->format) {
- case DRAW_EMIT_1F:
- case DRAW_EMIT_1F_CONST:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 1, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_2F:
- case DRAW_EMIT_2F_CONST:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 2, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_3F:
- case DRAW_EMIT_3F_CONST:
- /* Potentially the worst case - hardcode 2+1 copying:
- */
- if (0) {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 3, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- }
- else {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 2, temp);
- if (a->inputsize > 2) {
- emit_load(p, temp, 1, x86_make_disp(srcECX, 8), 1);
- emit_store(p, x86_make_disp(dest,8), 1, temp);
- }
- else {
- sse_movss(&p->func, x86_make_disp(dest,8), get_identity(p));
- }
- update_src_ptr(p, srcECX, vfESI, a);
- }
- break;
- case DRAW_EMIT_4F:
- case DRAW_EMIT_4F_CONST:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- emit_store(p, dest, 4, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_3F_XYW:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(X,Y,W,Z));
- emit_store(p, dest, 3, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
-
- case DRAW_EMIT_1UB_1F:
- /* Test for PAD3 + 1UB:
- */
- if (j > 0 &&
- a[-1].vertoffset + a[-1].vertattrsize <= a->vertoffset - 3)
- {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(X,X,X,X));
- emit_pack_store_4ub(p, x86_make_disp(dest, -3), temp); /* overkill! */
- update_src_ptr(p, srcECX, vfESI, a);
- }
- else {
- debug_printf("Can't emit 1ub %x %x %d\n",
- a->vertoffset, a[-1].vertoffset, a[-1].vertattrsize );
- return FALSE;
- }
- break;
- case DRAW_EMIT_3UB_3F_RGB:
- case DRAW_EMIT_3UB_3F_BGR:
- /* Test for 3UB + PAD1:
- */
- if (j == vf->attr_count - 1 ||
- a[1].vertoffset >= a->vertoffset + 4) {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
- if (a->format == DRAW_EMIT_3UB_3F_BGR)
- sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W));
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- }
- /* Test for 3UB + 1UB:
- */
- else if (j < vf->attr_count - 1 &&
- a[1].format == DRAW_EMIT_1UB_1F &&
- a[1].vertoffset == a->vertoffset + 3) {
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
- update_src_ptr(p, srcECX, vfESI, a);
-
- /* Make room for incoming value:
- */
- sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z));
-
- get_src_ptr(p, srcECX, vfESI, &a[1]);
- emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize);
- update_src_ptr(p, srcECX, vfESI, &a[1]);
-
- /* Rearrange and possibly do BGR conversion:
- */
- if (a->format == DRAW_EMIT_3UB_3F_BGR)
- sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X));
- else
- sse_shufps(&p->func, temp, temp, SHUF(Y,Z,W,X));
-
- emit_pack_store_4ub(p, dest, temp);
- j++; /* NOTE: two attrs consumed */
- }
- else {
- debug_printf("Can't emit 3ub\n");
- }
- return FALSE; /* add this later */
- break;
-
- case DRAW_EMIT_4UB_4F_RGBA:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_4UB_4F_BGRA:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W));
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_4UB_4F_ARGB:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z));
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- case DRAW_EMIT_4UB_4F_ABGR:
- get_src_ptr(p, srcECX, vfESI, a);
- emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
- sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X));
- emit_pack_store_4ub(p, dest, temp);
- update_src_ptr(p, srcECX, vfESI, a);
- break;
- default:
- debug_printf("unknown a[%d].format %d\n", j, a->format);
- return FALSE; /* catch any new opcodes */
- }
-
- /* Increment j by at least 1 - may have been incremented above also:
- */
- j++;
- }
-
- /* Next vertex:
- */
- x86_lea(&p->func, vertexEAX, x86_make_disp(vertexEAX, vf->vertex_stride));
-
- /* decr count, loop if not zero
- */
- x86_dec(&p->func, countEBP);
- x86_test(&p->func, countEBP, countEBP);
- x86_jcc(&p->func, cc_NZ, label);
-
- /* Exit mmx state?
- */
- if (p->func.need_emms)
- mmx_emms(&p->func);
-
- /* Land forward jump here:
- */
- x86_fixup_fwd_jump(&p->func, fixup);
-
- /* Pop regs and return
- */
- x86_pop(&p->func, x86_get_base_reg(vfESI));
- x86_pop(&p->func, countEBP);
- x86_ret(&p->func);
-
- vf->emit = (draw_vf_emit_func)x86_get_func(&p->func);
- return TRUE;
-}
-
-
-
-void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf )
-{
- struct x86_program p;
-
- if (!cpu_has_xmm) {
- vf->codegen_emit = NULL;
- return;
- }
-
- memset(&p, 0, sizeof(p));
-
- p.vf = vf;
- p.inputs_safe = 0; /* for now */
- p.outputs_safe = 1; /* for now */
- p.have_sse2 = cpu_has_xmm2;
- p.identity = x86_make_reg(file_XMM, 6);
- p.chan0 = x86_make_reg(file_XMM, 7);
-
- x86_init_func(&p.func);
-
- if (build_vertex_emit(&p)) {
- draw_vf_register_fastpath( vf, TRUE );
- }
- else {
- /* Note the failure so that we don't keep trying to codegen an
- * impossible state:
- */
- draw_vf_register_fastpath( vf, FALSE );
- x86_release_func(&p.func);
- }
-}
-
-#else
-
-void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf )
-{
- /* Dummy version for when USE_SSE_ASM not defined */
-}
-
-#endif
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/* Authors: Keith Whitwell <keith@tungstengraphics.com>
- */
-
-#include "pipe/p_util.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
-#include "draw_private.h"
-
-
-struct wide_stage {
- struct draw_stage stage;
-
- float half_line_width;
- float half_point_size;
-
- uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS];
- uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS];
- uint num_texcoords;
-
- int psize_slot;
-};
-
-
-
-static INLINE struct wide_stage *wide_stage( struct draw_stage *stage )
-{
- return (struct wide_stage *)stage;
-}
-
-
-static void passthrough_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->point( stage->next, header );
-}
-
-static void passthrough_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->line(stage->next, header);
-}
-
-static void passthrough_tri( struct draw_stage *stage,
- struct prim_header *header )
-{
- stage->next->tri(stage->next, header);
-}
-
-
-/**
- * Draw a wide line by drawing a quad (two triangles).
- * XXX need to disable polygon stipple.
- */
-static void wide_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- const struct wide_stage *wide = wide_stage(stage);
- const float half_width = wide->half_line_width;
-
- struct prim_header tri;
-
- struct vertex_header *v0 = dup_vert(stage, header->v[0], 0);
- struct vertex_header *v1 = dup_vert(stage, header->v[0], 1);
- struct vertex_header *v2 = dup_vert(stage, header->v[1], 2);
- struct vertex_header *v3 = dup_vert(stage, header->v[1], 3);
-
- float *pos0 = v0->data[0];
- float *pos1 = v1->data[0];
- float *pos2 = v2->data[0];
- float *pos3 = v3->data[0];
-
- const float dx = FABSF(pos0[0] - pos2[0]);
- const float dy = FABSF(pos0[1] - pos2[1]);
-
- /*
- * Draw wide line as a quad (two tris) by "stretching" the line along
- * X or Y.
- * We need to tweak coords in several ways to be conformant here.
- */
-
- if (dx > dy) {
- /* x-major line */
- pos0[1] = pos0[1] - half_width - 0.25f;
- pos1[1] = pos1[1] + half_width - 0.25f;
- pos2[1] = pos2[1] - half_width - 0.25f;
- pos3[1] = pos3[1] + half_width - 0.25f;
- if (pos0[0] < pos2[0]) {
- /* left to right line */
- pos0[0] -= 0.5f;
- pos1[0] -= 0.5f;
- pos2[0] -= 0.5f;
- pos3[0] -= 0.5f;
- }
- else {
- /* right to left line */
- pos0[0] += 0.5f;
- pos1[0] += 0.5f;
- pos2[0] += 0.5f;
- pos3[0] += 0.5f;
- }
- }
- else {
- /* y-major line */
- pos0[0] = pos0[0] - half_width + 0.25f;
- pos1[0] = pos1[0] + half_width + 0.25f;
- pos2[0] = pos2[0] - half_width + 0.25f;
- pos3[0] = pos3[0] + half_width + 0.25f;
- if (pos0[1] < pos2[1]) {
- /* top to bottom line */
- pos0[1] -= 0.5f;
- pos1[1] -= 0.5f;
- pos2[1] -= 0.5f;
- pos3[1] -= 0.5f;
- }
- else {
- /* bottom to top line */
- pos0[1] += 0.5f;
- pos1[1] += 0.5f;
- pos2[1] += 0.5f;
- pos3[1] += 0.5f;
- }
- }
-
- tri.det = header->det; /* only the sign matters */
- tri.v[0] = v0;
- tri.v[1] = v2;
- tri.v[2] = v3;
- stage->next->tri( stage->next, &tri );
-
- tri.v[0] = v0;
- tri.v[1] = v3;
- tri.v[2] = v1;
- stage->next->tri( stage->next, &tri );
-}
-
-
-/**
- * Draw a wide line by drawing a quad, using geometry which will
- * fullfill GL's antialiased line requirements.
- */
-static void wide_line_aa(struct draw_stage *stage,
- struct prim_header *header)
-{
- const struct wide_stage *wide = wide_stage(stage);
- const float half_width = wide->half_line_width;
- struct prim_header tri;
- struct vertex_header *v[4];
- float *pos;
- float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0];
- float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1];
- const float len = (float) sqrt(dx * dx + dy * dy);
- uint i;
-
- dx = dx * half_width / len;
- dy = dy * half_width / len;
-
- /* allocate/dup new verts */
- for (i = 0; i < 4; i++) {
- v[i] = dup_vert(stage, header->v[i/2], i);
- }
-
- /*
- * Quad for line from v0 to v1:
- *
- * 1 3
- * +-------------------------+
- * | |
- * *v0 v1*
- * | |
- * +-------------------------+
- * 0 2
- */
-
- pos = v[0]->data[0];
- pos[0] += dy;
- pos[1] -= dx;
-
- pos = v[1]->data[0];
- pos[0] -= dy;
- pos[1] += dx;
-
- pos = v[2]->data[0];
- pos[0] += dy;
- pos[1] -= dx;
-
- pos = v[3]->data[0];
- pos[0] -= dy;
- pos[1] += dx;
-
- tri.det = header->det; /* only the sign matters */
-
- tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0];
- stage->next->tri( stage->next, &tri );
-
- tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2];
- stage->next->tri( stage->next, &tri );
-
-}
-
-
-/**
- * Set the vertex texcoords for sprite mode.
- * Coords may be left untouched or set to a right-side-up or upside-down
- * orientation.
- */
-static void set_texcoords(const struct wide_stage *wide,
- struct vertex_header *v, const float tc[4])
-{
- uint i;
- for (i = 0; i < wide->num_texcoords; i++) {
- if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) {
- uint j = wide->texcoord_slot[i];
- v->data[j][0] = tc[0];
- if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT)
- v->data[j][1] = 1.0f - tc[1];
- else
- v->data[j][1] = tc[1];
- v->data[j][2] = tc[2];
- v->data[j][3] = tc[3];
- }
- }
-}
-
-
-/* If there are lots of sprite points (and why wouldn't there be?) it
- * would probably be more sensible to change hardware setup to
- * optimize this rather than doing the whole thing in software like
- * this.
- */
-static void wide_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- const struct wide_stage *wide = wide_stage(stage);
- const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite;
- float half_size;
- float left_adj, right_adj;
-
- struct prim_header tri;
-
- /* four dups of original vertex */
- struct vertex_header *v0 = dup_vert(stage, header->v[0], 0);
- struct vertex_header *v1 = dup_vert(stage, header->v[0], 1);
- struct vertex_header *v2 = dup_vert(stage, header->v[0], 2);
- struct vertex_header *v3 = dup_vert(stage, header->v[0], 3);
-
- float *pos0 = v0->data[0];
- float *pos1 = v1->data[0];
- float *pos2 = v2->data[0];
- float *pos3 = v3->data[0];
-
- /* point size is either per-vertex or fixed size */
- if (wide->psize_slot >= 0) {
- half_size = 0.5f * header->v[0]->data[wide->psize_slot][0];
- }
- else {
- half_size = wide->half_point_size;
- }
-
- left_adj = -half_size + 0.25f;
- right_adj = half_size + 0.25f;
-
- pos0[0] += left_adj;
- pos0[1] -= half_size;
-
- pos1[0] += left_adj;
- pos1[1] += half_size;
-
- pos2[0] += right_adj;
- pos2[1] -= half_size;
-
- pos3[0] += right_adj;
- pos3[1] += half_size;
-
- if (sprite) {
- static const float tex00[4] = { 0, 0, 0, 1 };
- static const float tex01[4] = { 0, 1, 0, 1 };
- static const float tex11[4] = { 1, 1, 0, 1 };
- static const float tex10[4] = { 1, 0, 0, 1 };
- set_texcoords( wide, v0, tex00 );
- set_texcoords( wide, v1, tex01 );
- set_texcoords( wide, v2, tex10 );
- set_texcoords( wide, v3, tex11 );
- }
-
- tri.det = header->det; /* only the sign matters */
- tri.v[0] = v0;
- tri.v[1] = v2;
- tri.v[2] = v3;
- stage->next->tri( stage->next, &tri );
-
- tri.v[0] = v0;
- tri.v[1] = v3;
- tri.v[2] = v1;
- stage->next->tri( stage->next, &tri );
-}
-
-
-static void wide_first_point( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct wide_stage *wide = wide_stage(stage);
- struct draw_context *draw = stage->draw;
-
- wide->half_point_size = 0.5f * draw->rasterizer->point_size;
-
- if (draw->rasterizer->point_size != 1.0) {
- stage->point = wide_point;
- }
- else {
- stage->point = passthrough_point;
- }
-
- if (draw->rasterizer->point_sprite) {
- /* find vertex shader texcoord outputs */
- const struct draw_vertex_shader *vs = draw->vertex_shader;
- uint i, j = 0;
- for (i = 0; i < vs->state->num_outputs; i++) {
- if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
- wide->texcoord_slot[j] = i;
- wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j];
- j++;
- }
- }
- wide->num_texcoords = j;
- }
-
- wide->psize_slot = -1;
-
- if (draw->rasterizer->point_size_per_vertex) {
- /* find PSIZ vertex output */
- const struct draw_vertex_shader *vs = draw->vertex_shader;
- uint i;
- for (i = 0; i < vs->state->num_outputs; i++) {
- if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
- wide->psize_slot = i;
- break;
- }
- }
- }
-
- stage->point( stage, header );
-}
-
-
-
-static void wide_first_line( struct draw_stage *stage,
- struct prim_header *header )
-{
- struct wide_stage *wide = wide_stage(stage);
- struct draw_context *draw = stage->draw;
-
- wide->half_line_width = 0.5f * draw->rasterizer->line_width;
-
- if (draw->rasterizer->line_width != 1.0) {
- if (draw->rasterizer->line_smooth)
- wide->stage.line = wide_line_aa;
- else
- wide->stage.line = wide_line;
- }
- else {
- wide->stage.line = passthrough_line;
- }
-
- stage->line( stage, header );
-}
-
-
-static void wide_flush( struct draw_stage *stage, unsigned flags )
-{
- stage->line = wide_first_line;
- stage->point = wide_first_point;
- stage->next->flush( stage->next, flags );
-}
-
-
-static void wide_reset_stipple_counter( struct draw_stage *stage )
-{
- stage->next->reset_stipple_counter( stage->next );
-}
-
-
-static void wide_destroy( struct draw_stage *stage )
-{
- draw_free_temp_verts( stage );
- FREE( stage );
-}
-
-
-struct draw_stage *draw_wide_stage( struct draw_context *draw )
-{
- struct wide_stage *wide = CALLOC_STRUCT(wide_stage);
-
- draw_alloc_temp_verts( &wide->stage, 4 );
-
- wide->stage.draw = draw;
- wide->stage.next = NULL;
- wide->stage.point = wide_first_point;
- wide->stage.line = wide_first_line;
- wide->stage.tri = passthrough_tri;
- wide->stage.flush = wide_flush;
- wide->stage.reset_stipple_counter = wide_reset_stipple_counter;
- wide->stage.destroy = wide_destroy;
-
- return &wide->stage;
-}
+++ /dev/null
-# -*-makefile-*-
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = gallivm
-
-
-GALLIVM_SOURCES = \
- gallivm.cpp \
- gallivm_cpu.cpp \
- instructions.cpp \
- loweringpass.cpp \
- tgsitollvm.cpp \
- storage.cpp \
- storagesoa.cpp \
- instructionssoa.cpp
-
-INC_SOURCES = gallivm_builtins.cpp
-
-CPP_SOURCES = \
- $(GALLIVM_SOURCES)
-
-C_SOURCES =
-ASM_SOURCES =
-
-OBJECTS = $(C_SOURCES:.c=.o) \
- $(CPP_SOURCES:.cpp=.o) \
- $(ASM_SOURCES:.S=.o)
-
-### Include directories
-INCLUDES = \
- -I. \
- -I$(TOP)/src/gallium/drivers
- -I$(TOP)/src/gallium/aux \
- -I$(TOP)/src/gallium/include \
- -I$(TOP)/src/mesa \
- -I$(TOP)/include
-
-
-##### RULES #####
-
-.c.o:
- $(CC) -c $(INCLUDES) $(LLVM_CFLAGS) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
-
-.cpp.o:
- $(CXX) -c $(INCLUDES) $(LLVM_CXXFLAGS) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@
-
-.S.o:
- $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
-
-##### TARGETS #####
-
-default:: depend symlinks $(LIBNAME)
-
-
-$(LIBNAME): $(OBJECTS) Makefile
- $(TOP)/bin/mklib -o $@ -static $(OBJECTS)
-
-
-depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES)
- rm -f depend
- touch depend
- $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \
- $(ASM_SOURCES) $(INC_SOURCES) 2> /dev/null
-
-
-gallivm_builtins.cpp: llvm_builtins.c
- clang --emit-llvm $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins
-
-
-# Emacs tags
-tags:
- etags `find . -name \*.[ch]` `find ../include`
-
-
-# Remove .o and backup files
-clean:
- -rm -f *.o */*.o *~ *.so *~ server/*.o
- -rm -f depend depend.bak
- -rm -f gallivm_builtins.cpp
-
-symlinks:
-
-
-include depend
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Zack Rusin zack@tungstengraphics.com
- */
-#ifdef MESA_LLVM
-
-#include "gallivm.h"
-#include "gallivm_p.h"
-
-#include "instructions.h"
-#include "loweringpass.h"
-#include "storage.h"
-#include "tgsitollvm.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_shader_tokens.h"
-
-#include "tgsi/exec/tgsi_exec.h"
-#include "tgsi/util/tgsi_dump.h"
-
-#include <llvm/Module.h>
-#include <llvm/CallingConv.h>
-#include <llvm/Constants.h>
-#include <llvm/DerivedTypes.h>
-#include <llvm/Instructions.h>
-#include <llvm/ModuleProvider.h>
-#include <llvm/Pass.h>
-#include <llvm/PassManager.h>
-#include <llvm/ParameterAttributes.h>
-#include <llvm/Support/PatternMatch.h>
-#include <llvm/ExecutionEngine/JIT.h>
-#include <llvm/ExecutionEngine/Interpreter.h>
-#include <llvm/ExecutionEngine/GenericValue.h>
-#include <llvm/Support/MemoryBuffer.h>
-#include <llvm/LinkAllPasses.h>
-#include <llvm/Analysis/Verifier.h>
-#include <llvm/Analysis/LoopPass.h>
-#include <llvm/Target/TargetData.h>
-#include <llvm/Bitcode/ReaderWriter.h>
-#include <llvm/Transforms/Utils/Cloning.h>
-
-#include <sstream>
-#include <fstream>
-#include <iostream>
-
-static int GLOBAL_ID = 0;
-
-using namespace llvm;
-
-static inline
-void AddStandardCompilePasses(PassManager &PM)
-{
- PM.add(new LoweringPass());
- PM.add(createVerifierPass()); // Verify that input is correct
-
- PM.add(createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp
-
- //PM.add(createStripSymbolsPass(true));
-
- PM.add(createRaiseAllocationsPass()); // call %malloc -> malloc inst
- PM.add(createCFGSimplificationPass()); // Clean up disgusting code
- PM.add(createPromoteMemoryToRegisterPass());// Kill useless allocas
- PM.add(createGlobalOptimizerPass()); // Optimize out global vars
- PM.add(createGlobalDCEPass()); // Remove unused fns and globs
- PM.add(createIPConstantPropagationPass());// IP Constant Propagation
- PM.add(createDeadArgEliminationPass()); // Dead argument elimination
- PM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
- PM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
-
- PM.add(createPruneEHPass()); // Remove dead EH info
-
- PM.add(createFunctionInliningPass()); // Inline small functions
- PM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
-
- PM.add(createTailDuplicationPass()); // Simplify cfg by copying code
- PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
- PM.add(createCFGSimplificationPass()); // Merge & remove BBs
- PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas
- PM.add(createInstructionCombiningPass()); // Combine silly seq's
- PM.add(createCondPropagationPass()); // Propagate conditionals
-
- PM.add(createTailCallEliminationPass()); // Eliminate tail calls
- PM.add(createCFGSimplificationPass()); // Merge & remove BBs
- PM.add(createReassociatePass()); // Reassociate expressions
- PM.add(createLoopRotatePass());
- PM.add(createLICMPass()); // Hoist loop invariants
- PM.add(createLoopUnswitchPass()); // Unswitch loops.
- PM.add(createLoopIndexSplitPass()); // Index split loops.
- PM.add(createInstructionCombiningPass()); // Clean up after LICM/reassoc
- PM.add(createIndVarSimplifyPass()); // Canonicalize indvars
- PM.add(createLoopUnrollPass()); // Unroll small loops
- PM.add(createInstructionCombiningPass()); // Clean up after the unroller
- PM.add(createGVNPass()); // Remove redundancies
- PM.add(createSCCPPass()); // Constant prop with SCCP
-
- // Run instcombine after redundancy elimination to exploit opportunities
- // opened up by them.
- PM.add(createInstructionCombiningPass());
- PM.add(createCondPropagationPass()); // Propagate conditionals
-
- PM.add(createDeadStoreEliminationPass()); // Delete dead stores
- PM.add(createAggressiveDCEPass()); // SSA based 'Aggressive DCE'
- PM.add(createCFGSimplificationPass()); // Merge & remove BBs
- PM.add(createSimplifyLibCallsPass()); // Library Call Optimizations
- PM.add(createDeadTypeEliminationPass()); // Eliminate dead types
- PM.add(createConstantMergePass()); // Merge dup global constants
-}
-
-void gallivm_prog_delete(struct gallivm_prog *prog)
-{
- delete prog->module;
- prog->module = 0;
- prog->function = 0;
- free(prog);
-}
-
-static inline void
-constant_interpolation(float (*inputs)[16][4],
- const struct tgsi_interp_coef *coefs,
- unsigned attrib,
- unsigned chan)
-{
- unsigned i;
-
- for (i = 0; i < QUAD_SIZE; ++i) {
- inputs[i][attrib][chan] = coefs[attrib].a0[chan];
- }
-}
-
-static inline void
-linear_interpolation(float (*inputs)[16][4],
- const struct tgsi_interp_coef *coefs,
- unsigned attrib,
- unsigned chan)
-{
- unsigned i;
-
- for( i = 0; i < QUAD_SIZE; i++ ) {
- const float x = inputs[i][0][0];
- const float y = inputs[i][0][1];
-
- inputs[i][attrib][chan] =
- coefs[attrib].a0[chan] +
- coefs[attrib].dadx[chan] * x +
- coefs[attrib].dady[chan] * y;
- }
-}
-
-static inline void
-perspective_interpolation(float (*inputs)[16][4],
- const struct tgsi_interp_coef *coefs,
- unsigned attrib,
- unsigned chan )
-{
- unsigned i;
-
- for( i = 0; i < QUAD_SIZE; i++ ) {
- const float x = inputs[i][0][0];
- const float y = inputs[i][0][1];
- /* WPOS.w here is really 1/w */
- const float w = 1.0f / inputs[i][0][3];
- assert(inputs[i][0][3] != 0.0);
-
- inputs[i][attrib][chan] =
- (coefs[attrib].a0[chan] +
- coefs[attrib].dadx[chan] * x +
- coefs[attrib].dady[chan] * y) * w;
- }
-}
-
-void gallivm_ir_dump(struct gallivm_ir *ir, const char *file_prefix)
-{
- if (!ir || !ir->module)
- return;
-
- if (file_prefix) {
- std::ostringstream stream;
- stream << file_prefix;
- stream << ir->id;
- stream << ".ll";
- std::string name = stream.str();
- std::ofstream out(name.c_str());
- if (!out) {
- std::cerr<<"Can't open file : "<<stream.str()<<std::endl;;
- return;
- }
- out << (*ir->module);
- out.close();
- } else {
- const llvm::Module::FunctionListType &funcs = ir->module->getFunctionList();
- llvm::Module::FunctionListType::const_iterator itr;
- std::cout<<"; ---------- Start shader "<<ir->id<<std::endl;
- for (itr = funcs.begin(); itr != funcs.end(); ++itr) {
- const llvm::Function &func = (*itr);
- std::string name = func.getName();
- const llvm::Function *found = 0;
- if (name.find("vs_shader") != std::string::npos ||
- name.find("fs_shader") != std::string::npos ||
- name.find("function") != std::string::npos)
- found = &func;
- if (found) {
- std::cout<<*found<<std::endl;
- }
- }
- std::cout<<"; ---------- End shader "<<ir->id<<std::endl;
- }
-}
-
-
-void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
- float (*inputs)[16][4],
- const struct tgsi_interp_coef *coef)
-{
- for (int i = 0; i < prog->num_interp; ++i) {
- const gallivm_interpolate &interp = prog->interpolators[i];
- switch (interp.type) {
- case TGSI_INTERPOLATE_CONSTANT:
- constant_interpolation(inputs, coef, interp.attrib, interp.chan);
- break;
-
- case TGSI_INTERPOLATE_LINEAR:
- linear_interpolation(inputs, coef, interp.attrib, interp.chan);
- break;
-
- case TGSI_INTERPOLATE_PERSPECTIVE:
- perspective_interpolation(inputs, coef, interp.attrib, interp.chan);
- break;
-
- default:
- assert( 0 );
- }
- }
-}
-
-
-struct gallivm_ir * gallivm_ir_new(enum gallivm_shader_type type)
-{
- struct gallivm_ir *ir =
- (struct gallivm_ir *)calloc(1, sizeof(struct gallivm_ir));
- ++GLOBAL_ID;
- ir->id = GLOBAL_ID;
- ir->type = type;
-
- return ir;
-}
-
-void gallivm_ir_set_layout(struct gallivm_ir *ir,
- enum gallivm_vector_layout layout)
-{
- ir->layout = layout;
-}
-
-void gallivm_ir_set_components(struct gallivm_ir *ir, int num)
-{
- ir->num_components = num;
-}
-
-void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir,
- const struct tgsi_token *tokens)
-{
- std::cout << "Creating llvm from: " <<std::endl;
- tgsi_dump(tokens, 0);
-
-
- llvm::Module *mod = tgsi_to_llvmir(ir, tokens);
-
- //llvm::Module *mod = tgsi_to_llvm(ir, tokens);
- ir->module = mod;
- gallivm_ir_dump(ir, 0);
-}
-
-void gallivm_ir_delete(struct gallivm_ir *ir)
-{
- delete ir->module;
- free(ir);
-}
-
-struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir)
-{
- struct gallivm_prog *prog =
- (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog));
- llvm::Module *mod = llvm::CloneModule(ir->module);
- prog->num_consts = ir->num_consts;
- memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators));
- prog->num_interp = ir->num_interp;
-
- /* Run optimization passes over it */
- PassManager passes;
- passes.add(new TargetData(mod));
- AddStandardCompilePasses(passes);
- passes.run(*mod);
- prog->module = mod;
-
- std::cout << "After optimizations:"<<std::endl;
- mod->dump();
-
- return prog;
-}
-
-#endif /* MESA_LLVM */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Zack Rusin zack@tungstengraphics.com
- */
-
-#ifndef GALLIVM_H
-#define GALLIVM_H
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-#include "pipe/p_state.h"
-
-#ifdef MESA_LLVM
-
-struct tgsi_token;
-
-struct gallivm_ir;
-struct gallivm_prog;
-struct gallivm_cpu_engine;
-struct tgsi_interp_coef;
-struct tgsi_sampler;
-struct tgsi_exec_vector;
-
-enum gallivm_shader_type {
- GALLIVM_VS,
- GALLIVM_FS
-};
-
-enum gallivm_vector_layout {
- GALLIVM_AOS,
- GALLIVM_SOA
-};
-
-struct gallivm_ir *gallivm_ir_new(enum gallivm_shader_type type);
-void gallivm_ir_set_layout(struct gallivm_ir *ir,
- enum gallivm_vector_layout layout);
-void gallivm_ir_set_components(struct gallivm_ir *ir, int num);
-void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir,
- const struct tgsi_token *tokens);
-void gallivm_ir_delete(struct gallivm_ir *ir);
-
-
-struct gallivm_prog *gallivm_ir_compile(struct gallivm_ir *ir);
-
-void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
- float (*inputs)[PIPE_MAX_SHADER_INPUTS][4],
- const struct tgsi_interp_coef *coefs);
-void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix);
-
-
-struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog);
-struct gallivm_cpu_engine *gallivm_global_cpu_engine();
-int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
- struct tgsi_exec_vector *inputs,
- struct tgsi_exec_vector *dests,
- float (*consts)[4],
- struct tgsi_exec_vector *temps);
-int gallivm_cpu_fs_exec(struct gallivm_prog *prog,
- float x, float y,
- float (*dests)[PIPE_MAX_SHADER_INPUTS][4],
- float (*inputs)[PIPE_MAX_SHADER_INPUTS][4],
- float (*consts)[4],
- struct tgsi_sampler *samplers);
-void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *ee, struct gallivm_prog *prog);
-void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee);
-
-
-#endif /* MESA_LLVM */
-
-#if defined __cplusplus
-} // extern "C"
-#endif
-
-#endif
+++ /dev/null
-// Generated by llvm2cpp - DO NOT MODIFY!
-
-
-Module* createGallivmBuiltins(Module *mod) {
-
-mod->setModuleIdentifier("shader");
-
-// Type Definitions
-ArrayType* ArrayTy_0 = ArrayType::get(IntegerType::get(8), 25);
-
-PointerType* PointerTy_1 = PointerType::get(ArrayTy_0, 0);
-
-std::vector<const Type*>FuncTy_2_args;
-FuncTy_2_args.push_back(Type::FloatTy);
-FuncTy_2_args.push_back(Type::FloatTy);
-FunctionType* FuncTy_2 = FunctionType::get(
- /*Result=*/Type::FloatTy,
- /*Params=*/FuncTy_2_args,
- /*isVarArg=*/false);
-
-PointerType* PointerTy_3 = PointerType::get(FuncTy_2, 0);
-
-VectorType* VectorTy_4 = VectorType::get(Type::FloatTy, 4);
-
-std::vector<const Type*>FuncTy_5_args;
-FuncTy_5_args.push_back(VectorTy_4);
-FunctionType* FuncTy_5 = FunctionType::get(
- /*Result=*/VectorTy_4,
- /*Params=*/FuncTy_5_args,
- /*isVarArg=*/false);
-
-std::vector<const Type*>FuncTy_6_args;
-FuncTy_6_args.push_back(VectorTy_4);
-FuncTy_6_args.push_back(VectorTy_4);
-FuncTy_6_args.push_back(VectorTy_4);
-FunctionType* FuncTy_6 = FunctionType::get(
- /*Result=*/VectorTy_4,
- /*Params=*/FuncTy_6_args,
- /*isVarArg=*/false);
-
-VectorType* VectorTy_7 = VectorType::get(IntegerType::get(32), 4);
-
-std::vector<const Type*>FuncTy_9_args;
-FunctionType* FuncTy_9 = FunctionType::get(
- /*Result=*/IntegerType::get(32),
- /*Params=*/FuncTy_9_args,
- /*isVarArg=*/true);
-
-PointerType* PointerTy_8 = PointerType::get(FuncTy_9, 0);
-
-PointerType* PointerTy_10 = PointerType::get(IntegerType::get(8), 0);
-
-std::vector<const Type*>FuncTy_12_args;
-FuncTy_12_args.push_back(Type::FloatTy);
-FunctionType* FuncTy_12 = FunctionType::get(
- /*Result=*/Type::FloatTy,
- /*Params=*/FuncTy_12_args,
- /*isVarArg=*/false);
-
-PointerType* PointerTy_11 = PointerType::get(FuncTy_12, 0);
-
-std::vector<const Type*>FuncTy_13_args;
-FuncTy_13_args.push_back(VectorTy_4);
-FunctionType* FuncTy_13 = FunctionType::get(
- /*Result=*/IntegerType::get(32),
- /*Params=*/FuncTy_13_args,
- /*isVarArg=*/false);
-
-
-// Function Declarations
-
-Function* func_approx = new Function(
- /*Type=*/FuncTy_2,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"approx", mod);
-func_approx->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_approx_PAL = 0;
-func_approx->setParamAttrs(func_approx_PAL);
-
-Function* func_powf = new Function(
- /*Type=*/FuncTy_2,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"powf", mod); // (external, no body)
-func_powf->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_powf_PAL = 0;
-func_powf->setParamAttrs(func_powf_PAL);
-
-Function* func_lit = new Function(
- /*Type=*/FuncTy_5,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"lit", mod);
-func_lit->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_lit_PAL = 0;
-func_lit->setParamAttrs(func_lit_PAL);
-
-Function* func_cmp = new Function(
- /*Type=*/FuncTy_6,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"cmp", mod);
-func_cmp->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_cmp_PAL = 0;
-{
- ParamAttrsVector Attrs;
- ParamAttrsWithIndex PAWI;
- PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind;
- Attrs.push_back(PAWI);
- func_cmp_PAL = ParamAttrsList::get(Attrs);
-
-}
-func_cmp->setParamAttrs(func_cmp_PAL);
-
-Function* func_vcos = new Function(
- /*Type=*/FuncTy_5,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"vcos", mod);
-func_vcos->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_vcos_PAL = 0;
-func_vcos->setParamAttrs(func_vcos_PAL);
-
-Function* func_printf = new Function(
- /*Type=*/FuncTy_9,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"printf", mod); // (external, no body)
-func_printf->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_printf_PAL = 0;
-func_printf->setParamAttrs(func_printf_PAL);
-
-Function* func_cosf = new Function(
- /*Type=*/FuncTy_12,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"cosf", mod); // (external, no body)
-func_cosf->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_cosf_PAL = 0;
-func_cosf->setParamAttrs(func_cosf_PAL);
-
-Function* func_scs = new Function(
- /*Type=*/FuncTy_5,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"scs", mod);
-func_scs->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_scs_PAL = 0;
-func_scs->setParamAttrs(func_scs_PAL);
-
-Function* func_sinf = new Function(
- /*Type=*/FuncTy_12,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"sinf", mod); // (external, no body)
-func_sinf->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_sinf_PAL = 0;
-func_sinf->setParamAttrs(func_sinf_PAL);
-
-Function* func_vsin = new Function(
- /*Type=*/FuncTy_5,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"vsin", mod);
-func_vsin->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_vsin_PAL = 0;
-func_vsin->setParamAttrs(func_vsin_PAL);
-
-Function* func_kilp = new Function(
- /*Type=*/FuncTy_13,
- /*Linkage=*/GlobalValue::WeakLinkage,
- /*Name=*/"kilp", mod);
-func_kilp->setCallingConv(CallingConv::C);
-const ParamAttrsList *func_kilp_PAL = 0;
-{
- ParamAttrsVector Attrs;
- ParamAttrsWithIndex PAWI;
- PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind;
- Attrs.push_back(PAWI);
- func_kilp_PAL = ParamAttrsList::get(Attrs);
-
-}
-func_kilp->setParamAttrs(func_kilp_PAL);
-
-// Global Variable Declarations
-
-
-GlobalVariable* gvar_array__str = new GlobalVariable(
-/*Type=*/ArrayTy_0,
-/*isConstant=*/true,
-/*Linkage=*/GlobalValue::InternalLinkage,
-/*Initializer=*/0, // has initializer, specified below
-/*Name=*/".str",
-mod);
-
-GlobalVariable* gvar_array__str1 = new GlobalVariable(
-/*Type=*/ArrayTy_0,
-/*isConstant=*/true,
-/*Linkage=*/GlobalValue::InternalLinkage,
-/*Initializer=*/0, // has initializer, specified below
-/*Name=*/".str1",
-mod);
-
-// Constant Definitions
-Constant* const_array_14 = ConstantArray::get("VEC IN is %f %f %f %f\x0A", true);
-Constant* const_array_15 = ConstantArray::get("VEC OUT is %f %f %f %f\x0A", true);
-ConstantFP* const_float_16 = ConstantFP::get(Type::FloatTy, APFloat(-1.280000e+02f));
-ConstantFP* const_float_17 = ConstantFP::get(Type::FloatTy, APFloat(1.280000e+02f));
-Constant* const_float_18 = Constant::getNullValue(Type::FloatTy);
-Constant* const_int32_19 = Constant::getNullValue(IntegerType::get(32));
-std::vector<Constant*> const_packed_20_elems;
-ConstantFP* const_float_21 = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
-const_packed_20_elems.push_back(const_float_21);
-UndefValue* const_float_22 = UndefValue::get(Type::FloatTy);
-const_packed_20_elems.push_back(const_float_22);
-const_packed_20_elems.push_back(const_float_22);
-const_packed_20_elems.push_back(const_float_21);
-Constant* const_packed_20 = ConstantVector::get(VectorTy_4, const_packed_20_elems);
-ConstantInt* const_int32_23 = ConstantInt::get(APInt(32, "1", 10));
-ConstantInt* const_int32_24 = ConstantInt::get(APInt(32, "3", 10));
-ConstantInt* const_int32_25 = ConstantInt::get(APInt(32, "2", 10));
-std::vector<Constant*> const_packed_26_elems;
-const_packed_26_elems.push_back(const_float_21);
-const_packed_26_elems.push_back(const_float_18);
-const_packed_26_elems.push_back(const_float_18);
-const_packed_26_elems.push_back(const_float_21);
-Constant* const_packed_26 = ConstantVector::get(VectorTy_4, const_packed_26_elems);
-Constant* const_double_27 = Constant::getNullValue(Type::DoubleTy);
-std::vector<Constant*> const_packed_28_elems;
-const_packed_28_elems.push_back(const_int32_19);
-ConstantInt* const_int32_29 = ConstantInt::get(APInt(32, "5", 10));
-const_packed_28_elems.push_back(const_int32_29);
-const_packed_28_elems.push_back(const_int32_25);
-const_packed_28_elems.push_back(const_int32_24);
-Constant* const_packed_28 = ConstantVector::get(VectorTy_7, const_packed_28_elems);
-std::vector<Constant*> const_packed_30_elems;
-const_packed_30_elems.push_back(const_int32_19);
-const_packed_30_elems.push_back(const_int32_23);
-ConstantInt* const_int32_31 = ConstantInt::get(APInt(32, "6", 10));
-const_packed_30_elems.push_back(const_int32_31);
-const_packed_30_elems.push_back(const_int32_24);
-Constant* const_packed_30 = ConstantVector::get(VectorTy_7, const_packed_30_elems);
-std::vector<Constant*> const_packed_32_elems;
-const_packed_32_elems.push_back(const_int32_19);
-const_packed_32_elems.push_back(const_int32_23);
-const_packed_32_elems.push_back(const_int32_25);
-ConstantInt* const_int32_33 = ConstantInt::get(APInt(32, "7", 10));
-const_packed_32_elems.push_back(const_int32_33);
-Constant* const_packed_32 = ConstantVector::get(VectorTy_7, const_packed_32_elems);
-std::vector<Constant*> const_ptr_34_indices;
-const_ptr_34_indices.push_back(const_int32_19);
-const_ptr_34_indices.push_back(const_int32_19);
-Constant* const_ptr_34 = ConstantExpr::getGetElementPtr(gvar_array__str, &const_ptr_34_indices[0], const_ptr_34_indices.size() );
-UndefValue* const_packed_35 = UndefValue::get(VectorTy_4);
-std::vector<Constant*> const_ptr_36_indices;
-const_ptr_36_indices.push_back(const_int32_19);
-const_ptr_36_indices.push_back(const_int32_19);
-Constant* const_ptr_36 = ConstantExpr::getGetElementPtr(gvar_array__str1, &const_ptr_36_indices[0], const_ptr_36_indices.size() );
-
-// Global Variable Definitions
-gvar_array__str->setInitializer(const_array_14);
-gvar_array__str1->setInitializer(const_array_15);
-
-// Function Definitions
-
-// Function: approx (func_approx)
-{
- Function::arg_iterator args = func_approx->arg_begin();
- Value* float_a = args++;
- float_a->setName("a");
- Value* float_b = args++;
- float_b->setName("b");
-
- BasicBlock* label_entry = new BasicBlock("entry",func_approx,0);
-
- // Block entry (label_entry)
- FCmpInst* int1_cmp = new FCmpInst(FCmpInst::FCMP_OLT, float_b, const_float_16, "cmp", label_entry);
- SelectInst* float_b_addr_0 = new SelectInst(int1_cmp, const_float_16, float_b, "b.addr.0", label_entry);
- FCmpInst* int1_cmp3 = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0, const_float_17, "cmp3", label_entry);
- SelectInst* float_b_addr_1 = new SelectInst(int1_cmp3, const_float_17, float_b_addr_0, "b.addr.1", label_entry);
- FCmpInst* int1_cmp7 = new FCmpInst(FCmpInst::FCMP_OLT, float_a, const_float_18, "cmp7", label_entry);
- SelectInst* float_a_addr_0 = new SelectInst(int1_cmp7, const_float_18, float_a, "a.addr.0", label_entry);
- std::vector<Value*> float_call_params;
- float_call_params.push_back(float_a_addr_0);
- float_call_params.push_back(float_b_addr_1);
- CallInst* float_call = new CallInst(func_powf, float_call_params.begin(), float_call_params.end(), "call", label_entry);
- float_call->setCallingConv(CallingConv::C);
- float_call->setTailCall(true);const ParamAttrsList *float_call_PAL = 0;
- float_call->setParamAttrs(float_call_PAL);
-
- new ReturnInst(float_call, label_entry);
-
-}
-
-// Function: lit (func_lit)
-{
- Function::arg_iterator args = func_lit->arg_begin();
- Value* packed_tmp = args++;
- packed_tmp->setName("tmp");
-
- BasicBlock* label_entry_38 = new BasicBlock("entry",func_lit,0);
- BasicBlock* label_ifthen = new BasicBlock("ifthen",func_lit,0);
- BasicBlock* label_UnifiedReturnBlock = new BasicBlock("UnifiedReturnBlock",func_lit,0);
-
- // Block entry (label_entry_38)
- ExtractElementInst* float_tmp6 = new ExtractElementInst(packed_tmp, const_int32_19, "tmp6", label_entry_38);
- FCmpInst* int1_cmp_39 = new FCmpInst(FCmpInst::FCMP_OGT, float_tmp6, const_float_18, "cmp", label_entry_38);
- new BranchInst(label_ifthen, label_UnifiedReturnBlock, int1_cmp_39, label_entry_38);
-
- // Block ifthen (label_ifthen)
- InsertElementInst* packed_tmp10 = new InsertElementInst(const_packed_20, float_tmp6, const_int32_23, "tmp10", label_ifthen);
- ExtractElementInst* float_tmp12 = new ExtractElementInst(packed_tmp, const_int32_23, "tmp12", label_ifthen);
- ExtractElementInst* float_tmp14 = new ExtractElementInst(packed_tmp, const_int32_24, "tmp14", label_ifthen);
- std::vector<Value*> float_call_41_params;
- float_call_41_params.push_back(float_tmp12);
- float_call_41_params.push_back(float_tmp14);
- CallInst* float_call_41 = new CallInst(func_approx, float_call_41_params.begin(), float_call_41_params.end(), "call", label_ifthen);
- float_call_41->setCallingConv(CallingConv::C);
- float_call_41->setTailCall(true);const ParamAttrsList *float_call_41_PAL = 0;
- float_call_41->setParamAttrs(float_call_41_PAL);
-
- InsertElementInst* packed_tmp16 = new InsertElementInst(packed_tmp10, float_call_41, const_int32_25, "tmp16", label_ifthen);
- new ReturnInst(packed_tmp16, label_ifthen);
-
- // Block UnifiedReturnBlock (label_UnifiedReturnBlock)
- new ReturnInst(const_packed_26, label_UnifiedReturnBlock);
-
-}
-
-// Function: cmp (func_cmp)
-{
- Function::arg_iterator args = func_cmp->arg_begin();
- Value* packed_tmp0 = args++;
- packed_tmp0->setName("tmp0");
- Value* packed_tmp1 = args++;
- packed_tmp1->setName("tmp1");
- Value* packed_tmp2 = args++;
- packed_tmp2->setName("tmp2");
-
- BasicBlock* label_entry_44 = new BasicBlock("entry",func_cmp,0);
- BasicBlock* label_cond__14 = new BasicBlock("cond.?14",func_cmp,0);
- BasicBlock* label_cond_cont20 = new BasicBlock("cond.cont20",func_cmp,0);
- BasicBlock* label_cond__28 = new BasicBlock("cond.?28",func_cmp,0);
- BasicBlock* label_cond_cont34 = new BasicBlock("cond.cont34",func_cmp,0);
- BasicBlock* label_cond__42 = new BasicBlock("cond.?42",func_cmp,0);
- BasicBlock* label_cond_cont48 = new BasicBlock("cond.cont48",func_cmp,0);
-
- // Block entry (label_entry_44)
- ExtractElementInst* float_tmp3 = new ExtractElementInst(packed_tmp0, const_int32_19, "tmp3", label_entry_44);
- CastInst* double_conv = new FPExtInst(float_tmp3, Type::DoubleTy, "conv", label_entry_44);
- FCmpInst* int1_cmp_45 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv, const_double_27, "cmp", label_entry_44);
- ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp11", label_entry_44);
- CastInst* double_conv12 = new FPExtInst(float_tmp11, Type::DoubleTy, "conv12", label_entry_44);
- FCmpInst* int1_cmp13 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv12, const_double_27, "cmp13", label_entry_44);
- SelectInst* packed_tmp1_tmp2 = new SelectInst(int1_cmp_45, packed_tmp1, packed_tmp2, "tmp1.tmp2", label_entry_44);
- new BranchInst(label_cond__14, label_cond_cont20, int1_cmp13, label_entry_44);
-
- // Block cond.?14 (label_cond__14)
- ShuffleVectorInst* packed_tmp233 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp1, const_packed_28, "tmp233", label_cond__14);
- ExtractElementInst* float_tmp254 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp254", label_cond__14);
- CastInst* double_conv265 = new FPExtInst(float_tmp254, Type::DoubleTy, "conv265", label_cond__14);
- FCmpInst* int1_cmp276 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv265, const_double_27, "cmp276", label_cond__14);
- new BranchInst(label_cond__28, label_cond_cont34, int1_cmp276, label_cond__14);
-
- // Block cond.cont20 (label_cond_cont20)
- ShuffleVectorInst* packed_tmp23 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp2, const_packed_28, "tmp23", label_cond_cont20);
- ExtractElementInst* float_tmp25 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp25", label_cond_cont20);
- CastInst* double_conv26 = new FPExtInst(float_tmp25, Type::DoubleTy, "conv26", label_cond_cont20);
- FCmpInst* int1_cmp27 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv26, const_double_27, "cmp27", label_cond_cont20);
- new BranchInst(label_cond__28, label_cond_cont34, int1_cmp27, label_cond_cont20);
-
- // Block cond.?28 (label_cond__28)
- PHINode* packed_tmp23_reg2mem_0 = new PHINode(VectorTy_4, "tmp23.reg2mem.0", label_cond__28);
- packed_tmp23_reg2mem_0->reserveOperandSpace(2);
- packed_tmp23_reg2mem_0->addIncoming(packed_tmp233, label_cond__14);
- packed_tmp23_reg2mem_0->addIncoming(packed_tmp23, label_cond_cont20);
-
- ShuffleVectorInst* packed_tmp378 = new ShuffleVectorInst(packed_tmp23_reg2mem_0, packed_tmp1, const_packed_30, "tmp378", label_cond__28);
- ExtractElementInst* float_tmp399 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp399", label_cond__28);
- CastInst* double_conv4010 = new FPExtInst(float_tmp399, Type::DoubleTy, "conv4010", label_cond__28);
- FCmpInst* int1_cmp4111 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv4010, const_double_27, "cmp4111", label_cond__28);
- new BranchInst(label_cond__42, label_cond_cont48, int1_cmp4111, label_cond__28);
-
- // Block cond.cont34 (label_cond_cont34)
- PHINode* packed_tmp23_reg2mem_1 = new PHINode(VectorTy_4, "tmp23.reg2mem.1", label_cond_cont34);
- packed_tmp23_reg2mem_1->reserveOperandSpace(2);
- packed_tmp23_reg2mem_1->addIncoming(packed_tmp233, label_cond__14);
- packed_tmp23_reg2mem_1->addIncoming(packed_tmp23, label_cond_cont20);
-
- ShuffleVectorInst* packed_tmp37 = new ShuffleVectorInst(packed_tmp23_reg2mem_1, packed_tmp2, const_packed_30, "tmp37", label_cond_cont34);
- ExtractElementInst* float_tmp39 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp39", label_cond_cont34);
- CastInst* double_conv40 = new FPExtInst(float_tmp39, Type::DoubleTy, "conv40", label_cond_cont34);
- FCmpInst* int1_cmp41 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv40, const_double_27, "cmp41", label_cond_cont34);
- new BranchInst(label_cond__42, label_cond_cont48, int1_cmp41, label_cond_cont34);
-
- // Block cond.?42 (label_cond__42)
- PHINode* packed_tmp37_reg2mem_0 = new PHINode(VectorTy_4, "tmp37.reg2mem.0", label_cond__42);
- packed_tmp37_reg2mem_0->reserveOperandSpace(2);
- packed_tmp37_reg2mem_0->addIncoming(packed_tmp378, label_cond__28);
- packed_tmp37_reg2mem_0->addIncoming(packed_tmp37, label_cond_cont34);
-
- ShuffleVectorInst* packed_tmp5113 = new ShuffleVectorInst(packed_tmp37_reg2mem_0, packed_tmp1, const_packed_32, "tmp5113", label_cond__42);
- new ReturnInst(packed_tmp5113, label_cond__42);
-
- // Block cond.cont48 (label_cond_cont48)
- PHINode* packed_tmp37_reg2mem_1 = new PHINode(VectorTy_4, "tmp37.reg2mem.1", label_cond_cont48);
- packed_tmp37_reg2mem_1->reserveOperandSpace(2);
- packed_tmp37_reg2mem_1->addIncoming(packed_tmp378, label_cond__28);
- packed_tmp37_reg2mem_1->addIncoming(packed_tmp37, label_cond_cont34);
-
- ShuffleVectorInst* packed_tmp51 = new ShuffleVectorInst(packed_tmp37_reg2mem_1, packed_tmp2, const_packed_32, "tmp51", label_cond_cont48);
- new ReturnInst(packed_tmp51, label_cond_cont48);
-
-}
-
-// Function: vcos (func_vcos)
-{
- Function::arg_iterator args = func_vcos->arg_begin();
- Value* packed_val = args++;
- packed_val->setName("val");
-
- BasicBlock* label_entry_53 = new BasicBlock("entry",func_vcos,0);
-
- // Block entry (label_entry_53)
- ExtractElementInst* float_tmp1 = new ExtractElementInst(packed_val, const_int32_19, "tmp1", label_entry_53);
- CastInst* double_conv_54 = new FPExtInst(float_tmp1, Type::DoubleTy, "conv", label_entry_53);
- ExtractElementInst* float_tmp3_55 = new ExtractElementInst(packed_val, const_int32_23, "tmp3", label_entry_53);
- CastInst* double_conv4 = new FPExtInst(float_tmp3_55, Type::DoubleTy, "conv4", label_entry_53);
- ExtractElementInst* float_tmp6_56 = new ExtractElementInst(packed_val, const_int32_25, "tmp6", label_entry_53);
- CastInst* double_conv7 = new FPExtInst(float_tmp6_56, Type::DoubleTy, "conv7", label_entry_53);
- ExtractElementInst* float_tmp9 = new ExtractElementInst(packed_val, const_int32_24, "tmp9", label_entry_53);
- CastInst* double_conv10 = new FPExtInst(float_tmp9, Type::DoubleTy, "conv10", label_entry_53);
- std::vector<Value*> int32_call_params;
- int32_call_params.push_back(const_ptr_34);
- int32_call_params.push_back(double_conv_54);
- int32_call_params.push_back(double_conv4);
- int32_call_params.push_back(double_conv7);
- int32_call_params.push_back(double_conv10);
- CallInst* int32_call = new CallInst(func_printf, int32_call_params.begin(), int32_call_params.end(), "call", label_entry_53);
- int32_call->setCallingConv(CallingConv::C);
- int32_call->setTailCall(true);const ParamAttrsList *int32_call_PAL = 0;
- int32_call->setParamAttrs(int32_call_PAL);
-
- CallInst* float_call13 = new CallInst(func_cosf, float_tmp1, "call13", label_entry_53);
- float_call13->setCallingConv(CallingConv::C);
- float_call13->setTailCall(true);const ParamAttrsList *float_call13_PAL = 0;
- float_call13->setParamAttrs(float_call13_PAL);
-
- InsertElementInst* packed_tmp15 = new InsertElementInst(const_packed_35, float_call13, const_int32_19, "tmp15", label_entry_53);
- CallInst* float_call18 = new CallInst(func_cosf, float_tmp1, "call18", label_entry_53);
- float_call18->setCallingConv(CallingConv::C);
- float_call18->setTailCall(true);const ParamAttrsList *float_call18_PAL = 0;
- float_call18->setParamAttrs(float_call18_PAL);
-
- InsertElementInst* packed_tmp20 = new InsertElementInst(packed_tmp15, float_call18, const_int32_23, "tmp20", label_entry_53);
- CallInst* float_call23 = new CallInst(func_cosf, float_tmp1, "call23", label_entry_53);
- float_call23->setCallingConv(CallingConv::C);
- float_call23->setTailCall(true);const ParamAttrsList *float_call23_PAL = 0;
- float_call23->setParamAttrs(float_call23_PAL);
-
- InsertElementInst* packed_tmp25 = new InsertElementInst(packed_tmp20, float_call23, const_int32_25, "tmp25", label_entry_53);
- CallInst* float_call28 = new CallInst(func_cosf, float_tmp1, "call28", label_entry_53);
- float_call28->setCallingConv(CallingConv::C);
- float_call28->setTailCall(true);const ParamAttrsList *float_call28_PAL = 0;
- float_call28->setParamAttrs(float_call28_PAL);
-
- InsertElementInst* packed_tmp30 = new InsertElementInst(packed_tmp25, float_call28, const_int32_24, "tmp30", label_entry_53);
- CastInst* double_conv33 = new FPExtInst(float_call13, Type::DoubleTy, "conv33", label_entry_53);
- CastInst* double_conv36 = new FPExtInst(float_call18, Type::DoubleTy, "conv36", label_entry_53);
- CastInst* double_conv39 = new FPExtInst(float_call23, Type::DoubleTy, "conv39", label_entry_53);
- CastInst* double_conv42 = new FPExtInst(float_call28, Type::DoubleTy, "conv42", label_entry_53);
- std::vector<Value*> int32_call43_params;
- int32_call43_params.push_back(const_ptr_36);
- int32_call43_params.push_back(double_conv33);
- int32_call43_params.push_back(double_conv36);
- int32_call43_params.push_back(double_conv39);
- int32_call43_params.push_back(double_conv42);
- CallInst* int32_call43 = new CallInst(func_printf, int32_call43_params.begin(), int32_call43_params.end(), "call43", label_entry_53);
- int32_call43->setCallingConv(CallingConv::C);
- int32_call43->setTailCall(true);const ParamAttrsList *int32_call43_PAL = 0;
- int32_call43->setParamAttrs(int32_call43_PAL);
-
- new ReturnInst(packed_tmp30, label_entry_53);
-
-}
-
-// Function: scs (func_scs)
-{
- Function::arg_iterator args = func_scs->arg_begin();
- Value* packed_val_58 = args++;
- packed_val_58->setName("val");
-
- BasicBlock* label_entry_59 = new BasicBlock("entry",func_scs,0);
-
- // Block entry (label_entry_59)
- ExtractElementInst* float_tmp2 = new ExtractElementInst(packed_val_58, const_int32_19, "tmp2", label_entry_59);
- CallInst* float_call_60 = new CallInst(func_cosf, float_tmp2, "call", label_entry_59);
- float_call_60->setCallingConv(CallingConv::C);
- float_call_60->setTailCall(true);const ParamAttrsList *float_call_60_PAL = 0;
- float_call_60->setParamAttrs(float_call_60_PAL);
-
- InsertElementInst* packed_tmp5 = new InsertElementInst(const_packed_35, float_call_60, const_int32_19, "tmp5", label_entry_59);
- CallInst* float_call7 = new CallInst(func_sinf, float_tmp2, "call7", label_entry_59);
- float_call7->setCallingConv(CallingConv::C);
- float_call7->setTailCall(true);const ParamAttrsList *float_call7_PAL = 0;
- float_call7->setParamAttrs(float_call7_PAL);
-
- InsertElementInst* packed_tmp9 = new InsertElementInst(packed_tmp5, float_call7, const_int32_23, "tmp9", label_entry_59);
- new ReturnInst(packed_tmp9, label_entry_59);
-
-}
-
-// Function: vsin (func_vsin)
-{
- Function::arg_iterator args = func_vsin->arg_begin();
- Value* packed_val_62 = args++;
- packed_val_62->setName("val");
-
- BasicBlock* label_entry_63 = new BasicBlock("entry",func_vsin,0);
-
- // Block entry (label_entry_63)
- ExtractElementInst* float_tmp2_64 = new ExtractElementInst(packed_val_62, const_int32_19, "tmp2", label_entry_63);
- CallInst* float_call_65 = new CallInst(func_sinf, float_tmp2_64, "call", label_entry_63);
- float_call_65->setCallingConv(CallingConv::C);
- float_call_65->setTailCall(true);const ParamAttrsList *float_call_65_PAL = 0;
- float_call_65->setParamAttrs(float_call_65_PAL);
-
- InsertElementInst* packed_tmp6 = new InsertElementInst(const_packed_35, float_call_65, const_int32_19, "tmp6", label_entry_63);
- InsertElementInst* packed_tmp9_66 = new InsertElementInst(packed_tmp6, float_call_65, const_int32_23, "tmp9", label_entry_63);
- InsertElementInst* packed_tmp12 = new InsertElementInst(packed_tmp9_66, float_call_65, const_int32_25, "tmp12", label_entry_63);
- InsertElementInst* packed_tmp15_67 = new InsertElementInst(packed_tmp12, float_call_65, const_int32_24, "tmp15", label_entry_63);
- new ReturnInst(packed_tmp15_67, label_entry_63);
-
-}
-
-// Function: kilp (func_kilp)
-{
- Function::arg_iterator args = func_kilp->arg_begin();
- Value* packed_val_69 = args++;
- packed_val_69->setName("val");
-
- BasicBlock* label_entry_70 = new BasicBlock("entry",func_kilp,0);
- BasicBlock* label_lor_rhs = new BasicBlock("lor_rhs",func_kilp,0);
- BasicBlock* label_lor_rhs5 = new BasicBlock("lor_rhs5",func_kilp,0);
- BasicBlock* label_lor_rhs11 = new BasicBlock("lor_rhs11",func_kilp,0);
- BasicBlock* label_UnifiedReturnBlock_71 = new BasicBlock("UnifiedReturnBlock",func_kilp,0);
-
- // Block entry (label_entry_70)
- ExtractElementInst* float_tmp1_72 = new ExtractElementInst(packed_val_69, const_int32_19, "tmp1", label_entry_70);
- FCmpInst* int1_cmp_73 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp1_72, const_float_18, "cmp", label_entry_70);
- new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs, int1_cmp_73, label_entry_70);
-
- // Block lor_rhs (label_lor_rhs)
- ExtractElementInst* float_tmp3_75 = new ExtractElementInst(packed_val_69, const_int32_23, "tmp3", label_lor_rhs);
- FCmpInst* int1_cmp4 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp3_75, const_float_18, "cmp4", label_lor_rhs);
- new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs5, int1_cmp4, label_lor_rhs);
-
- // Block lor_rhs5 (label_lor_rhs5)
- ExtractElementInst* float_tmp7 = new ExtractElementInst(packed_val_69, const_int32_25, "tmp7", label_lor_rhs5);
- FCmpInst* int1_cmp8 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp7, const_float_18, "cmp8", label_lor_rhs5);
- new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs11, int1_cmp8, label_lor_rhs5);
-
- // Block lor_rhs11 (label_lor_rhs11)
- ExtractElementInst* float_tmp13 = new ExtractElementInst(packed_val_69, const_int32_24, "tmp13", label_lor_rhs11);
- FCmpInst* int1_cmp14 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp13, const_float_18, "cmp14", label_lor_rhs11);
- CastInst* int32_retval = new ZExtInst(int1_cmp14, IntegerType::get(32), "retval", label_lor_rhs11);
- new ReturnInst(int32_retval, label_lor_rhs11);
-
- // Block UnifiedReturnBlock (label_UnifiedReturnBlock_71)
- new ReturnInst(const_int32_23, label_UnifiedReturnBlock_71);
-
-}
-
-return mod;
-
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Zack Rusin zack@tungstengraphics.com
- */
-#ifdef MESA_LLVM
-
-#include "gallivm.h"
-#include "gallivm_p.h"
-
-#include "instructions.h"
-#include "loweringpass.h"
-#include "storage.h"
-#include "tgsitollvm.h"
-
-#include "pipe/p_context.h"
-#include "pipe/p_shader_tokens.h"
-
-#include "tgsi/exec/tgsi_exec.h"
-#include "tgsi/util/tgsi_dump.h"
-
-#include <llvm/Module.h>
-#include <llvm/CallingConv.h>
-#include <llvm/Constants.h>
-#include <llvm/DerivedTypes.h>
-#include <llvm/Instructions.h>
-#include <llvm/ModuleProvider.h>
-#include <llvm/Pass.h>
-#include <llvm/PassManager.h>
-#include <llvm/ParameterAttributes.h>
-#include <llvm/Support/PatternMatch.h>
-#include <llvm/ExecutionEngine/JIT.h>
-#include <llvm/ExecutionEngine/Interpreter.h>
-#include <llvm/ExecutionEngine/GenericValue.h>
-#include <llvm/Support/MemoryBuffer.h>
-#include <llvm/LinkAllPasses.h>
-#include <llvm/Analysis/Verifier.h>
-#include <llvm/Analysis/LoopPass.h>
-#include <llvm/Target/TargetData.h>
-#include <llvm/Bitcode/ReaderWriter.h>
-#include <llvm/Transforms/Utils/Cloning.h>
-
-#include <sstream>
-#include <fstream>
-#include <iostream>
-
-struct gallivm_cpu_engine {
- llvm::ExecutionEngine *engine;
-};
-
-static struct gallivm_cpu_engine *CPU = 0;
-
-typedef int (*fragment_shader_runner)(float x, float y,
- float (*dests)[16][4],
- float (*inputs)[16][4],
- int num_attribs,
- float (*consts)[4], int num_consts,
- struct tgsi_sampler *samplers);
-
-int gallivm_cpu_fs_exec(struct gallivm_prog *prog,
- float fx, float fy,
- float (*dests)[16][4],
- float (*inputs)[16][4],
- float (*consts)[4],
- struct tgsi_sampler *samplers)
-{
- fragment_shader_runner runner = reinterpret_cast<fragment_shader_runner>(prog->function);
- assert(runner);
-
- return runner(fx, fy, dests, inputs, prog->num_interp,
- consts, prog->num_consts,
- samplers);
-}
-
-static inline llvm::Function *func_for_shader(struct gallivm_prog *prog)
-{
- llvm::Module *mod = prog->module;
- llvm::Function *func = 0;
-
- switch (prog->type) {
- case GALLIVM_VS:
- func = mod->getFunction("vs_shader");
- break;
- case GALLIVM_FS:
- func = mod->getFunction("fs_shader");
- break;
- default:
- assert(!"Unknown shader type!");
- break;
- }
- return func;
-}
-
-/*!
- This function creates a CPU based execution engine for the given gallivm_prog.
- gallivm_cpu_engine should be used as a singleton throughout the library. Before
- executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile.
- The gallivm_prog instance which is being passed to the constructor is being
- automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile
- with it again.
- */
-struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog)
-{
- struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *)
- calloc(1, sizeof(struct gallivm_cpu_engine));
- llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
- llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
- llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false);
- ee->DisableLazyCompilation();
- cpu->engine = ee;
-
- llvm::Function *func = func_for_shader(prog);
-
- prog->function = ee->getPointerToFunction(func);
- CPU = cpu;
- return cpu;
-}
-
-
-/*!
- This function JIT compiles the given gallivm_prog with the given cpu based execution engine.
- The reference to the generated machine code entry point will be stored
- in the gallivm_prog program. After executing this function one can call gallivm_prog_exec
- in order to execute the gallivm_prog on the CPU.
- */
-void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog)
-{
- llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
- llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
- llvm::ExecutionEngine *ee = cpu->engine;
- assert(ee);
- /*FIXME : remove */
- ee->DisableLazyCompilation();
- ee->addModuleProvider(mp);
-
- llvm::Function *func = func_for_shader(prog);
- prog->function = ee->getPointerToFunction(func);
-}
-
-void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu)
-{
- free(cpu);
-}
-
-struct gallivm_cpu_engine * gallivm_global_cpu_engine()
-{
- return CPU;
-}
-
-
-typedef void (*vertex_shader_runner)(void *ainputs,
- void *dests,
- float (*aconsts)[4],
- void *temps);
-
-
-/*!
- This function is used to execute the gallivm_prog in software. Before calling
- this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile
- function.
- */
-int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
- struct tgsi_exec_vector *inputs,
- struct tgsi_exec_vector *dests,
- float (*consts)[4],
- struct tgsi_exec_vector *temps)
-{
- vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function);
- assert(runner);
- /*FIXME*/
- runner(inputs, dests, consts, temps);
-
- return 0;
-}
-
-#endif
+++ /dev/null
-#ifndef GALLIVM_P_H
-#define GALLIVM_P_H
-
-#ifdef MESA_LLVM
-
-#include "gallivm.h"
-#include "pipe/p_shader_tokens.h"
-#include "pipe/p_compiler.h"
-
-namespace llvm {
- class Module;
-}
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-enum gallivm_shader_type;
-enum gallivm_vector_layout;
-
-struct gallivm_interpolate {
- int attrib;
- int chan;
- int type;
-};
-
-struct gallivm_ir {
- llvm::Module *module;
- int id;
- enum gallivm_shader_type type;
- enum gallivm_vector_layout layout;
- int num_components;
- int num_consts;
-
- //FIXME: this might not be enough for some shaders
- struct gallivm_interpolate interpolators[32*4];
- int num_interp;
-};
-
-struct gallivm_prog {
- llvm::Module *module;
- void *function;
-
- int id;
- enum gallivm_shader_type type;
-
- int num_consts;
-
- //FIXME: this might not be enough for some shaders
- struct gallivm_interpolate interpolators[32*4];
- int num_interp;
-};
-
-static INLINE void gallivm_swizzle_components(int swizzle,
- int *xc, int *yc,
- int *zc, int *wc)
-{
- int x = swizzle / 1000; swizzle -= x * 1000;
- int y = swizzle / 100; swizzle -= y * 100;
- int z = swizzle / 10; swizzle -= z * 10;
- int w = swizzle;
-
- if (xc) *xc = x;
- if (yc) *yc = y;
- if (zc) *zc = z;
- if (wc) *wc = w;
-}
-
-static INLINE boolean gallivm_is_swizzle(int swizzle)
-{
- const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 +
- TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W;
- return swizzle != NO_SWIZZLE;
-}
-
-static INLINE int gallivm_x_swizzle(int swizzle)
-{
- int x;
- gallivm_swizzle_components(swizzle, &x, 0, 0, 0);
- return x;
-}
-
-static INLINE int gallivm_y_swizzle(int swizzle)
-{
- int y;
- gallivm_swizzle_components(swizzle, 0, &y, 0, 0);
- return y;
-}
-
-static INLINE int gallivm_z_swizzle(int swizzle)
-{
- int z;
- gallivm_swizzle_components(swizzle, 0, 0, &z, 0);
- return z;
-}
-
-static INLINE int gallivm_w_swizzle(int swizzle)
-{
- int w;
- gallivm_swizzle_components(swizzle, 0, 0, 0, &w);
- return w;
-}
-
-#endif /* MESA_LLVM */
-
-#if defined __cplusplus
-} // extern "C"
-#endif
-
-#endif
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Zack Rusin zack@tungstengraphics.com
- */
-#ifdef MESA_LLVM
-
-#include "instructions.h"
-
-#include "storage.h"
-
-#include <llvm/CallingConv.h>
-#include <llvm/Constants.h>
-#include <llvm/DerivedTypes.h>
-#include <llvm/Function.h>
-#include <llvm/InstrTypes.h>
-#include <llvm/Instructions.h>
-#include <llvm/ParameterAttributes.h>
-
-#include <sstream>
-#include <fstream>
-#include <iostream>
-
-using namespace llvm;
-
-#include "gallivm_builtins.cpp"
-
-static inline std::string createFuncName(int label)
-{
- std::ostringstream stream;
- stream << "function";
- stream << label;
- return stream.str();
-}
-
-Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block,
- Storage *storage)
- : m_mod(mod), m_func(func), m_builder(block), m_idx(0),
- m_storage(storage)
-{
- m_floatVecType = VectorType::get(Type::FloatTy, 4);
-
- m_llvmFSqrt = 0;
- m_llvmFAbs = 0;
- m_llvmPow = 0;
- m_llvmFloor = 0;
- m_llvmFlog = 0;
- m_llvmLit = 0;
- m_fmtPtr = 0;
-
- createGallivmBuiltins(m_mod);
-}
-
-llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2)
-{
- return m_builder.CreateAdd(in1, in2, name("add"));
-}
-
-llvm::Value * Instructions::madd(llvm::Value *in1, llvm::Value *in2,
- llvm::Value *in3)
-{
- Value *mulRes = mul(in1, in2);
- return add(mulRes, in3);
-}
-
-llvm::Value * Instructions::mul(llvm::Value *in1, llvm::Value *in2)
-{
- return m_builder.CreateMul(in1, in2, name("mul"));
-}
-
-const char * Instructions::name(const char *prefix)
-{
- ++m_idx;
- snprintf(m_name, 32, "%s%d", prefix, m_idx);
- return m_name;
-}
-
-llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2)
-{
- Value *mulRes = mul(in1, in2);
- Value *x = m_builder.CreateExtractElement(mulRes,
- m_storage->constantInt(0),
- name("extractx"));
- Value *y = m_builder.CreateExtractElement(mulRes,
- m_storage->constantInt(1),
- name("extracty"));
- Value *z = m_builder.CreateExtractElement(mulRes,
- m_storage->constantInt(2),
- name("extractz"));
- Value *xy = m_builder.CreateAdd(x, y,name("xy"));
- Value *dot3 = m_builder.CreateAdd(xy, z, name("dot3"));
- return vectorFromVals(dot3, dot3, dot3, dot3);
-}
-
-llvm::Value *Instructions::callFSqrt(llvm::Value *val)
-{
- if (!m_llvmFSqrt) {
- // predeclare the intrinsic
- std::vector<const Type*> fsqrtArgs;
- fsqrtArgs.push_back(Type::FloatTy);
- ParamAttrsList *fsqrtPal = 0;
- FunctionType* fsqrtType = FunctionType::get(
- /*Result=*/Type::FloatTy,
- /*Params=*/fsqrtArgs,
- /*isVarArg=*/false);
- m_llvmFSqrt = new Function(
- /*Type=*/fsqrtType,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"llvm.sqrt.f32", m_mod);
- m_llvmFSqrt->setCallingConv(CallingConv::C);
- m_llvmFSqrt->setParamAttrs(fsqrtPal);
- }
- CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val,
- name("sqrt"));
- call->setCallingConv(CallingConv::C);
- call->setTailCall(false);
- return call;
-}
-
-llvm::Value * Instructions::rsq(llvm::Value *in1)
-{
- Value *x = m_builder.CreateExtractElement(in1,
- m_storage->constantInt(0),
- name("extractx"));
- Value *abs = callFAbs(x);
- Value *sqrt = callFSqrt(abs);
-
- Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy,
- APFloat(1.f)),
- sqrt,
- name("rsqrt"));
- return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt);
-}
-
-llvm::Value * Instructions::vectorFromVals(llvm::Value *x, llvm::Value *y,
- llvm::Value *z, llvm::Value *w)
-{
- Constant *const_vec = Constant::getNullValue(m_floatVecType);
- Value *res = m_builder.CreateInsertElement(const_vec, x,
- m_storage->constantInt(0),
- name("vecx"));
- res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1),
- name("vecxy"));
- res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2),
- name("vecxyz"));
- if (w)
- res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3),
- name("vecxyzw"));
- return res;
-}
-
-llvm::Value *Instructions::callFAbs(llvm::Value *val)
-{
- if (!m_llvmFAbs) {
- // predeclare the intrinsic
- std::vector<const Type*> fabsArgs;
- fabsArgs.push_back(Type::FloatTy);
- ParamAttrsList *fabsPal = 0;
- FunctionType* fabsType = FunctionType::get(
- /*Result=*/Type::FloatTy,
- /*Params=*/fabsArgs,
- /*isVarArg=*/false);
- m_llvmFAbs = new Function(
- /*Type=*/fabsType,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"fabs", m_mod);
- m_llvmFAbs->setCallingConv(CallingConv::C);
- m_llvmFAbs->setParamAttrs(fabsPal);
- }
- CallInst *call = m_builder.CreateCall(m_llvmFAbs, val,
- name("fabs"));
- call->setCallingConv(CallingConv::C);
- call->setTailCall(false);
- return call;
-}
-
-llvm::Value * Instructions::lit(llvm::Value *in)
-{
- if (!m_llvmLit) {
- m_llvmLit = m_mod->getFunction("lit");
- }
- CallInst *call = m_builder.CreateCall(m_llvmLit, in, name("litres"));
- call->setCallingConv(CallingConv::C);
- call->setTailCall(false);
- return call;
-}
-
-llvm::Value * Instructions::sub(llvm::Value *in1, llvm::Value *in2)
-{
- Value *res = m_builder.CreateSub(in1, in2, name("sub"));
- return res;
-}
-
-llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2)
-{
- if (!m_llvmPow) {
- // predeclare the intrinsic
- std::vector<const Type*> powArgs;
- powArgs.push_back(Type::FloatTy);
- powArgs.push_back(Type::FloatTy);
- ParamAttrsList *powPal = 0;
- FunctionType* powType = FunctionType::get(
- /*Result=*/Type::FloatTy,
- /*Params=*/powArgs,
- /*isVarArg=*/false);
- m_llvmPow = new Function(
- /*Type=*/powType,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"llvm.pow.f32", m_mod);
- m_llvmPow->setCallingConv(CallingConv::C);
- m_llvmPow->setParamAttrs(powPal);
- }
- std::vector<Value*> params;
- params.push_back(val1);
- params.push_back(val2);
- CallInst *call = m_builder.CreateCall(m_llvmPow, params.begin(), params.end(),
- name("pow"));
- call->setCallingConv(CallingConv::C);
- call->setTailCall(false);
- return call;
-}
-
-llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2)
-{
- Value *x1 = m_builder.CreateExtractElement(in1,
- m_storage->constantInt(0),
- name("x1"));
- Value *x2 = m_builder.CreateExtractElement(in2,
- m_storage->constantInt(0),
- name("x2"));
- llvm::Value *val = callPow(x1, x2);
- return vectorFromVals(val, val, val, val);
-}
-
-llvm::Value * Instructions::rcp(llvm::Value *in1)
-{
- Value *x1 = m_builder.CreateExtractElement(in1,
- m_storage->constantInt(0),
- name("x1"));
- Value *res = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy,
- APFloat(1.f)),
- x1, name("rcp"));
- return vectorFromVals(res, res, res, res);
-}
-
-llvm::Value * Instructions::dp4(llvm::Value *in1, llvm::Value *in2)
-{
- Value *mulRes = mul(in1, in2);
- std::vector<llvm::Value*> vec = extractVector(mulRes);
- Value *xy = m_builder.CreateAdd(vec[0], vec[1], name("xy"));
- Value *xyz = m_builder.CreateAdd(xy, vec[2], name("xyz"));
- Value *dot4 = m_builder.CreateAdd(xyz, vec[3], name("dot4"));
- return vectorFromVals(dot4, dot4, dot4, dot4);
-}
-
-llvm::Value * Instructions::dph(llvm::Value *in1, llvm::Value *in2)
-{
- Value *mulRes = mul(in1, in2);
- std::vector<llvm::Value*> vec1 = extractVector(mulRes);
- Value *xy = m_builder.CreateAdd(vec1[0], vec1[1], name("xy"));
- Value *xyz = m_builder.CreateAdd(xy, vec1[2], name("xyz"));
- Value *dph = m_builder.CreateAdd(xyz, vec1[3], name("dph"));
- return vectorFromVals(dph, dph, dph, dph);
-}
-
-llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2)
-{
- Value *y1 = m_builder.CreateExtractElement(in1,
- m_storage->constantInt(1),
- name("y1"));
- Value *z = m_builder.CreateExtractElement(in1,
- m_storage->constantInt(2),
- name("z"));
- Value *y2 = m_builder.CreateExtractElement(in2,
- m_storage->constantInt(1),
- name("y2"));
- Value *w = m_builder.CreateExtractElement(in2,
- m_storage->constantInt(3),
- name("w"));
- Value *ry = m_builder.CreateMul(y1, y2, name("tyuy"));
- return vectorFromVals(ConstantFP::get(Type::FloatTy, APFloat(1.f)),
- ry, z, w);
-}
-
-llvm::Value * Instructions::ex2(llvm::Value *in)
-{
- llvm::Value *val = callPow(ConstantFP::get(Type::FloatTy, APFloat(2.f)),
- m_builder.CreateExtractElement(
- in, m_storage->constantInt(0),
- name("x1")));
- return vectorFromVals(val, val, val, val);
-}
-
-llvm::Value * Instructions::callFloor(llvm::Value *val)
-{
- if (!m_llvmFloor) {
- // predeclare the intrinsic
- std::vector<const Type*> floorArgs;
- floorArgs.push_back(Type::FloatTy);
- ParamAttrsList *floorPal = 0;
- FunctionType* floorType = FunctionType::get(
- /*Result=*/Type::FloatTy,
- /*Params=*/floorArgs,
- /*isVarArg=*/false);
- m_llvmFloor = new Function(
- /*Type=*/floorType,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"floorf", m_mod);
- m_llvmFloor->setCallingConv(CallingConv::C);
- m_llvmFloor->setParamAttrs(floorPal);
- }
- CallInst *call = m_builder.CreateCall(m_llvmFloor, val,
- name("floorf"));
- call->setCallingConv(CallingConv::C);
- call->setTailCall(false);
- return call;
-}
-
-llvm::Value * Instructions::floor(llvm::Value *in)
-{
- std::vector<llvm::Value*> vec = extractVector(in);
- return vectorFromVals(callFloor(vec[0]), callFloor(vec[1]),
- callFloor(vec[2]), callFloor(vec[3]));
-}
-
-llvm::Value * Instructions::arl(llvm::Value *in)
-{
- return floor(in);
-}
-
-llvm::Value * Instructions::frc(llvm::Value *in)
-{
- llvm::Value *flr = floor(in);
- return sub(in, flr);
-}
-
-llvm::Value * Instructions::callFLog(llvm::Value *val)
-{
- if (!m_llvmFlog) {
- // predeclare the intrinsic
- std::vector<const Type*> flogArgs;
- flogArgs.push_back(Type::FloatTy);
- ParamAttrsList *flogPal = 0;
- FunctionType* flogType = FunctionType::get(
- /*Result=*/Type::FloatTy,
- /*Params=*/flogArgs,
- /*isVarArg=*/false);
- m_llvmFlog = new Function(
- /*Type=*/flogType,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"logf", m_mod);
- m_llvmFlog->setCallingConv(CallingConv::C);
- m_llvmFlog->setParamAttrs(flogPal);
- }
- CallInst *call = m_builder.CreateCall(m_llvmFlog, val,
- name("logf"));
- call->setCallingConv(CallingConv::C);
- call->setTailCall(false);
- return call;
-}
-
-llvm::Value * Instructions::lg2(llvm::Value *in)
-{
- std::vector<llvm::Value*> vec = extractVector(in);
- llvm::Value *const_vec = constVector(1.442695f, 1.442695f,
- 1.442695f, 1.442695f);
- return mul(vectorFromVals(callFLog(vec[0]), callFLog(vec[1]),
- callFLog(vec[2]), callFLog(vec[3])), const_vec);
-}
-
-llvm::Value * Instructions::min(llvm::Value *in1, llvm::Value *in2)
-{
- std::vector<llvm::Value*> vec1 = extractVector(in1);
- std::vector<llvm::Value*> vec2 = extractVector(in2);
-
- Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp"));
- Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0],
- name("selx"));
-
- Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp"));
- Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1],
- name("sely"));
-
- Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp"));
- Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2],
- name("selz"));
-
- Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp"));
- Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3],
- name("selw"));
-
- return vectorFromVals(selx, sely, selz, selw);
-}
-
-llvm::Value * Instructions::max(llvm::Value *in1, llvm::Value *in2)
-{
- std::vector<llvm::Value*> vec1 = extractVector(in1);
- std::vector<llvm::Value*> vec2 = extractVector(in2);
-
- Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0],
- name("xcmp"));
- Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0],
- name("selx"));
-
- Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1],
- name("ycmp"));
- Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1],
- name("sely"));
-
- Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2],
- name("zcmp"));
- Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2],
- name("selz"));
-
- Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3],
- name("wcmp"));
- Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3],
- name("selw"));
-
- return vectorFromVals(selx, sely, selz, selw);
-}
-
-void Instructions::printVector(llvm::Value *val)
-{
- static const char *frmt = "Vector is [%f, %f, %f, %f]\x0A";
-
- if (!m_fmtPtr) {
- Constant *format = ConstantArray::get(frmt, true);
- ArrayType *arrayTy = ArrayType::get(IntegerType::get(8), strlen(frmt) + 1);
- GlobalVariable* globalFormat = new GlobalVariable(
- /*Type=*/arrayTy,
- /*isConstant=*/true,
- /*Linkage=*/GlobalValue::InternalLinkage,
- /*Initializer=*/0, // has initializer, specified below
- /*Name=*/name(".str"),
- m_mod);
- globalFormat->setInitializer(format);
-
- Constant* const_int0 = Constant::getNullValue(IntegerType::get(32));
- std::vector<Constant*> const_ptr_21_indices;
- const_ptr_21_indices.push_back(const_int0);
- const_ptr_21_indices.push_back(const_int0);
- m_fmtPtr = ConstantExpr::getGetElementPtr(globalFormat,
- &const_ptr_21_indices[0], const_ptr_21_indices.size());
- }
-
- Function *func_printf = m_mod->getFunction("printf");
- if (!func_printf)
- func_printf = declarePrintf();
- assert(func_printf);
- std::vector<llvm::Value*> vec = extractVector(val);
- Value *dx = m_builder.CreateFPExt(vec[0], Type::DoubleTy, name("dx"));
- Value *dy = m_builder.CreateFPExt(vec[1], Type::DoubleTy, name("dy"));
- Value *dz = m_builder.CreateFPExt(vec[2], Type::DoubleTy, name("dz"));
- Value *dw = m_builder.CreateFPExt(vec[3], Type::DoubleTy, name("dw"));
- std::vector<Value*> params;
- params.push_back(m_fmtPtr);
- params.push_back(dx);
- params.push_back(dy);
- params.push_back(dz);
- params.push_back(dw);
- CallInst *call = m_builder.CreateCall(func_printf, params.begin(), params.end(),
- name("printf"));
- call->setCallingConv(CallingConv::C);
- call->setTailCall(true);
-}
-
-llvm::Function * Instructions::declarePrintf()
-{
- std::vector<const Type*> args;
- ParamAttrsList *params = 0;
- FunctionType* funcTy = FunctionType::get(
- /*Result=*/IntegerType::get(32),
- /*Params=*/args,
- /*isVarArg=*/true);
- Function* func_printf = new Function(
- /*Type=*/funcTy,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/"printf", m_mod);
- func_printf->setCallingConv(CallingConv::C);
- func_printf->setParamAttrs(params);
- return func_printf;
-}
-
-
-llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2)
-{
- Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
- Constant *const0f = Constant::getNullValue(Type::FloatTy);
-
- std::vector<llvm::Value*> vec1 = extractVector(in1);
- std::vector<llvm::Value*> vec2 = extractVector(in2);
- Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], name("xcmp"));
- Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel"));
-
- Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], name("ycmp"));
- Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel"));
-
- Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], name("zcmp"));
- Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel"));
-
- Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], name("wcmp"));
- Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel"));
-
- return vectorFromVals(x, y, z, w);
-}
-llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2)
-{
- Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
- Constant *const0f = Constant::getNullValue(Type::FloatTy);
-
- std::vector<llvm::Value*> vec1 = extractVector(in1);
- std::vector<llvm::Value*> vec2 = extractVector(in2);
-
- Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], vec2[0], name("xcmp"));
- Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel"));
-
- Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], vec2[1], name("ycmp"));
- Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel"));
-
- Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], vec2[2], name("zcmp"));
- Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel"));
-
- Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], vec2[3], name("wcmp"));
- Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel"));
-
- return vectorFromVals(x, y, z, w);
-}
-
-
-llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2)
-{
- Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
- Constant *const0f = Constant::getNullValue(Type::FloatTy);
-
- std::vector<llvm::Value*> vec1 = extractVector(in1);
- std::vector<llvm::Value*> vec2 = extractVector(in2);
-
- Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp"));
- Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel"));
-
- Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp"));
- Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel"));
-
- Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp"));
- Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel"));
-
- Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp"));
- Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel"));
-
- return vectorFromVals(x, y, z, w);
-}
-
-llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2)
-{
- Value *x1 = m_builder.CreateExtractElement(in1,
- m_storage->constantInt(0),
- name("x1"));
- Value *y1 = m_builder.CreateExtractElement(in1,
- m_storage->constantInt(1),
- name("y1"));
- Value *z1 = m_builder.CreateExtractElement(in1,
- m_storage->constantInt(2),
- name("z1"));
-
- Value *x2 = m_builder.CreateExtractElement(in2,
- m_storage->constantInt(0),
- name("x2"));
- Value *y2 = m_builder.CreateExtractElement(in2,
- m_storage->constantInt(1),
- name("y2"));
- Value *z2 = m_builder.CreateExtractElement(in2,
- m_storage->constantInt(2),
- name("z2"));
- Value *y1z2 = mul(y1, z2);
- Value *z1y2 = mul(z1, y2);
-
- Value *z1x2 = mul(z1, x2);
- Value *x1z2 = mul(x1, z2);
-
- Value *x1y2 = mul(x1, y2);
- Value *y1x2 = mul(y1, x2);
-
- return vectorFromVals(sub(y1z2, z1y2), sub(z1x2, x1z2), sub(x1y2, y1x2));
-}
-
-
-llvm::Value * Instructions::abs(llvm::Value *in)
-{
- std::vector<llvm::Value*> vec = extractVector(in);
- Value *xabs = callFAbs(vec[0]);
- Value *yabs = callFAbs(vec[1]);
- Value *zabs = callFAbs(vec[2]);
- Value *wabs = callFAbs(vec[3]);
- return vectorFromVals(xabs, yabs, zabs, wabs);
-}
-
-void Instructions::ifop(llvm::Value *in)
-{
- BasicBlock *ifthen = new BasicBlock(name("ifthen"), m_func,0);
- BasicBlock *ifend = new BasicBlock(name("ifthenend"), m_func,0);
-
- //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0);
- //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0);
- //BasicBlock *wblock = new BasicBlock(name("wblock"), m_func,0);
-
- Constant *float0 = Constant::getNullValue(Type::FloatTy);
-
- Value *x = m_builder.CreateExtractElement(in, m_storage->constantInt(0),
- name("extractx"));
- Value *xcmp = m_builder.CreateFCmpUNE(x, float0, name("xcmp"));
- m_builder.CreateCondBr(xcmp, ifthen, ifend);
- //m_builder.SetInsertPoint(yblock);
-
- m_builder.SetInsertPoint(ifthen);
- m_ifStack.push(ifend);
-}
-
-llvm::BasicBlock * Instructions::currentBlock() const
-{
- return m_builder.GetInsertBlock();
-}
-
-void Instructions::elseop()
-{
- assert(!m_ifStack.empty());
- BasicBlock *ifend = new BasicBlock(name("ifend"), m_func,0);
- m_builder.CreateBr(ifend);
- m_builder.SetInsertPoint(m_ifStack.top());
- currentBlock()->setName(name("ifelse"));
- m_ifStack.pop();
- m_ifStack.push(ifend);
-}
-
-void Instructions::endif()
-{
- assert(!m_ifStack.empty());
- m_builder.CreateBr(m_ifStack.top());
- m_builder.SetInsertPoint(m_ifStack.top());
- m_ifStack.pop();
-}
-
-llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2,
- llvm::Value *in3)
-{
- llvm::Value *m = mul(in1, in2);
- llvm::Value *vec1 = constVector(1.f, 1.f, 1.f, 1.f);
- llvm::Value *s = sub(vec1, in1);
- return add(m, mul(s, in3));
-}
-
-void Instructions::beginLoop()
-{
- BasicBlock *begin = new BasicBlock(name("loop"), m_func,0);
- BasicBlock *end = new BasicBlock(name("endloop"), m_func,0);
-
- m_builder.CreateBr(begin);
- Loop loop;
- loop.begin = begin;
- loop.end = end;
- m_builder.SetInsertPoint(begin);
- m_loopStack.push(loop);
-}
-
-void Instructions::endLoop()
-{
- assert(!m_loopStack.empty());
- Loop loop = m_loopStack.top();
- m_builder.CreateBr(loop.begin);
- loop.end->moveAfter(currentBlock());
- m_builder.SetInsertPoint(loop.end);
- m_loopStack.pop();
-}
-
-void Instructions::brk()
-{
- assert(!m_loopStack.empty());
- BasicBlock *unr = new BasicBlock(name("unreachable"), m_func,0);
- m_builder.CreateBr(m_loopStack.top().end);
- m_builder.SetInsertPoint(unr);
-}
-
-llvm::Value * Instructions::trunc(llvm::Value *in)
-{
- std::vector<llvm::Value*> vec = extractVector(in);
- Value *icastx = m_builder.CreateFPToSI(vec[0], IntegerType::get(32),
- name("ftoix"));
- Value *icasty = m_builder.CreateFPToSI(vec[1], IntegerType::get(32),
- name("ftoiy"));
- Value *icastz = m_builder.CreateFPToSI(vec[2], IntegerType::get(32),
- name("ftoiz"));
- Value *icastw = m_builder.CreateFPToSI(vec[3], IntegerType::get(32),
- name("ftoiw"));
- Value *fx = m_builder.CreateSIToFP(icastx, Type::FloatTy,
- name("fx"));
- Value *fy = m_builder.CreateSIToFP(icasty, Type::FloatTy,
- name("fy"));
- Value *fz = m_builder.CreateSIToFP(icastz, Type::FloatTy,
- name("fz"));
- Value *fw = m_builder.CreateSIToFP(icastw, Type::FloatTy,
- name("fw"));
- return vectorFromVals(fx, fy, fz, fw);
-}
-
-void Instructions::end()
-{
- m_builder.CreateRetVoid();
-}
-
-void Instructions::cal(int label, llvm::Value *input)
-{
- std::vector<Value*> params;
- params.push_back(input);
- llvm::Function *func = findFunction(label);
-
- m_builder.CreateCall(func, params.begin(), params.end());
-}
-
-llvm::Function * Instructions::declareFunc(int label)
-{
- PointerType *vecPtr = PointerType::getUnqual(m_floatVecType);
- std::vector<const Type*> args;
- args.push_back(vecPtr);
- args.push_back(vecPtr);
- args.push_back(vecPtr);
- args.push_back(vecPtr);
- ParamAttrsList *params = 0;
- FunctionType *funcType = FunctionType::get(
- /*Result=*/Type::VoidTy,
- /*Params=*/args,
- /*isVarArg=*/false);
- std::string name = createFuncName(label);
- Function *func = new Function(
- /*Type=*/funcType,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Name=*/name.c_str(), m_mod);
- func->setCallingConv(CallingConv::C);
- func->setParamAttrs(params);
- return func;
-}
-
-void Instructions::bgnSub(unsigned label)
-{
- llvm::Function *func = findFunction(label);
-
- Function::arg_iterator args = func->arg_begin();
- Value *ptr_INPUT = args++;
- ptr_INPUT->setName("INPUT");
- m_storage->pushArguments(ptr_INPUT);
-
- llvm::BasicBlock *entry = new BasicBlock("entry", func, 0);
-
- m_func = func;
- m_builder.SetInsertPoint(entry);
-}
-
-void Instructions::endSub()
-{
- m_func = 0;
- m_builder.SetInsertPoint(0);
-}
-
-llvm::Function * Instructions::findFunction(int label)
-{
- llvm::Function *func = m_functions[label];
- if (!func) {
- func = declareFunc(label);
- m_functions[label] = func;
- }
- return func;
-}
-
-llvm::Value * Instructions::constVector(float x, float y, float z, float w)
-{
- std::vector<Constant*> vec(4);
- vec[0] = ConstantFP::get(Type::FloatTy, APFloat(x));
- vec[1] = ConstantFP::get(Type::FloatTy, APFloat(y));
- vec[2] = ConstantFP::get(Type::FloatTy, APFloat(z));
- vec[3] = ConstantFP::get(Type::FloatTy, APFloat(w));
- return ConstantVector::get(m_floatVecType, vec);
-}
-
-
-std::vector<llvm::Value*> Instructions::extractVector(llvm::Value *vec)
-{
- std::vector<llvm::Value*> elems(4);
- elems[0] = m_builder.CreateExtractElement(vec, m_storage->constantInt(0),
- name("x"));
- elems[1] = m_builder.CreateExtractElement(vec, m_storage->constantInt(1),
- name("y"));
- elems[2] = m_builder.CreateExtractElement(vec, m_storage->constantInt(2),
- name("z"));
- elems[3] = m_builder.CreateExtractElement(vec, m_storage->constantInt(3),
- name("w"));
- return elems;
-}
-
-llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3)
-{
- llvm::Function *func = m_mod->getFunction("cmp");
- assert(func);
-
- std::vector<Value*> params;
- params.push_back(in1);
- params.push_back(in2);
- params.push_back(in3);
- CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), name("cmpres"));
- call->setTailCall(false);
- return call;
-}
-
-llvm::Value * Instructions::cos(llvm::Value *in)
-{
-#if 0
- llvm::Function *func = m_mod->getFunction("vcos");
- assert(func);
-
- CallInst *call = m_builder.CreateCall(func, in, name("cosres"));
- call->setTailCall(false);
- return call;
-#else
- std::vector<llvm::Value*> elems = extractVector(in);
- Function *func = m_mod->getFunction("cosf");
- assert(func);
- CallInst *cos = m_builder.CreateCall(func, elems[0], name("cosres"));
- cos->setCallingConv(CallingConv::C);
- cos->setTailCall(true);
- return vectorFromVals(cos, cos, cos, cos);
-#endif
-}
-
-llvm::Value * Instructions::scs(llvm::Value *in)
-{
- llvm::Function *func = m_mod->getFunction("scs");
- assert(func);
-
- CallInst *call = m_builder.CreateCall(func, in, name("scsres"));
- call->setTailCall(false);
- return call;
-}
-
-llvm::Value * Instructions::kilp(llvm::Value *in)
-{
- llvm::Function *func = m_mod->getFunction("kilp");
- assert(func);
-
- CallInst *call = m_builder.CreateCall(func, in, name("kilpres"));
- call->setTailCall(false);
- return call;
-}
-
-llvm::Value * Instructions::sin(llvm::Value *in)
-{
- llvm::Function *func = m_mod->getFunction("vsin");
- assert(func);
-
- CallInst *call = m_builder.CreateCall(func, in, name("sinres"));
- call->setTailCall(false);
- return call;
-}
-#endif //MESA_LLVM
-
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Zack Rusin zack@tungstengraphics.com
- */
-
-#ifndef INSTRUCTIONS_H
-#define INSTRUCTIONS_H
-
-#include <llvm/BasicBlock.h>
-#include <llvm/Module.h>
-#include <llvm/Value.h>
-#include <llvm/Support/LLVMBuilder.h>
-
-#include <map>
-#include <stack>
-
-namespace llvm {
- class VectorType;
- class Function;
-}
-
-class Storage;
-
-class Instructions
-{
-public:
- Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block,
- Storage *storage);
-
- llvm::BasicBlock *currentBlock() const;
-
- llvm::Value *abs(llvm::Value *in1);
- llvm::Value *arl(llvm::Value *in1);
- llvm::Value *add(llvm::Value *in1, llvm::Value *in2);
- void beginLoop();
- void bgnSub(unsigned);
- void brk();
- void cal(int label, llvm::Value *input);
- llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3);
- llvm::Value *cos(llvm::Value *in);
- llvm::Value *cross(llvm::Value *in1, llvm::Value *in2);
- llvm::Value *dp3(llvm::Value *in1, llvm::Value *in2);
- llvm::Value *dp4(llvm::Value *in1, llvm::Value *in2);
- llvm::Value *dph(llvm::Value *in1, llvm::Value *in2);
- llvm::Value *dst(llvm::Value *in1, llvm::Value *in2);
- void elseop();
- void endif();
- void endLoop();
- void end();
- void endSub();
- llvm::Value *ex2(llvm::Value *in);
- llvm::Value *floor(llvm::Value *in);
- llvm::Value *frc(llvm::Value *in);
- void ifop(llvm::Value *in);
- llvm::Value *kilp(llvm::Value *in);
- llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2,
- llvm::Value *in3);
- llvm::Value *lit(llvm::Value *in);
- llvm::Value *lg2(llvm::Value *in);
- llvm::Value *madd(llvm::Value *in1, llvm::Value *in2,
- llvm::Value *in2);
- llvm::Value *min(llvm::Value *in1, llvm::Value *in2);
- llvm::Value *max(llvm::Value *in1, llvm::Value *in2);
- llvm::Value *mul(llvm::Value *in1, llvm::Value *in2);
- llvm::Value *pow(llvm::Value *in1, llvm::Value *in2);
- llvm::Value *rcp(llvm::Value *in);
- llvm::Value *rsq(llvm::Value *in);
- llvm::Value *scs(llvm::Value *in);
- llvm::Value *sge(llvm::Value *in1, llvm::Value *in2);
- llvm::Value *sgt(llvm::Value *in1, llvm::Value *in2);
- llvm::Value *sin(llvm::Value *in);
- llvm::Value *slt(llvm::Value *in1, llvm::Value *in2);
- llvm::Value *sub(llvm::Value *in1, llvm::Value *in2);
- llvm::Value *trunc(llvm::Value *in);
-
- void printVector(llvm::Value *val);
-private:
- const char *name(const char *prefix);
-
- llvm::Value *callFAbs(llvm::Value *val);
- llvm::Value *callFloor(llvm::Value *val);
- llvm::Value *callFSqrt(llvm::Value *val);
- llvm::Value *callFLog(llvm::Value *val);
- llvm::Value *callPow(llvm::Value *val1, llvm::Value *val2);
-
- llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y,
- llvm::Value *z, llvm::Value *w=0);
-
- llvm::Value *constVector(float x, float y, float z, float w);
-
- llvm::Function *declarePrintf();
- llvm::Function *declareFunc(int label);
-
- llvm::Function *findFunction(int label);
-
- std::vector<llvm::Value*> extractVector(llvm::Value *vec);
-private:
- llvm::Module *m_mod;
- llvm::Function *m_func;
- char m_name[32];
- llvm::LLVMFoldingBuilder m_builder;
- int m_idx;
-
- llvm::VectorType *m_floatVecType;
-
- llvm::Function *m_llvmFSqrt;
- llvm::Function *m_llvmFAbs;
- llvm::Function *m_llvmPow;
- llvm::Function *m_llvmFloor;
- llvm::Function *m_llvmFlog;
- llvm::Function *m_llvmLit;
-
- llvm::Constant *m_fmtPtr;
-
- std::stack<llvm::BasicBlock*> m_ifStack;
- struct Loop {
- llvm::BasicBlock *begin;
- llvm::BasicBlock *end;
- };
- std::stack<Loop> m_loopStack;
- std::map<int, llvm::Function*> m_functions;
- Storage *m_storage;
-};
-
-#endif
+++ /dev/null
-#include "instructionssoa.h"
-
-#include "storagesoa.h"
-
-#include <llvm/Constants.h>
-
-using namespace llvm;
-
-InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func,
- llvm::BasicBlock *block, StorageSoa *storage)
- : m_builder(block),
- m_storage(storage),
- m_idx(0)
-{
-}
-
-const char * InstructionsSoa::name(const char *prefix) const
-{
- ++m_idx;
- snprintf(m_name, 32, "%s%d", prefix, m_idx);
- return m_name;
-}
-
-llvm::Value * InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y,
- llvm::Value *z, llvm::Value *w)
-{
- VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
- Constant *constVector = Constant::getNullValue(vectorType);
- Value *res = m_builder.CreateInsertElement(constVector, x,
- m_storage->constantInt(0),
- name("vecx"));
- res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1),
- name("vecxy"));
- res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2),
- name("vecxyz"));
- if (w)
- res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3),
- name("vecxyzw"));
- return res;
-}
-
-std::vector<llvm::Value*> InstructionsSoa::arl(const std::vector<llvm::Value*> in)
-{
- std::vector<llvm::Value*> res(4);
-
- //Extract x's
- llvm::Value *x1 = m_builder.CreateExtractElement(in[0],
- m_storage->constantInt(0),
- name("extractX"));
- //cast it to an unsigned int
- x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast"));
-
- res[0] = x1;//vectorFromVals(x1, x2, x3, x4);
- //only x is valid. the others shouldn't be necessary
- /*
- res[1] = Constant::getNullValue(m_floatVecType);
- res[2] = Constant::getNullValue(m_floatVecType);
- res[3] = Constant::getNullValue(m_floatVecType);
- */
-
- return res;
-}
-
-
-std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2)
-{
- std::vector<llvm::Value*> res(4);
-
- res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx"));
- res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy"));
- res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz"));
- res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw"));
-
- return res;
-}
-
-std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2)
-{
- std::vector<llvm::Value*> res(4);
-
- res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx"));
- res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly"));
- res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz"));
- res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw"));
-
- return res;
-}
-
-void InstructionsSoa::end()
-{
- m_builder.CreateRetVoid();
-}
-
-std::vector<llvm::Value*> InstructionsSoa::madd(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2,
- const std::vector<llvm::Value*> in3)
-{
- std::vector<llvm::Value*> res = mul(in1, in2);
- return add(res, in3);
-}
-
-std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector)
-{
- std::vector<llvm::Value*> res(4);
- res[0] = m_builder.CreateExtractElement(vector,
- m_storage->constantInt(0),
- name("extract1X"));
- res[1] = m_builder.CreateExtractElement(vector,
- m_storage->constantInt(1),
- name("extract2X"));
- res[2] = m_builder.CreateExtractElement(vector,
- m_storage->constantInt(2),
- name("extract3X"));
- res[3] = m_builder.CreateExtractElement(vector,
- m_storage->constantInt(3),
- name("extract4X"));
-
- return res;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef INSTRUCTIONSSOA_H
-#define INSTRUCTIONSSOA_H
-
-#include <llvm/Support/LLVMBuilder.h>
-
-#include <vector>
-
-namespace llvm {
- class Module;
- class Function;
- class BasicBlock;
- class Value;
-}
-class StorageSoa;
-
-class InstructionsSoa
-{
-public:
- InstructionsSoa(llvm::Module *mod, llvm::Function *func,
- llvm::BasicBlock *block, StorageSoa *storage);
-
- std::vector<llvm::Value*> arl(const std::vector<llvm::Value*> in);
-
- std::vector<llvm::Value*> add(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2);
- std::vector<llvm::Value*> madd(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2,
- const std::vector<llvm::Value*> in3);
- std::vector<llvm::Value*> mul(const std::vector<llvm::Value*> in1,
- const std::vector<llvm::Value*> in2);
- void end();
-
- std::vector<llvm::Value*> extractVector(llvm::Value *vector);
-private:
- const char * name(const char *prefix) const;
- llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y,
- llvm::Value *z, llvm::Value *w);
-private:
- llvm::LLVMFoldingBuilder m_builder;
- StorageSoa *m_storage;
-private:
- mutable int m_idx;
- mutable char m_name[32];
-};
-
-
-#endif
+++ /dev/null
-/*clang --emit-llvm llvm_builtins.c |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=gallivm_builtins.cpp -f -for=shader -funcname=createGallivmBuiltins*/
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Zack Rusin zack@tungstengraphics.com
- */
-typedef __attribute__(( ocu_vector_type(4) )) float float4;
-
-extern float powf(float a, float b);
-
-inline float approx(float a, float b)
-{
- if (b < -128.0f) b = -128.0f;
- if (b > 128.0f) b = 128.0f;
- if (a < 0) a = 0;
- return powf(a, b);
-}
-
-inline float4 lit(float4 tmp)
-{
- float4 result;
- result.x = 1.0;
- result.w = 1.0;
- if (tmp.x > 0) {
- result.y = tmp.x;
- result.z = approx(tmp.y, tmp.w);
- } else {
- result.y = 0;
- result.z = 0;
- }
- return result;
-}
-
-inline float4 cmp(float4 tmp0, float4 tmp1, float4 tmp2)
-{
- float4 result;
-
- result.x = (tmp0.x < 0.0) ? tmp1.x : tmp2.x;
- result.y = (tmp0.y < 0.0) ? tmp1.y : tmp2.y;
- result.z = (tmp0.z < 0.0) ? tmp1.z : tmp2.z;
- result.w = (tmp0.w < 0.0) ? tmp1.w : tmp2.w;
-
- return result;
-}
-
-extern float cosf(float val);
-extern float sinf(float val);
-
-inline float4 vcos(float4 val)
-{
- float4 result;
- printf("VEC IN is %f %f %f %f\n", val.x, val.y, val.z, val.w);
- result.x = cosf(val.x);
- result.y = cosf(val.x);
- result.z = cosf(val.x);
- result.w = cosf(val.x);
- printf("VEC OUT is %f %f %f %f\n", result.x, result.y, result.z, result.w);
- return result;
-}
-
-inline float4 scs(float4 val)
-{
- float4 result;
- float tmp = val.x;
- result.x = cosf(tmp);
- result.y = sinf(tmp);
- return result;
-}
-
-
-inline float4 vsin(float4 val)
-{
- float4 result;
- float tmp = val.x;
- float res = sinf(tmp);
- result.x = res;
- result.y = res;
- result.z = res;
- result.w = res;
- return result;
-}
-
-inline int kilp(float4 val)
-{
- if (val.x < 0 || val.y < 0 || val.z < 0 || val.w < 0)
- return 1;
- else
- return 0;
-}
+++ /dev/null
-#include "loweringpass.h"
-
-using namespace llvm;
-
-char LoweringPass::ID = 0;
-RegisterPass<LoweringPass> X("lowering", "Lowering Pass");
-
-LoweringPass::LoweringPass()
- : ModulePass((intptr_t)&ID)
-{
-}
-
-bool LoweringPass::runOnModule(Module &m)
-{
- llvm::cerr << "Hello: " << m.getModuleIdentifier() << "\n";
- return false;
-}
+++ /dev/null
-#ifndef LOWERINGPASS_H
-#define LOWERINGPASS_H
-
-#include "llvm/Pass.h"
-#include "llvm/Module.h"
-
-struct LoweringPass : public llvm::ModulePass
-{
- static char ID;
- LoweringPass();
-
- virtual bool runOnModule(llvm::Module &m);
-};
-
-#endif
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Zack Rusin zack@tungstengraphics.com
- */
-#ifdef MESA_LLVM
-
-#include "storage.h"
-
-#include "gallivm_p.h"
-
-#include "pipe/p_shader_tokens.h"
-#include <llvm/BasicBlock.h>
-#include <llvm/Module.h>
-#include <llvm/Value.h>
-
-#include <llvm/CallingConv.h>
-#include <llvm/Constants.h>
-#include <llvm/DerivedTypes.h>
-#include <llvm/InstrTypes.h>
-#include <llvm/Instructions.h>
-
-using namespace llvm;
-
-Storage::Storage(llvm::BasicBlock *block, llvm::Value *input)
- : m_block(block),
- m_INPUT(input),
- m_addrs(32),
- m_idx(0)
-{
- m_floatVecType = VectorType::get(Type::FloatTy, 4);
- m_intVecType = VectorType::get(IntegerType::get(32), 4);
-
- m_undefFloatVec = UndefValue::get(m_floatVecType);
- m_undefIntVec = UndefValue::get(m_intVecType);
- m_extSwizzleVec = 0;
-
- m_numConsts = 0;
-}
-
-//can only build vectors with all members in the [0, 9] range
-llvm::Constant *Storage::shuffleMask(int vec)
-{
- if (!m_extSwizzleVec) {
- std::vector<Constant*> elems;
- elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f)));
- elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f)));
- elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f)));
- elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f)));
- m_extSwizzleVec = ConstantVector::get(m_floatVecType, elems);
- }
-
- if (m_intVecs.find(vec) != m_intVecs.end()) {
- return m_intVecs[vec];
- }
- int origVec = vec;
- Constant* const_vec = 0;
- if (origVec == 0) {
- const_vec = Constant::getNullValue(m_intVecType);
- } else {
- int x = gallivm_x_swizzle(vec);
- int y = gallivm_y_swizzle(vec);
- int z = gallivm_z_swizzle(vec);
- int w = gallivm_w_swizzle(vec);
- std::vector<Constant*> elems;
- elems.push_back(constantInt(x));
- elems.push_back(constantInt(y));
- elems.push_back(constantInt(z));
- elems.push_back(constantInt(w));
- const_vec = ConstantVector::get(m_intVecType, elems);
- }
-
- m_intVecs[origVec] = const_vec;
- return const_vec;
-}
-
-llvm::ConstantInt *Storage::constantInt(int idx)
-{
- if (m_constInts.find(idx) != m_constInts.end()) {
- return m_constInts[idx];
- }
- ConstantInt *const_int = ConstantInt::get(APInt(32, idx));
- m_constInts[idx] = const_int;
- return const_int;
-}
-
-llvm::Value *Storage::inputElement(int idx, llvm::Value *indIdx)
-{
- Value *val = element(InputsArg, idx, indIdx);
- LoadInst *load = new LoadInst(val, name("input"), false, m_block);
- load->setAlignment(8);
-
- return load;
-}
-
-llvm::Value *Storage::constElement(int idx, llvm::Value *indIdx)
-{
- m_numConsts = ((idx + 1) > m_numConsts) ? (idx + 1) : m_numConsts;
-
- Value *elem = element(ConstsArg, idx, indIdx);
- LoadInst *load = new LoadInst(elem, name("const"), false, m_block);
- load->setAlignment(8);
- return load;
-}
-
-llvm::Value *Storage::shuffleVector(llvm::Value *vec, int shuffle)
-{
- Constant *mask = shuffleMask(shuffle);
- ShuffleVectorInst *res =
- new ShuffleVectorInst(vec, m_extSwizzleVec, mask,
- name("shuffle"), m_block);
- return res;
-}
-
-
-llvm::Value *Storage::tempElement(int idx, llvm::Value *indIdx)
-{
- Value *elem = element(TempsArg, idx, indIdx);
-
- LoadInst *load = new LoadInst(elem, name("temp"), false, m_block);
- load->setAlignment(8);
-
- return load;
-}
-
-void Storage::setTempElement(int idx, llvm::Value *val, int mask)
-{
- if (mask != TGSI_WRITEMASK_XYZW) {
- llvm::Value *templ = 0;
- if (m_tempWriteMap[idx])
- templ = tempElement(idx);
- val = maskWrite(val, mask, templ);
- }
- Value *elem = element(TempsArg, idx);
- StoreInst *st = new StoreInst(val, elem, false, m_block);
- st->setAlignment(8);
- m_tempWriteMap[idx] = true;
-}
-
-void Storage::setOutputElement(int dstIdx, llvm::Value *val, int mask)
-{
- if (mask != TGSI_WRITEMASK_XYZW) {
- llvm::Value *templ = 0;
- if (m_destWriteMap[dstIdx])
- templ = outputElement(dstIdx);
- val = maskWrite(val, mask, templ);
- }
-
- Value *elem = element(DestsArg, dstIdx);
- StoreInst *st = new StoreInst(val, elem, false, m_block);
- st->setAlignment(8);
- m_destWriteMap[dstIdx] = true;
-}
-
-llvm::Value *Storage::maskWrite(llvm::Value *src, int mask, llvm::Value *templ)
-{
- llvm::Value *dst = templ;
- if (!dst)
- dst = Constant::getNullValue(m_floatVecType);
- if ((mask & TGSI_WRITEMASK_X)) {
- llvm::Value *x = new ExtractElementInst(src, unsigned(0),
- name("x"), m_block);
- dst = new InsertElementInst(dst, x, unsigned(0),
- name("dstx"), m_block);
- }
- if ((mask & TGSI_WRITEMASK_Y)) {
- llvm::Value *y = new ExtractElementInst(src, unsigned(1),
- name("y"), m_block);
- dst = new InsertElementInst(dst, y, unsigned(1),
- name("dsty"), m_block);
- }
- if ((mask & TGSI_WRITEMASK_Z)) {
- llvm::Value *z = new ExtractElementInst(src, unsigned(2),
- name("z"), m_block);
- dst = new InsertElementInst(dst, z, unsigned(2),
- name("dstz"), m_block);
- }
- if ((mask & TGSI_WRITEMASK_W)) {
- llvm::Value *w = new ExtractElementInst(src, unsigned(3),
- name("w"), m_block);
- dst = new InsertElementInst(dst, w, unsigned(3),
- name("dstw"), m_block);
- }
- return dst;
-}
-
-const char * Storage::name(const char *prefix)
-{
- ++m_idx;
- snprintf(m_name, 32, "%s%d", prefix, m_idx);
- return m_name;
-}
-
-int Storage::numConsts() const
-{
- return m_numConsts;
-}
-
-llvm::Value * Storage::addrElement(int idx) const
-{
- Value *ret = m_addrs[idx];
- if (!ret)
- return m_undefFloatVec;
- return ret;
-}
-
-void Storage::setAddrElement(int idx, llvm::Value *val, int mask)
-{
- if (mask != TGSI_WRITEMASK_XYZW) {
- llvm::Value *templ = m_addrs[idx];
- val = maskWrite(val, mask, templ);
- }
- m_addrs[idx] = val;
-}
-
-llvm::Value * Storage::extractIndex(llvm::Value *vec)
-{
- llvm::Value *x = new ExtractElementInst(vec, unsigned(0),
- name("x"), m_block);
- return new FPToSIInst(x, IntegerType::get(32), name("intidx"), m_block);
-}
-
-void Storage::setCurrentBlock(llvm::BasicBlock *block)
-{
- m_block = block;
-}
-
-llvm::Value * Storage::outputElement(int idx, llvm::Value *indIdx)
-{
- Value *elem = element(DestsArg, idx, indIdx);
- LoadInst *load = new LoadInst(elem, name("output"), false, m_block);
- load->setAlignment(8);
-
- return load;
-}
-
-llvm::Value * Storage::inputPtr() const
-{
- return m_INPUT;
-}
-
-void Storage::pushArguments(llvm::Value *input)
-{
- m_argStack.push(m_INPUT);
-
- m_INPUT = input;
-}
-
-void Storage::popArguments()
-{
- m_INPUT = m_argStack.top();
- m_argStack.pop();
-}
-
-void Storage::pushTemps()
-{
- m_extSwizzleVec = 0;
-}
-
-void Storage::popTemps()
-{
-}
-
-llvm::Value * Storage::immediateElement(int idx)
-{
- return m_immediates[idx];
-}
-
-void Storage::addImmediate(float *val)
-{
- std::vector<Constant*> vec(4);
- vec[0] = ConstantFP::get(Type::FloatTy, APFloat(val[0]));
- vec[1] = ConstantFP::get(Type::FloatTy, APFloat(val[1]));
- vec[2] = ConstantFP::get(Type::FloatTy, APFloat(val[2]));
- vec[3] = ConstantFP::get(Type::FloatTy, APFloat(val[3]));
- m_immediates.push_back(ConstantVector::get(m_floatVecType, vec));
-}
-
-
-llvm::Value * Storage::elemPtr(Args arg)
-{
- std::vector<Value*> indices;
- indices.push_back(constantInt(0));
- indices.push_back(constantInt(static_cast<int>(arg)));
- GetElementPtrInst *getElem = new GetElementPtrInst(m_INPUT,
- indices.begin(),
- indices.end(),
- name("input_ptr"),
- m_block);
- return new LoadInst(getElem, name("input_field"), false, m_block);
-}
-
-llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx,
- llvm::Value *indIdx )
-{
- GetElementPtrInst *getElem = 0;
-
- if (indIdx) {
- getElem = new GetElementPtrInst(ptr,
- BinaryOperator::create(Instruction::Add,
- indIdx,
- constantInt(idx),
- name("add"),
- m_block),
- name("field"),
- m_block);
- } else {
- getElem = new GetElementPtrInst(ptr,
- constantInt(idx),
- name("field"),
- m_block);
- }
- return getElem;
-}
-
-llvm::Value * Storage::element(Args arg, int idx, llvm::Value *indIdx )
-{
- Value *val = elemPtr(arg);
- return elemIdx(val, idx, indIdx);
-}
-
-void Storage::setKilElement(llvm::Value *val)
-{
- std::vector<Value*> indices;
- indices.push_back(constantInt(0));
- indices.push_back(constantInt(static_cast<int>(KilArg)));
- GetElementPtrInst *elem = new GetElementPtrInst(m_INPUT,
- indices.begin(),
- indices.end(),
- name("kil_ptr"),
- m_block);
- StoreInst *st = new StoreInst(val, elem, false, m_block);
- st->setAlignment(8);
-}
-
-#endif //MESA_LLVM
-
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
- /*
- * Authors:
- * Zack Rusin zack@tungstengraphics.com
- */
-
-#ifndef STORAGE_H
-#define STORAGE_H
-
-#include <map>
-#include <set>
-#include <stack>
-#include <vector>
-
-namespace llvm {
- class BasicBlock;
- class Constant;
- class ConstantInt;
- class LoadInst;
- class Value;
- class VectorType;
-}
-
-class Storage
-{
-public:
- Storage(llvm::BasicBlock *block,
- llvm::Value *input);
-
- llvm::Value *inputPtr() const;
-
- void setCurrentBlock(llvm::BasicBlock *block);
-
- llvm::ConstantInt *constantInt(int);
- llvm::Constant *shuffleMask(int vec);
- llvm::Value *inputElement(int idx, llvm::Value *indIdx =0);
- llvm::Value *constElement(int idx, llvm::Value *indIdx =0);
- llvm::Value *outputElement(int idx, llvm::Value *indIdx =0);
- llvm::Value *tempElement(int idx, llvm::Value *indIdx =0);
- llvm::Value *immediateElement(int idx);
-
- void setOutputElement(int dstIdx, llvm::Value *val, int mask);
- void setTempElement(int idx, llvm::Value *val, int mask);
-
- llvm::Value *addrElement(int idx) const;
- void setAddrElement(int idx, llvm::Value *val, int mask);
-
- void setKilElement(llvm::Value *val);
-
- llvm::Value *shuffleVector(llvm::Value *vec, int shuffle);
-
- llvm::Value *extractIndex(llvm::Value *vec);
-
- int numConsts() const;
-
- void pushArguments(llvm::Value *input);
- void popArguments();
- void pushTemps();
- void popTemps();
-
- void addImmediate(float *val);
-
-private:
- llvm::Value *maskWrite(llvm::Value *src, int mask, llvm::Value *templ);
- const char *name(const char *prefix);
-
- enum Args {
- DestsArg = 0,
- InputsArg = 1,
- TempsArg = 2,
- ConstsArg = 3,
- KilArg = 4
- };
- llvm::Value *elemPtr(Args arg);
- llvm::Value *elemIdx(llvm::Value *ptr, int idx,
- llvm::Value *indIdx = 0);
- llvm::Value *element(Args arg, int idx, llvm::Value *indIdx = 0);
-
-private:
- llvm::BasicBlock *m_block;
- llvm::Value *m_INPUT;
-
- std::map<int, llvm::ConstantInt*> m_constInts;
- std::map<int, llvm::Constant*> m_intVecs;
- std::vector<llvm::Value*> m_addrs;
- std::vector<llvm::Constant*> m_immediates;
-
- llvm::VectorType *m_floatVecType;
- llvm::VectorType *m_intVecType;
-
- char m_name[32];
- int m_idx;
-
- int m_numConsts;
-
- std::map<int, bool > m_destWriteMap;
- std::map<int, bool > m_tempWriteMap;
-
- llvm::Value *m_undefFloatVec;
- llvm::Value *m_undefIntVec;
- llvm::Value *m_extSwizzleVec;
-
- std::stack<llvm::Value*> m_argStack;
- std::stack<std::vector<llvm::Value*> > m_tempStack;
-};
-
-#endif
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "storagesoa.h"
-
-#include "gallivm_p.h"
-
-#include "pipe/p_shader_tokens.h"
-#include "pipe/p_debug.h"
-
-#include <llvm/BasicBlock.h>
-#include <llvm/Module.h>
-#include <llvm/Value.h>
-
-#include <llvm/CallingConv.h>
-#include <llvm/Constants.h>
-#include <llvm/DerivedTypes.h>
-#include <llvm/InstrTypes.h>
-#include <llvm/Instructions.h>
-
-using namespace llvm;
-
-
-StorageSoa::StorageSoa(llvm::BasicBlock *block,
- llvm::Value *input,
- llvm::Value *output,
- llvm::Value *consts,
- llvm::Value *temps)
- : m_block(block),
- m_input(input),
- m_output(output),
- m_consts(consts),
- m_temps(temps),
- m_immediates(0),
- m_idx(0)
-{
-}
-
-void StorageSoa::addImmediate(float *vec)
-{
- std::vector<float> vals(4);
- vals[0] = vec[0];
- vals[1] = vec[1];
- vals[2] = vec[2];
- vals[3] = vec[3];
- m_immediatesToFlush.push_back(vals);
-}
-
-void StorageSoa::declareImmediates()
-{
- if (m_immediatesToFlush.empty())
- return;
-
- VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
- ArrayType *vectorChannels = ArrayType::get(vectorType, 4);
- ArrayType *arrayType = ArrayType::get(vectorChannels, m_immediatesToFlush.size());
-
- m_immediates = new GlobalVariable(
- /*Type=*/arrayType,
- /*isConstant=*/false,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Initializer=*/0, // has initializer, specified below
- /*Name=*/name("immediates"),
- currentModule());
-
- std::vector<Constant*> arrayVals;
- for (unsigned int i = 0; i < m_immediatesToFlush.size(); ++i) {
- std::vector<float> vec = m_immediatesToFlush[i];
- std::vector<float> vals(4);
- std::vector<Constant*> channelArray;
-
- vals[0] = vec[0]; vals[1] = vec[0]; vals[2] = vec[0]; vals[3] = vec[0];
- llvm::Constant *xChannel = createConstGlobalVector(vals);
-
- vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1];
- llvm::Constant *yChannel = createConstGlobalVector(vals);
-
- vals[0] = vec[2]; vals[1] = vec[2]; vals[2] = vec[2]; vals[3] = vec[2];
- llvm::Constant *zChannel = createConstGlobalVector(vals);
-
- vals[0] = vec[3]; vals[1] = vec[3]; vals[2] = vec[3]; vals[3] = vec[3];
- llvm::Constant *wChannel = createConstGlobalVector(vals);
- channelArray.push_back(xChannel);
- channelArray.push_back(yChannel);
- channelArray.push_back(zChannel);
- channelArray.push_back(wChannel);
- Constant *constChannels = ConstantArray::get(vectorChannels,
- channelArray);
- arrayVals.push_back(constChannels);
- }
- Constant *constArray = ConstantArray::get(arrayType, arrayVals);
- m_immediates->setInitializer(constArray);
-
- m_immediatesToFlush.clear();
-}
-
-llvm::Value *StorageSoa::addrElement(int idx) const
-{
- std::map<int, llvm::Value*>::const_iterator itr = m_addresses.find(idx);
- if (itr == m_addresses.end()) {
- debug_printf("Trying to access invalid shader 'address'\n");
- return 0;
- }
- llvm::Value * res = (*itr).second;
-
- res = new LoadInst(res, name("addr"), false, m_block);
-
- return res;
-}
-
-std::vector<llvm::Value*> StorageSoa::inputElement(llvm::Value *idx)
-{
- std::vector<llvm::Value*> res(4);
-
- res[0] = element(m_input, idx, 0);
- res[1] = element(m_input, idx, 1);
- res[2] = element(m_input, idx, 2);
- res[3] = element(m_input, idx, 3);
-
- return res;
-}
-
-std::vector<llvm::Value*> StorageSoa::constElement(llvm::Value *idx)
-{
- std::vector<llvm::Value*> res(4);
- llvm::Value *xChannel, *yChannel, *zChannel, *wChannel;
-
- xChannel = elementPointer(m_consts, idx, 0);
- yChannel = elementPointer(m_consts, idx, 1);
- zChannel = elementPointer(m_consts, idx, 2);
- wChannel = elementPointer(m_consts, idx, 3);
-
- res[0] = alignedArrayLoad(xChannel);
- res[1] = alignedArrayLoad(yChannel);
- res[2] = alignedArrayLoad(zChannel);
- res[3] = alignedArrayLoad(wChannel);
-
- return res;
-}
-
-std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx)
-{
- std::vector<llvm::Value*> res(4);
-
- res[0] = element(m_output, idx, 0);
- res[1] = element(m_output, idx, 1);
- res[2] = element(m_output, idx, 2);
- res[3] = element(m_output, idx, 3);
-
- return res;
-}
-
-std::vector<llvm::Value*> StorageSoa::tempElement(llvm::Value *idx)
-{
- std::vector<llvm::Value*> res(4);
-
- res[0] = element(m_temps, idx, 0);
- res[1] = element(m_temps, idx, 1);
- res[2] = element(m_temps, idx, 2);
- res[3] = element(m_temps, idx, 3);
-
- return res;
-}
-
-std::vector<llvm::Value*> StorageSoa::immediateElement(llvm::Value *idx)
-{
- std::vector<llvm::Value*> res(4);
-
- res[0] = element(m_immediates, idx, 0);
- res[1] = element(m_immediates, idx, 1);
- res[2] = element(m_immediates, idx, 2);
- res[3] = element(m_immediates, idx, 3);
-
- return res;
-}
-
-llvm::Value * StorageSoa::elementPointer(llvm::Value *ptr, llvm::Value *index,
- int channel) const
-{
- std::vector<Value*> indices;
- if (m_immediates == ptr)
- indices.push_back(constantInt(0));
- indices.push_back(index);
- indices.push_back(constantInt(channel));
-
- GetElementPtrInst *getElem = new GetElementPtrInst(ptr,
- indices.begin(),
- indices.end(),
- name("ptr"),
- m_block);
- return getElem;
-}
-
-llvm::Value * StorageSoa::element(llvm::Value *ptr, llvm::Value *index,
- int channel) const
-{
- llvm::Value *res = elementPointer(ptr, index, channel);
- LoadInst *load = new LoadInst(res, name("element"), false, m_block);
- //load->setAlignment(8);
- return load;
-}
-
-const char * StorageSoa::name(const char *prefix) const
-{
- ++m_idx;
- snprintf(m_name, 32, "%s%d", prefix, m_idx);
- return m_name;
-}
-
-llvm::ConstantInt * StorageSoa::constantInt(int idx) const
-{
- if (m_constInts.find(idx) != m_constInts.end()) {
- return m_constInts[idx];
- }
- ConstantInt *constInt = ConstantInt::get(APInt(32, idx));
- m_constInts[idx] = constInt;
- return constInt;
-}
-
-llvm::Value *StorageSoa::alignedArrayLoad(llvm::Value *val)
-{
- VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
- PointerType *vectorPtr = PointerType::get(vectorType, 0);
-
- CastInst *cast = new BitCastInst(val, vectorPtr, name("toVector"), m_block);
- LoadInst *load = new LoadInst(cast, name("alignLoad"), false, m_block);
- load->setAlignment(8);
- return load;
-}
-
-llvm::Module * StorageSoa::currentModule() const
-{
- if (!m_block || !m_block->getParent())
- return 0;
-
- return m_block->getParent()->getParent();
-}
-
-llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &vec)
-{
- VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
- std::vector<Constant*> immValues;
- ConstantFP *constx = ConstantFP::get(Type::FloatTy, APFloat(vec[0]));
- ConstantFP *consty = ConstantFP::get(Type::FloatTy, APFloat(vec[1]));
- ConstantFP *constz = ConstantFP::get(Type::FloatTy, APFloat(vec[2]));
- ConstantFP *constw = ConstantFP::get(Type::FloatTy, APFloat(vec[3]));
- immValues.push_back(constx);
- immValues.push_back(consty);
- immValues.push_back(constz);
- immValues.push_back(constw);
- Constant *constVector = ConstantVector::get(vectorType, immValues);
-
- return constVector;
-}
-
-std::vector<llvm::Value*> StorageSoa::load(Argument type, int idx, int swizzle,
- llvm::Value *indIdx)
-{
- std::vector<llvm::Value*> val(4);
-
- //if we have an indirect index, always use that
- // if not use the integer offset to create one
- llvm::Value *realIndex = 0;
- if (indIdx)
- realIndex = indIdx;
- else
- realIndex = constantInt(idx);
- debug_printf("XXXXXXXXX realIdx = %p, indIdx = %p\n", realIndex, indIdx);
-
- switch(type) {
- case Input:
- val = inputElement(realIndex);
- break;
- case Output:
- val = outputElement(realIndex);
- break;
- case Temp:
- val = tempElement(realIndex);
- break;
- case Const:
- val = constElement(realIndex);
- break;
- case Immediate:
- val = immediateElement(realIndex);
- break;
- case Address:
- debug_printf("Address not handled in the load phase!\n");
- assert(0);
- break;
- }
- if (!gallivm_is_swizzle(swizzle))
- return val;
-
- std::vector<llvm::Value*> res(4);
-
- res[0] = val[gallivm_x_swizzle(swizzle)];
- res[1] = val[gallivm_y_swizzle(swizzle)];
- res[2] = val[gallivm_z_swizzle(swizzle)];
- res[3] = val[gallivm_w_swizzle(swizzle)];
- return res;
-}
-
-void StorageSoa::store(Argument type, int idx, const std::vector<llvm::Value*> &val,
- int mask)
-{
- llvm::Value *out = 0;
- switch(type) {
- case Output:
- out = m_output;
- break;
- case Temp:
- out = m_temps;
- break;
- case Input:
- out = m_input;
- break;
- case Address: {
- llvm::Value *addr = m_addresses[idx];
- if (!addr) {
- addAddress(idx);
- addr = m_addresses[idx];
- assert(addr);
- }
- new StoreInst(val[0], addr, false, m_block);
- return;
- break;
- }
- default:
- debug_printf("Can't save output of this type: %d !\n", type);
- assert(0);
- break;
- }
- llvm::Value *realIndex = constantInt(idx);
- if ((mask & TGSI_WRITEMASK_X)) {
- llvm::Value *xChannel = elementPointer(out, realIndex, 0);
- new StoreInst(val[0], xChannel, false, m_block);
- }
- if ((mask & TGSI_WRITEMASK_Y)) {
- llvm::Value *yChannel = elementPointer(out, realIndex, 1);
- new StoreInst(val[1], yChannel, false, m_block);
- }
- if ((mask & TGSI_WRITEMASK_Z)) {
- llvm::Value *zChannel = elementPointer(out, realIndex, 2);
- new StoreInst(val[2], zChannel, false, m_block);
- }
- if ((mask & TGSI_WRITEMASK_W)) {
- llvm::Value *wChannel = elementPointer(out, realIndex, 3);
- new StoreInst(val[3], wChannel, false, m_block);
- }
-}
-
-void StorageSoa::addAddress(int idx)
-{
- GlobalVariable *val = new GlobalVariable(
- /*Type=*/IntegerType::get(32),
- /*isConstant=*/false,
- /*Linkage=*/GlobalValue::ExternalLinkage,
- /*Initializer=*/0, // has initializer, specified below
- /*Name=*/name("address"),
- currentModule());
- val->setInitializer(Constant::getNullValue(IntegerType::get(32)));
-
- debug_printf("adding to %d\n", idx);
- m_addresses[idx] = val;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef STORAGESOA_H
-#define STORAGESOA_H
-
-#include <vector>
-#include <list>
-#include <map>
-
-namespace llvm {
- class BasicBlock;
- class Constant;
- class ConstantInt;
- class GlobalVariable;
- class LoadInst;
- class Value;
- class VectorType;
- class Module;
-}
-
-class StorageSoa
-{
-public:
- enum Argument {
- Input,
- Output,
- Temp,
- Const,
- Immediate,
- Address
- };
-public:
- StorageSoa(llvm::BasicBlock *block,
- llvm::Value *input,
- llvm::Value *output,
- llvm::Value *consts,
- llvm::Value *temps);
-
-
- std::vector<llvm::Value*> load(Argument type, int idx, int swizzle,
- llvm::Value *indIdx =0);
- void store(Argument type, int idx, const std::vector<llvm::Value*> &val,
- int mask);
-
- void addImmediate(float *vec);
- void declareImmediates();
-
- void addAddress(int idx);
-
- llvm::Value * addrElement(int idx) const;
-
- llvm::ConstantInt *constantInt(int) const;
-private:
- llvm::Value *elementPointer(llvm::Value *ptr, llvm::Value *indIdx,
- int channel) const;
- llvm::Value *element(llvm::Value *ptr, llvm::Value *idx,
- int channel) const;
- const char *name(const char *prefix) const;
- llvm::Value *alignedArrayLoad(llvm::Value *val);
- llvm::Module *currentModule() const;
- llvm::Constant *createConstGlobalVector(const std::vector<float> &vec);
-
- std::vector<llvm::Value*> inputElement(llvm::Value *indIdx);
- std::vector<llvm::Value*> constElement(llvm::Value *indIdx);
- std::vector<llvm::Value*> outputElement(llvm::Value *indIdx);
- std::vector<llvm::Value*> tempElement(llvm::Value *indIdx);
- std::vector<llvm::Value*> immediateElement(llvm::Value *indIdx);
-private:
- llvm::BasicBlock *m_block;
-
- llvm::Value *m_input;
- llvm::Value *m_output;
- llvm::Value *m_consts;
- llvm::Value *m_temps;
- llvm::GlobalVariable *m_immediates;
-
- std::map<int, llvm::Value*> m_addresses;
-
- std::vector<std::vector<float> > m_immediatesToFlush;
-
- mutable std::map<int, llvm::ConstantInt*> m_constInts;
- mutable char m_name[32];
- mutable int m_idx;
-};
-
-#endif
+++ /dev/null
-#include "tgsitollvm.h"
-
-#include "gallivm.h"
-#include "gallivm_p.h"
-
-#include "storage.h"
-#include "instructions.h"
-#include "storagesoa.h"
-#include "instructionssoa.h"
-
-#include "pipe/p_shader_tokens.h"
-
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/exec/tgsi_exec.h"
-#include "tgsi/util/tgsi_util.h"
-#include "tgsi/util/tgsi_build.h"
-#include "tgsi/util/tgsi_dump.h"
-
-
-#include <llvm/Module.h>
-#include <llvm/CallingConv.h>
-#include <llvm/Constants.h>
-#include <llvm/DerivedTypes.h>
-#include <llvm/Instructions.h>
-#include <llvm/ModuleProvider.h>
-#include <llvm/Pass.h>
-#include <llvm/PassManager.h>
-#include <llvm/ParameterAttributes.h>
-#include <llvm/Support/PatternMatch.h>
-#include <llvm/ExecutionEngine/JIT.h>
-#include <llvm/ExecutionEngine/Interpreter.h>
-#include <llvm/ExecutionEngine/GenericValue.h>
-#include <llvm/Support/MemoryBuffer.h>
-#include <llvm/LinkAllPasses.h>
-#include <llvm/Analysis/Verifier.h>
-#include <llvm/Analysis/LoopPass.h>
-#include <llvm/Target/TargetData.h>
-#include <llvm/Bitcode/ReaderWriter.h>
-#include <llvm/Transforms/Utils/Cloning.h>
-
-
-#include <sstream>
-#include <fstream>
-#include <iostream>
-
-using namespace llvm;
-
-static inline FunctionType *vertexShaderFunctionType()
-{
- //Function takes three arguments,
- // the calling code has to make sure the types it will
- // pass are castable to the following:
- // [4 x <4 x float>] inputs,
- // [4 x <4 x float>] output,
- // [4 x [4 x float]] consts,
- // [4 x <4 x float>] temps
-
- std::vector<const Type*> funcArgs;
- VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
- ArrayType *vectorArray = ArrayType::get(vectorType, 4);
- PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0);
-
- ArrayType *floatArray = ArrayType::get(Type::FloatTy, 4);
- ArrayType *constsArray = ArrayType::get(floatArray, 4);
- PointerType *constsArrayPtr = PointerType::get(constsArray, 0);
-
- funcArgs.push_back(vectorArrayPtr);//inputs
- funcArgs.push_back(vectorArrayPtr);//output
- funcArgs.push_back(constsArrayPtr);//consts
- funcArgs.push_back(vectorArrayPtr);//temps
-
- FunctionType *functionType = FunctionType::get(
- /*Result=*/Type::VoidTy,
- /*Params=*/funcArgs,
- /*isVarArg=*/false);
-
- return functionType;
-}
-
-static inline void
-add_interpolator(struct gallivm_ir *ir,
- struct gallivm_interpolate *interp)
-{
- ir->interpolators[ir->num_interp] = *interp;
- ++ir->num_interp;
-}
-
-static void
-translate_declaration(struct gallivm_ir *prog,
- llvm::Module *module,
- Storage *storage,
- struct tgsi_full_declaration *decl,
- struct tgsi_full_declaration *fd)
-{
- if (decl->Declaration.File == TGSI_FILE_INPUT) {
- unsigned first, last, mask;
- uint interp_method;
-
- assert(decl->Declaration.Declare == TGSI_DECLARE_RANGE);
-
- first = decl->u.DeclarationRange.First;
- last = decl->u.DeclarationRange.Last;
- mask = decl->Declaration.UsageMask;
-
- /* Do not touch WPOS.xy */
- if (first == 0) {
- mask &= ~TGSI_WRITEMASK_XY;
- if (mask == TGSI_WRITEMASK_NONE) {
- first++;
- if (first > last) {
- return;
- }
- }
- }
-
- interp_method = decl->Interpolation.Interpolate;
-
- if (mask == TGSI_WRITEMASK_XYZW) {
- unsigned i, j;
-
- for (i = first; i <= last; i++) {
- for (j = 0; j < NUM_CHANNELS; j++) {
- //interp( mach, i, j );
- struct gallivm_interpolate interp;
- interp.type = interp_method;
- interp.attrib = i;
- interp.chan = j;
- add_interpolator(prog, &interp);
- }
- }
- } else {
- unsigned i, j;
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- if( mask & (1 << j) ) {
- for( i = first; i <= last; i++ ) {
- struct gallivm_interpolate interp;
- interp.type = interp_method;
- interp.attrib = i;
- interp.chan = j;
- add_interpolator(prog, &interp);
- }
- }
- }
- }
- }
-}
-
-static void
-translate_declarationir(struct gallivm_ir *,
- llvm::Module *,
- StorageSoa *storage,
- struct tgsi_full_declaration *decl,
- struct tgsi_full_declaration *)
-{
- if (decl->Declaration.File == TGSI_FILE_ADDRESS) {
- int idx = decl->u.DeclarationRange.First;
- storage->addAddress(idx);
- }
-}
-
-static void
-translate_immediate(Storage *storage,
- struct tgsi_full_immediate *imm)
-{
- float vec[4];
- int i;
- for (i = 0; i < imm->Immediate.Size - 1; ++i) {
- switch (imm->Immediate.DataType) {
- case TGSI_IMM_FLOAT32:
- vec[i] = imm->u.ImmediateFloat32[i].Float;
- break;
- default:
- assert(0);
- }
- }
- storage->addImmediate(vec);
-}
-
-
-static void
-translate_immediateir(StorageSoa *storage,
- struct tgsi_full_immediate *imm)
-{
- float vec[4];
- int i;
- for (i = 0; i < imm->Immediate.Size - 1; ++i) {
- switch (imm->Immediate.DataType) {
- case TGSI_IMM_FLOAT32:
- vec[i] = imm->u.ImmediateFloat32[i].Float;
- break;
- default:
- assert(0);
- }
- }
- storage->addImmediate(vec);
-}
-
-static inline int
-swizzleInt(struct tgsi_full_src_register *src)
-{
- int swizzle = 0;
- int start = 1000;
-
- for (int k = 0; k < 4; ++k) {
- swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start;
- start /= 10;
- }
- return swizzle;
-}
-
-static inline llvm::Value *
-swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src,
- Storage *storage)
-{
- int swizzle = swizzleInt(src);
-
- if (gallivm_is_swizzle(swizzle)) {
- /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/
- val = storage->shuffleVector(val, swizzle);
- }
- return val;
-}
-
-static void
-translate_instruction(llvm::Module *module,
- Storage *storage,
- Instructions *instr,
- struct tgsi_full_instruction *inst,
- struct tgsi_full_instruction *fi,
- unsigned instno)
-{
- llvm::Value *inputs[4];
- inputs[0] = 0;
- inputs[1] = 0;
- inputs[2] = 0;
- inputs[3] = 0;
-
- for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
- struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
- llvm::Value *val = 0;
- llvm::Value *indIdx = 0;
-
- if (src->SrcRegister.Indirect) {
- indIdx = storage->addrElement(src->SrcRegisterInd.Index);
- indIdx = storage->extractIndex(indIdx);
- }
- if (src->SrcRegister.File == TGSI_FILE_CONSTANT) {
- val = storage->constElement(src->SrcRegister.Index, indIdx);
- } else if (src->SrcRegister.File == TGSI_FILE_INPUT) {
- val = storage->inputElement(src->SrcRegister.Index, indIdx);
- } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) {
- val = storage->tempElement(src->SrcRegister.Index);
- } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) {
- val = storage->outputElement(src->SrcRegister.Index, indIdx);
- } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
- val = storage->immediateElement(src->SrcRegister.Index);
- } else {
- fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File);
- return;
- }
-
- inputs[i] = swizzleVector(val, src, storage);
- }
-
- /*if (inputs[0])
- instr->printVector(inputs[0]);
- if (inputs[1])
- instr->printVector(inputs[1]);*/
- llvm::Value *out = 0;
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_ARL: {
- out = instr->arl(inputs[0]);
- }
- break;
- case TGSI_OPCODE_MOV: {
- out = inputs[0];
- }
- break;
- case TGSI_OPCODE_LIT: {
- out = instr->lit(inputs[0]);
- }
- break;
- case TGSI_OPCODE_RCP: {
- out = instr->rcp(inputs[0]);
- }
- break;
- case TGSI_OPCODE_RSQ: {
- out = instr->rsq(inputs[0]);
- }
- break;
- case TGSI_OPCODE_EXP:
- break;
- case TGSI_OPCODE_LOG:
- break;
- case TGSI_OPCODE_MUL: {
- out = instr->mul(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_ADD: {
- out = instr->add(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_DP3: {
- out = instr->dp3(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_DP4: {
- out = instr->dp4(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_DST: {
- out = instr->dst(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_MIN: {
- out = instr->min(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_MAX: {
- out = instr->max(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_SLT: {
- out = instr->slt(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_SGE: {
- out = instr->sge(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_MAD: {
- out = instr->madd(inputs[0], inputs[1], inputs[2]);
- }
- break;
- case TGSI_OPCODE_SUB: {
- out = instr->sub(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_LERP: {
- out = instr->lerp(inputs[0], inputs[1], inputs[2]);
- }
- break;
- case TGSI_OPCODE_CND:
- break;
- case TGSI_OPCODE_CND0:
- break;
- case TGSI_OPCODE_DOT2ADD:
- break;
- case TGSI_OPCODE_INDEX:
- break;
- case TGSI_OPCODE_NEGATE:
- break;
- case TGSI_OPCODE_FRAC: {
- out = instr->frc(inputs[0]);
- }
- break;
- case TGSI_OPCODE_CLAMP:
- break;
- case TGSI_OPCODE_FLOOR: {
- out = instr->floor(inputs[0]);
- }
- break;
- case TGSI_OPCODE_ROUND:
- break;
- case TGSI_OPCODE_EXPBASE2: {
- out = instr->ex2(inputs[0]);
- }
- break;
- case TGSI_OPCODE_LOGBASE2: {
- out = instr->lg2(inputs[0]);
- }
- break;
- case TGSI_OPCODE_POWER: {
- out = instr->pow(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_CROSSPRODUCT: {
- out = instr->cross(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- break;
- case TGSI_OPCODE_ABS: {
- out = instr->abs(inputs[0]);
- }
- break;
- case TGSI_OPCODE_RCC:
- break;
- case TGSI_OPCODE_DPH: {
- out = instr->dph(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_COS: {
- out = instr->cos(inputs[0]);
- }
- break;
- case TGSI_OPCODE_DDX:
- break;
- case TGSI_OPCODE_DDY:
- break;
- case TGSI_OPCODE_KILP: {
- out = instr->kilp(inputs[0]);
- storage->setKilElement(out);
- return;
- }
- break;
- case TGSI_OPCODE_PK2H:
- break;
- case TGSI_OPCODE_PK2US:
- break;
- case TGSI_OPCODE_PK4B:
- break;
- case TGSI_OPCODE_PK4UB:
- break;
- case TGSI_OPCODE_RFL:
- break;
- case TGSI_OPCODE_SEQ:
- break;
- case TGSI_OPCODE_SFL:
- break;
- case TGSI_OPCODE_SGT: {
- out = instr->sgt(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_SIN: {
- out = instr->sin(inputs[0]);
- }
- break;
- case TGSI_OPCODE_SLE:
- break;
- case TGSI_OPCODE_SNE:
- break;
- case TGSI_OPCODE_STR:
- break;
- case TGSI_OPCODE_TEX:
- break;
- case TGSI_OPCODE_TXD:
- break;
- case TGSI_OPCODE_UP2H:
- break;
- case TGSI_OPCODE_UP2US:
- break;
- case TGSI_OPCODE_UP4B:
- break;
- case TGSI_OPCODE_UP4UB:
- break;
- case TGSI_OPCODE_X2D:
- break;
- case TGSI_OPCODE_ARA:
- break;
- case TGSI_OPCODE_ARR:
- break;
- case TGSI_OPCODE_BRA:
- break;
- case TGSI_OPCODE_CAL: {
- instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr());
- return;
- }
- break;
- case TGSI_OPCODE_RET: {
- instr->end();
- return;
- }
- break;
- case TGSI_OPCODE_SSG:
- break;
- case TGSI_OPCODE_CMP: {
- out = instr->cmp(inputs[0], inputs[1], inputs[2]);
- }
- break;
- case TGSI_OPCODE_SCS: {
- out = instr->scs(inputs[0]);
- }
- break;
- case TGSI_OPCODE_TXB:
- break;
- case TGSI_OPCODE_NRM:
- break;
- case TGSI_OPCODE_DIV:
- break;
- case TGSI_OPCODE_DP2:
- break;
- case TGSI_OPCODE_TXL:
- break;
- case TGSI_OPCODE_BRK: {
- instr->brk();
- return;
- }
- break;
- case TGSI_OPCODE_IF: {
- instr->ifop(inputs[0]);
- storage->setCurrentBlock(instr->currentBlock());
- return; //just update the state
- }
- break;
- case TGSI_OPCODE_LOOP:
- break;
- case TGSI_OPCODE_REP:
- break;
- case TGSI_OPCODE_ELSE: {
- instr->elseop();
- storage->setCurrentBlock(instr->currentBlock());
- return; //only state update
- }
- break;
- case TGSI_OPCODE_ENDIF: {
- instr->endif();
- storage->setCurrentBlock(instr->currentBlock());
- return; //just update the state
- }
- break;
- case TGSI_OPCODE_ENDLOOP:
- break;
- case TGSI_OPCODE_ENDREP:
- break;
- case TGSI_OPCODE_PUSHA:
- break;
- case TGSI_OPCODE_POPA:
- break;
- case TGSI_OPCODE_CEIL:
- break;
- case TGSI_OPCODE_I2F:
- break;
- case TGSI_OPCODE_NOT:
- break;
- case TGSI_OPCODE_TRUNC: {
- out = instr->trunc(inputs[0]);
- }
- break;
- case TGSI_OPCODE_SHL:
- break;
- case TGSI_OPCODE_SHR:
- break;
- case TGSI_OPCODE_AND:
- break;
- case TGSI_OPCODE_OR:
- break;
- case TGSI_OPCODE_MOD:
- break;
- case TGSI_OPCODE_XOR:
- break;
- case TGSI_OPCODE_SAD:
- break;
- case TGSI_OPCODE_TXF:
- break;
- case TGSI_OPCODE_TXQ:
- break;
- case TGSI_OPCODE_CONT:
- break;
- case TGSI_OPCODE_EMIT:
- break;
- case TGSI_OPCODE_ENDPRIM:
- break;
- case TGSI_OPCODE_BGNLOOP2: {
- instr->beginLoop();
- storage->setCurrentBlock(instr->currentBlock());
- return;
- }
- break;
- case TGSI_OPCODE_BGNSUB: {
- instr->bgnSub(instno);
- storage->setCurrentBlock(instr->currentBlock());
- storage->pushTemps();
- return;
- }
- break;
- case TGSI_OPCODE_ENDLOOP2: {
- instr->endLoop();
- storage->setCurrentBlock(instr->currentBlock());
- return;
- }
- break;
- case TGSI_OPCODE_ENDSUB: {
- instr->endSub();
- storage->setCurrentBlock(instr->currentBlock());
- storage->popArguments();
- storage->popTemps();
- return;
- }
- break;
- case TGSI_OPCODE_NOISE1:
- break;
- case TGSI_OPCODE_NOISE2:
- break;
- case TGSI_OPCODE_NOISE3:
- break;
- case TGSI_OPCODE_NOISE4:
- break;
- case TGSI_OPCODE_NOP:
- break;
- case TGSI_OPCODE_TEXBEM:
- break;
- case TGSI_OPCODE_TEXBEML:
- break;
- case TGSI_OPCODE_TEXREG2AR:
- break;
- case TGSI_OPCODE_TEXM3X2PAD:
- break;
- case TGSI_OPCODE_TEXM3X2TEX:
- break;
- case TGSI_OPCODE_TEXM3X3PAD:
- break;
- case TGSI_OPCODE_TEXM3X3TEX:
- break;
- case TGSI_OPCODE_TEXM3X3SPEC:
- break;
- case TGSI_OPCODE_TEXM3X3VSPEC:
- break;
- case TGSI_OPCODE_TEXREG2GB:
- break;
- case TGSI_OPCODE_TEXREG2RGB:
- break;
- case TGSI_OPCODE_TEXDP3TEX:
- break;
- case TGSI_OPCODE_TEXDP3:
- break;
- case TGSI_OPCODE_TEXM3X3:
- break;
- case TGSI_OPCODE_TEXM3X2DEPTH:
- break;
- case TGSI_OPCODE_TEXDEPTH:
- break;
- case TGSI_OPCODE_BEM:
- break;
- case TGSI_OPCODE_M4X3:
- break;
- case TGSI_OPCODE_M3X4:
- break;
- case TGSI_OPCODE_M3X3:
- break;
- case TGSI_OPCODE_M3X2:
- break;
- case TGSI_OPCODE_NRM4:
- break;
- case TGSI_OPCODE_CALLNZ:
- break;
- case TGSI_OPCODE_IFC:
- break;
- case TGSI_OPCODE_BREAKC:
- break;
- case TGSI_OPCODE_KIL:
- break;
- case TGSI_OPCODE_END:
- instr->end();
- return;
- break;
- default:
- fprintf(stderr, "ERROR: Unknown opcode %d\n",
- inst->Instruction.Opcode);
- assert(0);
- break;
- }
-
- if (!out) {
- fprintf(stderr, "ERROR: unsupported opcode %d\n",
- inst->Instruction.Opcode);
- assert(!"Unsupported opcode");
- }
-
- /* # not sure if we need this */
- switch( inst->Instruction.Saturate ) {
- case TGSI_SAT_NONE:
- break;
- case TGSI_SAT_ZERO_ONE:
- /*TXT( "_SAT" );*/
- break;
- case TGSI_SAT_MINUS_PLUS_ONE:
- /*TXT( "_SAT[-1,1]" );*/
- break;
- default:
- assert( 0 );
- }
-
- /* store results */
- for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
- struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
-
- if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
- storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
- } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) {
- storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
- } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) {
- storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
- } else {
- fprintf(stderr, "ERROR: unsupported LLVM destination!");
- assert(!"wrong destination");
- }
- }
-}
-
-
-static void
-translate_instructionir(llvm::Module *module,
- StorageSoa *storage,
- InstructionsSoa *instr,
- struct tgsi_full_instruction *inst,
- struct tgsi_full_instruction *fi,
- unsigned instno)
-{
- std::vector< std::vector<llvm::Value*> > inputs(inst->Instruction.NumSrcRegs);
-
- for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
- struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
- std::vector<llvm::Value*> val;
- llvm::Value *indIdx = 0;
- int swizzle = swizzleInt(src);
-
- if (src->SrcRegister.Indirect) {
- indIdx = storage->addrElement(src->SrcRegisterInd.Index);
- }
- if (src->SrcRegister.File == TGSI_FILE_CONSTANT) {
- val = storage->load(StorageSoa::Const,
- src->SrcRegister.Index, swizzle, indIdx);
- } else if (src->SrcRegister.File == TGSI_FILE_INPUT) {
- val = storage->load(StorageSoa::Input,
- src->SrcRegister.Index, swizzle, indIdx);
- } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) {
- val = storage->load(StorageSoa::Temp,
- src->SrcRegister.Index, swizzle, indIdx);
- } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) {
- val = storage->load(StorageSoa::Output,
- src->SrcRegister.Index, swizzle, indIdx);
- } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
- val = storage->load(StorageSoa::Immediate,
- src->SrcRegister.Index, swizzle, indIdx);
- } else {
- fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File);
- return;
- }
-
- inputs[i] = val;
- }
-
- std::vector<llvm::Value*> out(4);
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_ARL: {
- out = instr->arl(inputs[0]);
- }
- break;
- case TGSI_OPCODE_MOV: {
- out = inputs[0];
- }
- break;
- case TGSI_OPCODE_LIT: {
- }
- break;
- case TGSI_OPCODE_RCP: {
- }
- break;
- case TGSI_OPCODE_RSQ: {
- }
- break;
- case TGSI_OPCODE_EXP:
- break;
- case TGSI_OPCODE_LOG:
- break;
- case TGSI_OPCODE_MUL: {
- out = instr->mul(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_ADD: {
- out = instr->add(inputs[0], inputs[1]);
- }
- break;
- case TGSI_OPCODE_DP3: {
- }
- break;
- case TGSI_OPCODE_DP4: {
- }
- break;
- case TGSI_OPCODE_DST: {
- }
- break;
- case TGSI_OPCODE_MIN: {
- }
- break;
- case TGSI_OPCODE_MAX: {
- }
- break;
- case TGSI_OPCODE_SLT: {
- }
- break;
- case TGSI_OPCODE_SGE: {
- }
- break;
- case TGSI_OPCODE_MAD: {
- out = instr->madd(inputs[0], inputs[1], inputs[2]);
- }
- break;
- case TGSI_OPCODE_SUB: {
- }
- break;
- case TGSI_OPCODE_LERP: {
- }
- break;
- case TGSI_OPCODE_CND:
- break;
- case TGSI_OPCODE_CND0:
- break;
- case TGSI_OPCODE_DOT2ADD:
- break;
- case TGSI_OPCODE_INDEX:
- break;
- case TGSI_OPCODE_NEGATE:
- break;
- case TGSI_OPCODE_FRAC: {
- }
- break;
- case TGSI_OPCODE_CLAMP:
- break;
- case TGSI_OPCODE_FLOOR: {
- }
- break;
- case TGSI_OPCODE_ROUND:
- break;
- case TGSI_OPCODE_EXPBASE2: {
- }
- break;
- case TGSI_OPCODE_LOGBASE2: {
- }
- break;
- case TGSI_OPCODE_POWER: {
- }
- break;
- case TGSI_OPCODE_CROSSPRODUCT: {
- }
- break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- break;
- case TGSI_OPCODE_ABS: {
- }
- break;
- case TGSI_OPCODE_RCC:
- break;
- case TGSI_OPCODE_DPH: {
- }
- break;
- case TGSI_OPCODE_COS: {
- }
- break;
- case TGSI_OPCODE_DDX:
- break;
- case TGSI_OPCODE_DDY:
- break;
- case TGSI_OPCODE_KILP: {
- }
- break;
- case TGSI_OPCODE_PK2H:
- break;
- case TGSI_OPCODE_PK2US:
- break;
- case TGSI_OPCODE_PK4B:
- break;
- case TGSI_OPCODE_PK4UB:
- break;
- case TGSI_OPCODE_RFL:
- break;
- case TGSI_OPCODE_SEQ:
- break;
- case TGSI_OPCODE_SFL:
- break;
- case TGSI_OPCODE_SGT: {
- }
- break;
- case TGSI_OPCODE_SIN: {
- }
- break;
- case TGSI_OPCODE_SLE:
- break;
- case TGSI_OPCODE_SNE:
- break;
- case TGSI_OPCODE_STR:
- break;
- case TGSI_OPCODE_TEX:
- break;
- case TGSI_OPCODE_TXD:
- break;
- case TGSI_OPCODE_UP2H:
- break;
- case TGSI_OPCODE_UP2US:
- break;
- case TGSI_OPCODE_UP4B:
- break;
- case TGSI_OPCODE_UP4UB:
- break;
- case TGSI_OPCODE_X2D:
- break;
- case TGSI_OPCODE_ARA:
- break;
- case TGSI_OPCODE_ARR:
- break;
- case TGSI_OPCODE_BRA:
- break;
- case TGSI_OPCODE_CAL: {
- }
- break;
- case TGSI_OPCODE_RET: {
- }
- break;
- case TGSI_OPCODE_SSG:
- break;
- case TGSI_OPCODE_CMP: {
- }
- break;
- case TGSI_OPCODE_SCS: {
- }
- break;
- case TGSI_OPCODE_TXB:
- break;
- case TGSI_OPCODE_NRM:
- break;
- case TGSI_OPCODE_DIV:
- break;
- case TGSI_OPCODE_DP2:
- break;
- case TGSI_OPCODE_TXL:
- break;
- case TGSI_OPCODE_BRK: {
- }
- break;
- case TGSI_OPCODE_IF: {
- }
- break;
- case TGSI_OPCODE_LOOP:
- break;
- case TGSI_OPCODE_REP:
- break;
- case TGSI_OPCODE_ELSE: {
- }
- break;
- case TGSI_OPCODE_ENDIF: {
- }
- break;
- case TGSI_OPCODE_ENDLOOP:
- break;
- case TGSI_OPCODE_ENDREP:
- break;
- case TGSI_OPCODE_PUSHA:
- break;
- case TGSI_OPCODE_POPA:
- break;
- case TGSI_OPCODE_CEIL:
- break;
- case TGSI_OPCODE_I2F:
- break;
- case TGSI_OPCODE_NOT:
- break;
- case TGSI_OPCODE_TRUNC: {
- }
- break;
- case TGSI_OPCODE_SHL:
- break;
- case TGSI_OPCODE_SHR:
- break;
- case TGSI_OPCODE_AND:
- break;
- case TGSI_OPCODE_OR:
- break;
- case TGSI_OPCODE_MOD:
- break;
- case TGSI_OPCODE_XOR:
- break;
- case TGSI_OPCODE_SAD:
- break;
- case TGSI_OPCODE_TXF:
- break;
- case TGSI_OPCODE_TXQ:
- break;
- case TGSI_OPCODE_CONT:
- break;
- case TGSI_OPCODE_EMIT:
- break;
- case TGSI_OPCODE_ENDPRIM:
- break;
- case TGSI_OPCODE_BGNLOOP2: {
- }
- break;
- case TGSI_OPCODE_BGNSUB: {
- }
- break;
- case TGSI_OPCODE_ENDLOOP2: {
- }
- break;
- case TGSI_OPCODE_ENDSUB: {
- }
- break;
- case TGSI_OPCODE_NOISE1:
- break;
- case TGSI_OPCODE_NOISE2:
- break;
- case TGSI_OPCODE_NOISE3:
- break;
- case TGSI_OPCODE_NOISE4:
- break;
- case TGSI_OPCODE_NOP:
- break;
- case TGSI_OPCODE_TEXBEM:
- break;
- case TGSI_OPCODE_TEXBEML:
- break;
- case TGSI_OPCODE_TEXREG2AR:
- break;
- case TGSI_OPCODE_TEXM3X2PAD:
- break;
- case TGSI_OPCODE_TEXM3X2TEX:
- break;
- case TGSI_OPCODE_TEXM3X3PAD:
- break;
- case TGSI_OPCODE_TEXM3X3TEX:
- break;
- case TGSI_OPCODE_TEXM3X3SPEC:
- break;
- case TGSI_OPCODE_TEXM3X3VSPEC:
- break;
- case TGSI_OPCODE_TEXREG2GB:
- break;
- case TGSI_OPCODE_TEXREG2RGB:
- break;
- case TGSI_OPCODE_TEXDP3TEX:
- break;
- case TGSI_OPCODE_TEXDP3:
- break;
- case TGSI_OPCODE_TEXM3X3:
- break;
- case TGSI_OPCODE_TEXM3X2DEPTH:
- break;
- case TGSI_OPCODE_TEXDEPTH:
- break;
- case TGSI_OPCODE_BEM:
- break;
- case TGSI_OPCODE_M4X3:
- break;
- case TGSI_OPCODE_M3X4:
- break;
- case TGSI_OPCODE_M3X3:
- break;
- case TGSI_OPCODE_M3X2:
- break;
- case TGSI_OPCODE_NRM4:
- break;
- case TGSI_OPCODE_CALLNZ:
- break;
- case TGSI_OPCODE_IFC:
- break;
- case TGSI_OPCODE_BREAKC:
- break;
- case TGSI_OPCODE_KIL:
- break;
- case TGSI_OPCODE_END:
- instr->end();
- return;
- break;
- default:
- fprintf(stderr, "ERROR: Unknown opcode %d\n",
- inst->Instruction.Opcode);
- assert(0);
- break;
- }
-
- if (!out[0]) {
- fprintf(stderr, "ERROR: unsupported opcode %d\n",
- inst->Instruction.Opcode);
- assert(!"Unsupported opcode");
- }
-
- /* store results */
- for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
- struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
-
- if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
- storage->store(StorageSoa::Output,
- dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
- } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) {
- storage->store(StorageSoa::Temp,
- dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
- } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) {
- storage->store(StorageSoa::Address,
- dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
- } else {
- fprintf(stderr, "ERROR: unsupported LLVM destination!");
- assert(!"wrong destination");
- }
- }
-}
-
-llvm::Module *
-tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens)
-{
- llvm::Module *mod = new Module("shader");
- struct tgsi_parse_context parse;
- struct tgsi_full_instruction fi;
- struct tgsi_full_declaration fd;
- unsigned instno = 0;
- Function* shader = mod->getFunction("execute_shader");
- std::ostringstream stream;
- if (ir->type == GALLIVM_VS) {
- stream << "vs_shader";
- } else {
- stream << "fs_shader";
- }
- stream << ir->id;
- std::string func_name = stream.str();
- shader->setName(func_name.c_str());
-
- Function::arg_iterator args = shader->arg_begin();
- Value *ptr_INPUT = args++;
- ptr_INPUT->setName("input");
-
- BasicBlock *label_entry = new BasicBlock("entry", shader, 0);
-
- tgsi_parse_init(&parse, tokens);
-
- fi = tgsi_default_full_instruction();
- fd = tgsi_default_full_declaration();
- Storage storage(label_entry, ptr_INPUT);
- Instructions instr(mod, shader, label_entry, &storage);
- while(!tgsi_parse_end_of_tokens(&parse)) {
- tgsi_parse_token(&parse);
-
- switch (parse.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- translate_declaration(ir, mod, &storage,
- &parse.FullToken.FullDeclaration,
- &fd);
- break;
-
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- translate_immediate(&storage,
- &parse.FullToken.FullImmediate);
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- translate_instruction(mod, &storage, &instr,
- &parse.FullToken.FullInstruction,
- &fi, instno);
- ++instno;
- break;
-
- default:
- assert(0);
- }
- }
-
- tgsi_parse_free(&parse);
-
- ir->num_consts = storage.numConsts();
- return mod;
-}
-
-llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir,
- const struct tgsi_token *tokens)
-{
- llvm::Module *mod = new Module("shader");
- struct tgsi_parse_context parse;
- struct tgsi_full_instruction fi;
- struct tgsi_full_declaration fd;
- unsigned instno = 0;
- std::ostringstream stream;
- if (ir->type == GALLIVM_VS) {
- stream << "vs_shader";
- } else {
- stream << "fs_shader";
- }
- //stream << ir->id;
- std::string func_name = stream.str();
- Function *shader = llvm::cast<Function>(mod->getOrInsertFunction(
- func_name.c_str(),
- vertexShaderFunctionType()));
-
- Function::arg_iterator args = shader->arg_begin();
- Value *input = args++;
- input->setName("inputs");
- Value *output = args++;
- output->setName("outputs");
- Value *consts = args++;
- consts->setName("consts");
- Value *temps = args++;
- temps->setName("temps");
-
- BasicBlock *label_entry = new BasicBlock("entry", shader, 0);
-
- tgsi_parse_init(&parse, tokens);
-
- fi = tgsi_default_full_instruction();
- fd = tgsi_default_full_declaration();
-
- StorageSoa storage(label_entry, input, output, consts, temps);
- InstructionsSoa instr(mod, shader, label_entry, &storage);
-
- while(!tgsi_parse_end_of_tokens(&parse)) {
- tgsi_parse_token(&parse);
-
- switch (parse.FullToken.Token.Type) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- translate_declarationir(ir, mod, &storage,
- &parse.FullToken.FullDeclaration,
- &fd);
- break;
-
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- translate_immediateir(&storage,
- &parse.FullToken.FullImmediate);
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- storage.declareImmediates();
- translate_instructionir(mod, &storage, &instr,
- &parse.FullToken.FullInstruction,
- &fi, instno);
- ++instno;
- break;
-
- default:
- assert(0);
- }
- }
-
- tgsi_parse_free(&parse);
-
- return mod;
-}
+++ /dev/null
-#ifndef TGSITOLLVM_H
-#define TGSITOLLVM_H
-
-
-namespace llvm {
- class Module;
-}
-
-struct gallivm_ir;
-struct tgsi_token;
-
-
-llvm::Module * tgsi_to_llvm(struct gallivm_ir *ir,
- const struct tgsi_token *tokens);
-
-
-llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir,
- const struct tgsi_token *tokens);
-
-#endif
+++ /dev/null
-
-TOP = ../../../..
-include $(TOP)/configs/current
-
-LIBNAME = pipebuffer
-
-DRIVER_SOURCES = \
- pb_buffer_fenced.c \
- pb_buffer_malloc.c \
- pb_bufmgr_fenced.c \
- pb_bufmgr_mm.c \
- pb_bufmgr_pool.c \
- pb_winsys.c
-
-C_SOURCES = \
- $(DRIVER_SOURCES)
-
-ASM_SOURCES =
-
-include ../../Makefile.template
-
-symlinks:
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- **************************************************************************/
-
-/**
- * \file
- * List macros heavily inspired by the Linux kernel
- * list handling. No list looping yet.
- *
- * Is not threadsafe, so common operations need to
- * be protected using an external mutex.
- */
-
-#ifndef LINKED_LIST_H_
-#define LINKED_LIST_H_
-
-
-#include <stddef.h>
-
-
-struct list_head
-{
- struct list_head *prev;
- struct list_head *next;
-};
-
-
-#define LIST_INITHEAD(__item) \
- do { \
- (__item)->prev = (__item); \
- (__item)->next = (__item); \
- } while (0)
-
-#define LIST_ADD(__item, __list) \
- do { \
- (__item)->prev = (__list); \
- (__item)->next = (__list)->next; \
- (__list)->next->prev = (__item); \
- (__list)->next = (__item); \
- } while (0)
-
-#define LIST_ADDTAIL(__item, __list) \
- do { \
- (__item)->next = (__list); \
- (__item)->prev = (__list)->prev; \
- (__list)->prev->next = (__item); \
- (__list)->prev = (__item); \
- } while(0)
-
-#define LIST_DEL(__item) \
- do { \
- (__item)->prev->next = (__item)->next; \
- (__item)->next->prev = (__item)->prev; \
- } while(0)
-
-#define LIST_DELINIT(__item) \
- do { \
- (__item)->prev->next = (__item)->next; \
- (__item)->next->prev = (__item)->prev; \
- (__item)->next = (__item); \
- (__item)->prev = (__item); \
- } while(0)
-
-#define LIST_ENTRY(__type, __item, __field) \
- ((__type *)(((char *)(__item)) - offsetof(__type, __field)))
-
-
-#endif /*LINKED_LIST_H_*/
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \file
- * Generic code for buffers.
- *
- * Behind a pipe buffle handle there can be DMA buffers, client (or user)
- * buffers, regular malloced buffers, etc. This file provides an abstract base
- * buffer handle that allows the driver to cope with all those kinds of buffers
- * in a more flexible way.
- *
- * There is no obligation of a winsys driver to use this library. And a pipe
- * driver should be completly agnostic about it.
- *
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
- */
-
-#ifndef PB_BUFFER_H_
-#define PB_BUFFER_H_
-
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_debug.h"
-#include "pipe/p_state.h"
-#include "pipe/p_inlines.h"
-
-
-struct pb_vtbl;
-
-/**
- * Buffer description.
- *
- * Used when allocating the buffer.
- */
-struct pb_desc
-{
- unsigned alignment;
- unsigned usage;
-};
-
-
-/**
- * Base class for all pb_* buffers.
- */
-struct pb_buffer
-{
- struct pipe_buffer base;
-
- /**
- * Pointer to the virtual function table.
- *
- * Avoid accessing this table directly. Use the inline functions below
- * instead to avoid mistakes.
- */
- const struct pb_vtbl *vtbl;
-};
-
-
-/**
- * Virtual function table for the buffer storage operations.
- *
- * Note that creation is not done through this table.
- */
-struct pb_vtbl
-{
- void (*destroy)( struct pb_buffer *buf );
-
- /**
- * Map the entire data store of a buffer object into the client's address.
- * flags is bitmask of PIPE_BUFFER_FLAG_READ/WRITE.
- */
- void *(*map)( struct pb_buffer *buf,
- unsigned flags );
-
- void (*unmap)( struct pb_buffer *buf );
-
- /**
- * Get the base buffer and the offset.
- *
- * A buffer can be subdivided in smaller buffers. This method should return
- * the underlaying buffer, and the relative offset.
- *
- * Buffers without an underlaying base buffer should return themselves, with
- * a zero offset.
- *
- * Note that this will increase the reference count of the base buffer.
- */
- void (*get_base_buffer)( struct pb_buffer *buf,
- struct pb_buffer **base_buf,
- unsigned *offset );
-};
-
-
-static INLINE struct pipe_buffer *
-pb_pipe_buffer( struct pb_buffer *pbuf )
-{
- assert(pbuf);
- return &pbuf->base;
-}
-
-
-static INLINE struct pb_buffer *
-pb_buffer( struct pipe_buffer *buf )
-{
- assert(buf);
- /* Could add a magic cookie check on debug builds.
- */
- return (struct pb_buffer *)buf;
-}
-
-
-/* Accessor functions for pb->vtbl:
- */
-static INLINE void *
-pb_map(struct pb_buffer *buf,
- unsigned flags)
-{
- assert(buf);
- return buf->vtbl->map(buf, flags);
-}
-
-
-static INLINE void
-pb_unmap(struct pb_buffer *buf)
-{
- assert(buf);
- buf->vtbl->unmap(buf);
-}
-
-
-static INLINE void
-pb_get_base_buffer( struct pb_buffer *buf,
- struct pb_buffer **base_buf,
- unsigned *offset )
-{
- buf->vtbl->get_base_buffer(buf, base_buf, offset);
-}
-
-
-static INLINE void
-pb_destroy(struct pb_buffer *buf)
-{
- assert(buf);
- buf->vtbl->destroy(buf);
-}
-
-
-/* XXX: thread safety issues!
- */
-static INLINE void
-pb_reference(struct pb_buffer **dst,
- struct pb_buffer *src)
-{
- if (src)
- src->base.refcount++;
-
- if (*dst && --(*dst)->base.refcount == 0)
- pb_destroy( *dst );
-
- *dst = src;
-}
-
-
-/**
- * Malloc-based buffer to store data that can't be used by the graphics
- * hardware.
- */
-struct pb_buffer *
-pb_malloc_buffer_create(size_t size,
- const struct pb_desc *desc);
-
-
-void
-pb_init_winsys(struct pipe_winsys *winsys);
-
-
-#endif /*PB_BUFFER_H_*/
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \file
- * Implementation of fenced buffers.
- *
- * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com>
- * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
- */
-
-
-#include "linked_list.h"
-
-#include "p_compiler.h"
-#include "p_debug.h"
-#include "p_winsys.h"
-#include "p_thread.h"
-#include "p_util.h"
-
-#include "pb_buffer.h"
-#include "pb_buffer_fenced.h"
-
-#ifndef __MSC__
-#include <unistd.h>
-#endif
-
-
-/**
- * Convenience macro (type safe).
- */
-#define SUPER(__derived) (&(__derived)->base)
-
-
-struct fenced_buffer_list
-{
- _glthread_Mutex mutex;
-
- struct pipe_winsys *winsys;
-
- size_t numDelayed;
- size_t checkDelayed;
-
- struct list_head delayed;
-};
-
-
-/**
- * Wrapper around a pipe buffer which adds fencing and reference counting.
- */
-struct fenced_buffer
-{
- struct pb_buffer base;
-
- struct pb_buffer *buffer;
-
- struct pipe_fence_handle *fence;
-
- struct list_head head;
- struct fenced_buffer_list *list;
-};
-
-
-static INLINE struct fenced_buffer *
-fenced_buffer(struct pb_buffer *buf)
-{
- assert(buf);
- assert(buf->vtbl == &fenced_buffer_vtbl);
- return (struct fenced_buffer *)buf;
-}
-
-
-
-
-static void
-_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
- int wait)
-{
- struct pipe_winsys *winsys = fenced_list->winsys;
- struct fenced_buffer *fenced_buf;
- struct list_head *list, *prev;
- int signaled = -1;
-
- list = fenced_list->delayed.next;
-
- if (fenced_list->numDelayed > 3) {
- unsigned i;
-
- for (i = 0; i < fenced_list->numDelayed; i += 3) {
- list = list->next;
- }
- }
-
- prev = list->prev;
- for (; list != &fenced_list->delayed; list = prev, prev = list->prev) {
-
- fenced_buf = LIST_ENTRY(struct fenced_buffer, list, head);
-
- if (signaled != 0) {
- if (wait) {
- signaled = winsys->fence_finish(winsys, fenced_buf->fence, 0);
- }
- else {
- signaled = winsys->fence_signalled(winsys, fenced_buf->fence, 0);
- }
- }
-
- if (signaled != 0)
- /* XXX: we are assuming that buffers are freed in the same order they
- * are fenced which may not always be true...
- */
- break;
-
- winsys->fence_reference(winsys, &fenced_buf->fence, NULL);
-
- LIST_DEL(list);
- fenced_list->numDelayed--;
-
- /* Do the delayed destroy:
- */
- pb_reference(&fenced_buf->buffer, NULL);
- FREE(fenced_buf);
- }
-}
-
-
-static void
-fenced_buffer_destroy(struct pb_buffer *buf)
-{
- struct fenced_buffer *fenced_buf = fenced_buffer(buf);
- struct fenced_buffer_list *fenced_list = fenced_buf->list;
-
- if (fenced_buf->fence) {
- LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed);
- fenced_list->numDelayed++;
- }
- else {
- pb_reference(&fenced_buf->buffer, NULL);
- FREE(fenced_buf);
- }
-
- if ((fenced_list->numDelayed % fenced_list->checkDelayed) == 0)
- _fenced_buffer_list_check_free(fenced_list, 0);
-}
-
-
-static void *
-fenced_buffer_map(struct pb_buffer *buf,
- unsigned flags)
-{
- struct fenced_buffer *fenced_buf = fenced_buffer(buf);
- return pb_map(fenced_buf->buffer, flags);
-}
-
-
-static void
-fenced_buffer_unmap(struct pb_buffer *buf)
-{
- struct fenced_buffer *fenced_buf = fenced_buffer(buf);
- pb_unmap(fenced_buf->buffer);
-}
-
-
-static void
-fenced_buffer_get_base_buffer(struct pb_buffer *buf,
- struct pb_buffer **base_buf,
- unsigned *offset)
-{
- struct fenced_buffer *fenced_buf = fenced_buffer(buf);
- pb_get_base_buffer(fenced_buf->buffer, base_buf, offset);
-}
-
-
-const struct pb_vtbl
-fenced_buffer_vtbl = {
- fenced_buffer_destroy,
- fenced_buffer_map,
- fenced_buffer_unmap,
- fenced_buffer_get_base_buffer
-};
-
-
-struct pb_buffer *
-fenced_buffer_create(struct fenced_buffer_list *fenced_list,
- struct pb_buffer *buffer)
-{
- struct fenced_buffer *buf;
-
- if(!buffer)
- return NULL;
-
- buf = CALLOC_STRUCT(fenced_buffer);
- if(!buf)
- return NULL;
-
- buf->base.base.refcount = 1;
- buf->base.base.alignment = buffer->base.alignment;
- buf->base.base.usage = buffer->base.usage;
- buf->base.base.size = buffer->base.size;
-
- buf->base.vtbl = &fenced_buffer_vtbl;
- buf->buffer = buffer;
- buf->list = fenced_list;
-
- return &buf->base;
-}
-
-
-void
-buffer_fence(struct pb_buffer *buf,
- struct pipe_fence_handle *fence)
-{
- struct fenced_buffer *fenced_buf = fenced_buffer(buf);
- struct fenced_buffer_list *fenced_list = fenced_buf->list;
- struct pipe_winsys *winsys = fenced_list->winsys;
-
- _glthread_LOCK_MUTEX(fenced_list->mutex);
- winsys->fence_reference(winsys, &fenced_buf->fence, fence);
- _glthread_UNLOCK_MUTEX(fenced_list->mutex);
-}
-
-
-struct fenced_buffer_list *
-fenced_buffer_list_create(struct pipe_winsys *winsys)
-{
- struct fenced_buffer_list *fenced_list;
-
- fenced_list = (struct fenced_buffer_list *)CALLOC(1, sizeof(*fenced_list));
- if (!fenced_list)
- return NULL;
-
- fenced_list->winsys = winsys;
-
- LIST_INITHEAD(&fenced_list->delayed);
-
- fenced_list->numDelayed = 0;
-
- /* TODO: don't hard code this */
- fenced_list->checkDelayed = 5;
-
- _glthread_INIT_MUTEX(fenced_list->mutex);
-
- return fenced_list;
-}
-
-
-void
-fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
- int wait)
-{
- _glthread_LOCK_MUTEX(fenced_list->mutex);
- _fenced_buffer_list_check_free(fenced_list, wait);
- _glthread_UNLOCK_MUTEX(fenced_list->mutex);
-}
-
-
-void
-fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list)
-{
- _glthread_LOCK_MUTEX(fenced_list->mutex);
-
- /* Wait on outstanding fences */
- while (fenced_list->numDelayed) {
- _glthread_UNLOCK_MUTEX(fenced_list->mutex);
- sched_yield();
- _fenced_buffer_list_check_free(fenced_list, 1);
- _glthread_LOCK_MUTEX(fenced_list->mutex);
- }
-
- _glthread_UNLOCK_MUTEX(fenced_list->mutex);
-
- FREE(fenced_list);
-}
-
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \file
- * Buffer fencing.
- *
- * "Fenced buffers" is actually a misnomer. They should be referred as
- * "fenceable buffers", i.e, buffers that can be fenced, but I couldn't find
- * the word "fenceable" in the dictionary.
- *
- * A "fenced buffer" is a decorator around a normal buffer, which adds two
- * special properties:
- * - the ability for the destruction to be delayed by a fence;
- * - reference counting.
- *
- * Usually DMA buffers have a life-time that will extend the life-time of its
- * handle. The end-of-life is dictated by the fence signalling.
- *
- * Between the handle's destruction, and the fence signalling, the buffer is
- * stored in a fenced buffer list.
- *
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
- */
-
-#ifndef PB_BUFFER_FENCED_H_
-#define PB_BUFFER_FENCED_H_
-
-
-#include "pipe/p_debug.h"
-
-
-struct pipe_winsys;
-struct pipe_buffer;
-struct pipe_fence_handle;
-
-
-/**
- * List of buffers which are awaiting fence signalling.
- */
-struct fenced_buffer_list;
-
-
-/**
- * The fenced buffer's virtual function table.
- *
- * NOTE: Made public for debugging purposes.
- */
-extern const struct pb_vtbl fenced_buffer_vtbl;
-
-
-/**
- * Create a fenced buffer list.
- *
- * See also fenced_bufmgr_create for a more convenient way to use this.
- */
-struct fenced_buffer_list *
-fenced_buffer_list_create(struct pipe_winsys *winsys);
-
-
-/**
- * Walk the fenced buffer list to check and free signalled buffers.
- */
-void
-fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
- int wait);
-
-void
-fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list);
-
-
-/**
- * Wrap a buffer in a fenced buffer.
- *
- * NOTE: this will not increase the buffer reference count.
- */
-struct pb_buffer *
-fenced_buffer_create(struct fenced_buffer_list *fenced,
- struct pb_buffer *buffer);
-
-
-/**
- * Set a buffer's fence.
- *
- * NOTE: Although it takes a generic pb_buffer argument, it will fail
- * on everything but buffers returned by fenced_buffer_create.
- */
-void
-buffer_fence(struct pb_buffer *buf,
- struct pipe_fence_handle *fence);
-
-
-#endif /*PB_BUFFER_FENCED_H_*/
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \file
- * Implementation of malloc-based buffers to store data that can't be processed
- * by the hardware.
- *
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
- */
-
-
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-#include "pb_buffer.h"
-
-
-struct malloc_buffer
-{
- struct pb_buffer base;
- void *data;
-};
-
-
-extern const struct pb_vtbl malloc_buffer_vtbl;
-
-static INLINE struct malloc_buffer *
-malloc_buffer(struct pb_buffer *buf)
-{
- assert(buf);
- assert(buf->vtbl == &malloc_buffer_vtbl);
- return (struct malloc_buffer *)buf;
-}
-
-
-static void
-malloc_buffer_destroy(struct pb_buffer *buf)
-{
- align_free(malloc_buffer(buf)->data);
- FREE(buf);
-}
-
-
-static void *
-malloc_buffer_map(struct pb_buffer *buf,
- unsigned flags)
-{
- return malloc_buffer(buf)->data;
-}
-
-
-static void
-malloc_buffer_unmap(struct pb_buffer *buf)
-{
- /* No-op */
-}
-
-
-static void
-malloc_buffer_get_base_buffer(struct pb_buffer *buf,
- struct pb_buffer **base_buf,
- unsigned *offset)
-{
- *base_buf = buf;
- *offset = 0;
-}
-
-
-const struct pb_vtbl
-malloc_buffer_vtbl = {
- malloc_buffer_destroy,
- malloc_buffer_map,
- malloc_buffer_unmap,
- malloc_buffer_get_base_buffer
-};
-
-
-struct pb_buffer *
-pb_malloc_buffer_create(size_t size,
- const struct pb_desc *desc)
-{
- struct malloc_buffer *buf;
-
- /* TODO: do a single allocation */
-
- buf = CALLOC_STRUCT(malloc_buffer);
- if(!buf)
- return NULL;
-
- buf->base.base.refcount = 1;
- buf->base.base.alignment = desc->alignment;
- buf->base.base.usage = desc->usage;
- buf->base.base.size = size;
- buf->base.vtbl = &malloc_buffer_vtbl;
-
- buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment);
- if(!buf->data) {
- align_free(buf);
- return NULL;
- }
-
- return &buf->base;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \file
- * Buffer management.
- *
- * A buffer manager does only one basic thing: it creates buffers. Actually,
- * "buffer factory" would probably a more accurate description.
- *
- * You can chain buffer managers so that you can have a finer grained memory
- * management and pooling.
- *
- * For example, for a simple batch buffer manager you would chain:
- * - the native buffer manager, which provides DMA memory from the graphics
- * memory space;
- * - the pool buffer manager, which keep around a pool of equally sized buffers
- * to avoid latency associated with the native buffer manager;
- * - the fenced buffer manager, which will delay buffer destruction until the
- * the moment the card finishing processing it.
- *
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
- */
-
-#ifndef PB_BUFMGR_H_
-#define PB_BUFMGR_H_
-
-
-#include <stddef.h>
-
-
-struct pb_desc;
-struct pipe_buffer;
-struct pipe_winsys;
-
-
-/**
- * Abstract base class for all buffer managers.
- */
-struct pb_manager
-{
- /* XXX: we will likely need more allocation flags */
- struct pb_buffer *
- (*create_buffer)( struct pb_manager *mgr,
- size_t size,
- const struct pb_desc *desc);
-
- void
- (*destroy)( struct pb_manager *mgr );
-};
-
-
-/**
- * Static buffer pool manager.
- *
- * Manages the allocation of equally sized buffers. It does so by allocating
- * a single big buffer and divide it equally sized buffers.
- *
- * It is meant to manage the allocation of batch buffer pools.
- */
-struct pb_manager *
-pool_bufmgr_create(struct pb_manager *provider,
- size_t n, size_t size,
- const struct pb_desc *desc);
-
-
-/**
- * Wraper around the old memory manager.
- *
- * It managers buffers of different sizes. It does so by allocating a buffer
- * with the size of the heap, and then using the old mm memory manager to manage
- * that heap.
- */
-struct pb_manager *
-mm_bufmgr_create(struct pb_manager *provider,
- size_t size, size_t align2);
-
-/**
- * Same as mm_bufmgr_create.
- *
- * Buffer will be release when the manager is destroyed.
- */
-struct pb_manager *
-mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
- size_t size, size_t align2);
-
-
-/**
- * Fenced buffer manager.
- *
- * This manager is just meant for convenience. It wraps the buffers returned
- * by another manager in fenced buffers, so that
- *
- * NOTE: the buffer manager that provides the buffers will be destroyed
- * at the same time.
- */
-struct pb_manager *
-fenced_bufmgr_create(struct pb_manager *provider,
- struct pipe_winsys *winsys);
-
-
-#endif /*PB_BUFMGR_H_*/
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- *
- **************************************************************************/
-
-/**
- * \file
- * A buffer manager that wraps buffers in fenced buffers.
- *
- * \author José Fonseca <jrfonseca@tungstengraphics.dot.com>
- */
-
-
-#include "p_debug.h"
-#include "p_util.h"
-
-#include "pb_buffer.h"
-#include "pb_buffer_fenced.h"
-#include "pb_bufmgr.h"
-
-
-struct fenced_pb_manager
-{
- struct pb_manager base;
-
- struct pb_manager *provider;
-
- struct fenced_buffer_list *fenced_list;
-};
-
-
-static INLINE struct fenced_pb_manager *
-fenced_pb_manager(struct pb_manager *mgr)
-{
- assert(mgr);
- return (struct fenced_pb_manager *)mgr;
-}
-
-
-static struct pb_buffer *
-fenced_bufmgr_create_buffer(struct pb_manager *mgr,
- size_t size,
- const struct pb_desc *desc)
-{
- struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr);
- struct pb_buffer *buf;
- struct pb_buffer *fenced_buf;
-
- /* check for free buffers before allocating new ones */
- fenced_buffer_list_check_free(fenced_mgr->fenced_list, 0);
-
- buf = fenced_mgr->provider->create_buffer(fenced_mgr->provider, size, desc);
- if(!buf) {
- /* try harder to get a buffer */
- fenced_buffer_list_check_free(fenced_mgr->fenced_list, 1);
-
- buf = fenced_mgr->provider->create_buffer(fenced_mgr->provider, size, desc);
- if(!buf) {
- /* give up */
- return NULL;
- }
- }
-
- fenced_buf = fenced_buffer_create(fenced_mgr->fenced_list, buf);
- if(!fenced_buf) {
- assert(buf->base.refcount == 1);
- pb_destroy(buf);
- }
-
- return fenced_buf;
-}
-
-
-static void
-fenced_bufmgr_destroy(struct pb_manager *mgr)
-{
- struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr);
-
- fenced_buffer_list_destroy(fenced_mgr->fenced_list);
-
- fenced_mgr->provider->destroy(fenced_mgr->provider);
-
- FREE(fenced_mgr);
-}
-
-
-struct pb_manager *
-fenced_bufmgr_create(struct pb_manager *provider,
- struct pipe_winsys *winsys)
-{
- struct fenced_pb_manager *fenced_mgr;
-
- fenced_mgr = (struct fenced_pb_manager *)CALLOC(1, sizeof(*fenced_mgr));
- if (!fenced_mgr)
- return NULL;
-
- fenced_mgr->base.destroy = fenced_bufmgr_destroy;
- fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer;
-
- fenced_mgr->provider = provider;
- fenced_mgr->fenced_list = fenced_buffer_list_create(winsys);
- if(!fenced_mgr->fenced_list) {
- FREE(fenced_mgr);
- return NULL;
- }
-
- return &fenced_mgr->base;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * Copyright 1999 Wittawat Yamwong
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \file
- * Buffer manager using the old texture memory manager.
- *
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
- */
-
-
-#include "linked_list.h"
-
-#include "p_defines.h"
-#include "p_debug.h"
-#include "p_thread.h"
-#include "p_util.h"
-#include "pb_buffer.h"
-#include "pb_bufmgr.h"
-
-
-/**
- * Convenience macro (type safe).
- */
-#define SUPER(__derived) (&(__derived)->base)
-
-
-struct mem_block
-{
- struct mem_block *next, *prev;
- struct mem_block *next_free, *prev_free;
- struct mem_block *heap;
- int ofs, size;
- unsigned int free:1;
- unsigned int reserved:1;
-};
-
-
-#ifdef DEBUG
-/**
- * For debugging purposes.
- */
-static void
-mmDumpMemInfo(const struct mem_block *heap)
-{
- debug_printf("Memory heap %p:\n", (void *)heap);
- if (heap == 0) {
- debug_printf(" heap == 0\n");
- } else {
- const struct mem_block *p;
-
- for(p = heap->next; p != heap; p = p->next) {
- debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size,
- p->free ? 'F':'.',
- p->reserved ? 'R':'.');
- }
-
- debug_printf("\nFree list:\n");
-
- for(p = heap->next_free; p != heap; p = p->next_free) {
- debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size,
- p->free ? 'F':'.',
- p->reserved ? 'R':'.');
- }
-
- }
- debug_printf("End of memory blocks\n");
-}
-#endif
-
-
-/**
- * input: total size in bytes
- * return: a heap pointer if OK, NULL if error
- */
-static struct mem_block *
-mmInit(int ofs, int size)
-{
- struct mem_block *heap, *block;
-
- if (size <= 0)
- return NULL;
-
- heap = CALLOC_STRUCT(mem_block);
- if (!heap)
- return NULL;
-
- block = CALLOC_STRUCT(mem_block);
- if (!block) {
- FREE(heap);
- return NULL;
- }
-
- heap->next = block;
- heap->prev = block;
- heap->next_free = block;
- heap->prev_free = block;
-
- block->heap = heap;
- block->next = heap;
- block->prev = heap;
- block->next_free = heap;
- block->prev_free = heap;
-
- block->ofs = ofs;
- block->size = size;
- block->free = 1;
-
- return heap;
-}
-
-
-static struct mem_block *
-SliceBlock(struct mem_block *p,
- int startofs, int size,
- int reserved, int alignment)
-{
- struct mem_block *newblock;
-
- /* break left [p, newblock, p->next], then p = newblock */
- if (startofs > p->ofs) {
- newblock = CALLOC_STRUCT(mem_block);
- if (!newblock)
- return NULL;
- newblock->ofs = startofs;
- newblock->size = p->size - (startofs - p->ofs);
- newblock->free = 1;
- newblock->heap = p->heap;
-
- newblock->next = p->next;
- newblock->prev = p;
- p->next->prev = newblock;
- p->next = newblock;
-
- newblock->next_free = p->next_free;
- newblock->prev_free = p;
- p->next_free->prev_free = newblock;
- p->next_free = newblock;
-
- p->size -= newblock->size;
- p = newblock;
- }
-
- /* break right, also [p, newblock, p->next] */
- if (size < p->size) {
- newblock = CALLOC_STRUCT(mem_block);
- if (!newblock)
- return NULL;
- newblock->ofs = startofs + size;
- newblock->size = p->size - size;
- newblock->free = 1;
- newblock->heap = p->heap;
-
- newblock->next = p->next;
- newblock->prev = p;
- p->next->prev = newblock;
- p->next = newblock;
-
- newblock->next_free = p->next_free;
- newblock->prev_free = p;
- p->next_free->prev_free = newblock;
- p->next_free = newblock;
-
- p->size = size;
- }
-
- /* p = middle block */
- p->free = 0;
-
- /* Remove p from the free list:
- */
- p->next_free->prev_free = p->prev_free;
- p->prev_free->next_free = p->next_free;
-
- p->next_free = 0;
- p->prev_free = 0;
-
- p->reserved = reserved;
- return p;
-}
-
-
-/**
- * Allocate 'size' bytes with 2^align2 bytes alignment,
- * restrict the search to free memory after 'startSearch'
- * depth and back buffers should be in different 4mb banks
- * to get better page hits if possible
- * input: size = size of block
- * align2 = 2^align2 bytes alignment
- * startSearch = linear offset from start of heap to begin search
- * return: pointer to the allocated block, 0 if error
- */
-static struct mem_block *
-mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch)
-{
- struct mem_block *p;
- const int mask = (1 << align2)-1;
- int startofs = 0;
- int endofs;
-
- if (!heap || align2 < 0 || size <= 0)
- return NULL;
-
- for (p = heap->next_free; p != heap; p = p->next_free) {
- assert(p->free);
-
- startofs = (p->ofs + mask) & ~mask;
- if ( startofs < startSearch ) {
- startofs = startSearch;
- }
- endofs = startofs+size;
- if (endofs <= (p->ofs+p->size))
- break;
- }
-
- if (p == heap)
- return NULL;
-
- assert(p->free);
- p = SliceBlock(p,startofs,size,0,mask+1);
-
- return p;
-}
-
-
-#if 0
-/**
- * Free block starts at offset
- * input: pointer to a heap, start offset
- * return: pointer to a block
- */
-static struct mem_block *
-mmFindBlock(struct mem_block *heap, int start)
-{
- struct mem_block *p;
-
- for (p = heap->next; p != heap; p = p->next) {
- if (p->ofs == start)
- return p;
- }
-
- return NULL;
-}
-#endif
-
-
-static INLINE int
-Join2Blocks(struct mem_block *p)
-{
- /* XXX there should be some assertions here */
-
- /* NOTE: heap->free == 0 */
-
- if (p->free && p->next->free) {
- struct mem_block *q = p->next;
-
- assert(p->ofs + p->size == q->ofs);
- p->size += q->size;
-
- p->next = q->next;
- q->next->prev = p;
-
- q->next_free->prev_free = q->prev_free;
- q->prev_free->next_free = q->next_free;
-
- FREE(q);
- return 1;
- }
- return 0;
-}
-
-
-/**
- * Free block starts at offset
- * input: pointer to a block
- * return: 0 if OK, -1 if error
- */
-static int
-mmFreeMem(struct mem_block *b)
-{
- if (!b)
- return 0;
-
- if (b->free) {
- debug_printf("block already free\n");
- return -1;
- }
- if (b->reserved) {
- debug_printf("block is reserved\n");
- return -1;
- }
-
- b->free = 1;
- b->next_free = b->heap->next_free;
- b->prev_free = b->heap;
- b->next_free->prev_free = b;
- b->prev_free->next_free = b;
-
- Join2Blocks(b);
- if (b->prev != b->heap)
- Join2Blocks(b->prev);
-
- return 0;
-}
-
-
-/**
- * destroy MM
- */
-static void
-mmDestroy(struct mem_block *heap)
-{
- struct mem_block *p;
-
- if (!heap)
- return;
-
- for (p = heap->next; p != heap; ) {
- struct mem_block *next = p->next;
- FREE(p);
- p = next;
- }
-
- FREE(heap);
-}
-
-
-struct mm_pb_manager
-{
- struct pb_manager base;
-
- _glthread_Mutex mutex;
-
- size_t size;
- struct mem_block *heap;
-
- size_t align2;
-
- struct pb_buffer *buffer;
- void *map;
-};
-
-
-static INLINE struct mm_pb_manager *
-mm_pb_manager(struct pb_manager *mgr)
-{
- assert(mgr);
- return (struct mm_pb_manager *)mgr;
-}
-
-
-struct mm_buffer
-{
- struct pb_buffer base;
-
- struct mm_pb_manager *mgr;
-
- struct mem_block *block;
-};
-
-
-static INLINE struct mm_buffer *
-mm_buffer(struct pb_buffer *buf)
-{
- assert(buf);
- return (struct mm_buffer *)buf;
-}
-
-
-static void
-mm_buffer_destroy(struct pb_buffer *buf)
-{
- struct mm_buffer *mm_buf = mm_buffer(buf);
- struct mm_pb_manager *mm = mm_buf->mgr;
-
- assert(buf->base.refcount == 0);
-
- _glthread_LOCK_MUTEX(mm->mutex);
- mmFreeMem(mm_buf->block);
- FREE(buf);
- _glthread_UNLOCK_MUTEX(mm->mutex);
-}
-
-
-static void *
-mm_buffer_map(struct pb_buffer *buf,
- unsigned flags)
-{
- struct mm_buffer *mm_buf = mm_buffer(buf);
- struct mm_pb_manager *mm = mm_buf->mgr;
-
- return (unsigned char *) mm->map + mm_buf->block->ofs;
-}
-
-
-static void
-mm_buffer_unmap(struct pb_buffer *buf)
-{
- /* No-op */
-}
-
-
-static void
-mm_buffer_get_base_buffer(struct pb_buffer *buf,
- struct pb_buffer **base_buf,
- unsigned *offset)
-{
- struct mm_buffer *mm_buf = mm_buffer(buf);
- struct mm_pb_manager *mm = mm_buf->mgr;
- pb_get_base_buffer(mm->buffer, base_buf, offset);
- *offset += mm_buf->block->ofs;
-}
-
-
-static const struct pb_vtbl
-mm_buffer_vtbl = {
- mm_buffer_destroy,
- mm_buffer_map,
- mm_buffer_unmap,
- mm_buffer_get_base_buffer
-};
-
-
-static struct pb_buffer *
-mm_bufmgr_create_buffer(struct pb_manager *mgr,
- size_t size,
- const struct pb_desc *desc)
-{
- struct mm_pb_manager *mm = mm_pb_manager(mgr);
- struct mm_buffer *mm_buf;
-
- /* We don't handle alignments larger then the one initially setup */
- assert(desc->alignment % (1 << mm->align2) == 0);
- if(desc->alignment % (1 << mm->align2))
- return NULL;
-
- _glthread_LOCK_MUTEX(mm->mutex);
-
- mm_buf = CALLOC_STRUCT(mm_buffer);
- if (!mm_buf) {
- _glthread_UNLOCK_MUTEX(mm->mutex);
- return NULL;
- }
-
- mm_buf->base.base.refcount = 1;
- mm_buf->base.base.alignment = desc->alignment;
- mm_buf->base.base.usage = desc->usage;
- mm_buf->base.base.size = size;
-
- mm_buf->base.vtbl = &mm_buffer_vtbl;
-
- mm_buf->mgr = mm;
-
- mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0);
- if(!mm_buf->block) {
- debug_printf("warning: heap full\n");
-#if 0
- mmDumpMemInfo(mm->heap);
-#endif
-
- mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0);
- if(!mm_buf->block) {
- assert(0);
- FREE(mm_buf);
- _glthread_UNLOCK_MUTEX(mm->mutex);
- return NULL;
- }
- }
-
- /* Some sanity checks */
- assert(0 <= mm_buf->block->ofs && mm_buf->block->ofs < mm->size);
- assert(size <= mm_buf->block->size && mm_buf->block->ofs + mm_buf->block->size <= mm->size);
-
- _glthread_UNLOCK_MUTEX(mm->mutex);
- return SUPER(mm_buf);
-}
-
-
-static void
-mm_bufmgr_destroy(struct pb_manager *mgr)
-{
- struct mm_pb_manager *mm = mm_pb_manager(mgr);
-
- _glthread_LOCK_MUTEX(mm->mutex);
-
- mmDestroy(mm->heap);
-
- pb_unmap(mm->buffer);
- pb_reference(&mm->buffer, NULL);
-
- _glthread_UNLOCK_MUTEX(mm->mutex);
-
- FREE(mgr);
-}
-
-
-struct pb_manager *
-mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
- size_t size, size_t align2)
-{
- struct mm_pb_manager *mm;
-
- if(!buffer)
- return NULL;
-
- mm = CALLOC_STRUCT(mm_pb_manager);
- if (!mm)
- return NULL;
-
- mm->base.create_buffer = mm_bufmgr_create_buffer;
- mm->base.destroy = mm_bufmgr_destroy;
-
- mm->size = size;
- mm->align2 = align2; /* 64-byte alignment */
-
- _glthread_INIT_MUTEX(mm->mutex);
-
- mm->buffer = buffer;
-
- mm->map = pb_map(mm->buffer,
- PIPE_BUFFER_USAGE_CPU_READ |
- PIPE_BUFFER_USAGE_CPU_WRITE);
- if(!mm->map)
- goto failure;
-
- mm->heap = mmInit(0, size);
- if (!mm->heap)
- goto failure;
-
- return SUPER(mm);
-
-failure:
-if(mm->heap)
- mmDestroy(mm->heap);
- if(mm->map)
- pb_unmap(mm->buffer);
- if(mm)
- FREE(mm);
- return NULL;
-}
-
-
-struct pb_manager *
-mm_bufmgr_create(struct pb_manager *provider,
- size_t size, size_t align2)
-{
- struct pb_buffer *buffer;
- struct pb_manager *mgr;
- struct pb_desc desc;
-
- assert(provider);
- assert(provider->create_buffer);
-
- memset(&desc, 0, sizeof(desc));
- desc.alignment = 1 << align2;
-
- buffer = provider->create_buffer(provider, size, &desc);
- if (!buffer)
- return NULL;
-
- mgr = mm_bufmgr_create_from_buffer(buffer, size, align2);
- if (!mgr) {
- pb_reference(&buffer, NULL);
- return NULL;
- }
-
- return mgr;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- *
- **************************************************************************/
-
-/**
- * \file
- * Batch buffer pool management.
- *
- * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com>
- * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
- */
-
-
-#include "linked_list.h"
-
-#include "p_compiler.h"
-#include "p_debug.h"
-#include "p_thread.h"
-#include "p_defines.h"
-#include "p_util.h"
-
-#include "pb_buffer.h"
-#include "pb_bufmgr.h"
-
-
-/**
- * Convenience macro (type safe).
- */
-#define SUPER(__derived) (&(__derived)->base)
-
-
-struct pool_pb_manager
-{
- struct pb_manager base;
-
- _glthread_Mutex mutex;
-
- size_t bufSize;
- size_t bufAlign;
-
- size_t numFree;
- size_t numTot;
-
- struct list_head free;
-
- struct pb_buffer *buffer;
- void *map;
-
- struct pool_buffer *bufs;
-};
-
-
-static INLINE struct pool_pb_manager *
-pool_pb_manager(struct pb_manager *mgr)
-{
- assert(mgr);
- return (struct pool_pb_manager *)mgr;
-}
-
-
-struct pool_buffer
-{
- struct pb_buffer base;
-
- struct pool_pb_manager *mgr;
-
- struct list_head head;
-
- size_t start;
-};
-
-
-static INLINE struct pool_buffer *
-pool_buffer(struct pb_buffer *buf)
-{
- assert(buf);
- return (struct pool_buffer *)buf;
-}
-
-
-
-static void
-pool_buffer_destroy(struct pb_buffer *buf)
-{
- struct pool_buffer *pool_buf = pool_buffer(buf);
- struct pool_pb_manager *pool = pool_buf->mgr;
-
- assert(pool_buf->base.base.refcount == 0);
-
- _glthread_LOCK_MUTEX(pool->mutex);
- LIST_ADD(&pool_buf->head, &pool->free);
- pool->numFree++;
- _glthread_UNLOCK_MUTEX(pool->mutex);
-}
-
-
-static void *
-pool_buffer_map(struct pb_buffer *buf, unsigned flags)
-{
- struct pool_buffer *pool_buf = pool_buffer(buf);
- struct pool_pb_manager *pool = pool_buf->mgr;
- void *map;
-
- _glthread_LOCK_MUTEX(pool->mutex);
- map = (unsigned char *) pool->map + pool_buf->start;
- _glthread_UNLOCK_MUTEX(pool->mutex);
- return map;
-}
-
-
-static void
-pool_buffer_unmap(struct pb_buffer *buf)
-{
- /* No-op */
-}
-
-
-static void
-pool_buffer_get_base_buffer(struct pb_buffer *buf,
- struct pb_buffer **base_buf,
- unsigned *offset)
-{
- struct pool_buffer *pool_buf = pool_buffer(buf);
- struct pool_pb_manager *pool = pool_buf->mgr;
- pb_get_base_buffer(pool->buffer, base_buf, offset);
- *offset += pool_buf->start;
-}
-
-
-static const struct pb_vtbl
-pool_buffer_vtbl = {
- pool_buffer_destroy,
- pool_buffer_map,
- pool_buffer_unmap,
- pool_buffer_get_base_buffer
-};
-
-
-static struct pb_buffer *
-pool_bufmgr_create_buffer(struct pb_manager *mgr,
- size_t size,
- const struct pb_desc *desc)
-{
- struct pool_pb_manager *pool = pool_pb_manager(mgr);
- struct pool_buffer *pool_buf;
- struct list_head *item;
-
- assert(size == pool->bufSize);
- assert(pool->bufAlign % desc->alignment == 0);
-
- _glthread_LOCK_MUTEX(pool->mutex);
-
- if (pool->numFree == 0) {
- _glthread_UNLOCK_MUTEX(pool->mutex);
- debug_printf("warning: out of fixed size buffer objects\n");
- return NULL;
- }
-
- item = pool->free.next;
-
- if (item == &pool->free) {
- _glthread_UNLOCK_MUTEX(pool->mutex);
- debug_printf("error: fixed size buffer pool corruption\n");
- return NULL;
- }
-
- LIST_DEL(item);
- --pool->numFree;
-
- _glthread_UNLOCK_MUTEX(pool->mutex);
-
- pool_buf = LIST_ENTRY(struct pool_buffer, item, head);
- assert(pool_buf->base.base.refcount == 0);
- pool_buf->base.base.refcount = 1;
- pool_buf->base.base.alignment = desc->alignment;
- pool_buf->base.base.usage = desc->usage;
-
- return SUPER(pool_buf);
-}
-
-
-static void
-pool_bufmgr_destroy(struct pb_manager *mgr)
-{
- struct pool_pb_manager *pool = pool_pb_manager(mgr);
- _glthread_LOCK_MUTEX(pool->mutex);
-
- FREE(pool->bufs);
-
- pb_unmap(pool->buffer);
- pb_reference(&pool->buffer, NULL);
-
- _glthread_UNLOCK_MUTEX(pool->mutex);
-
- FREE(mgr);
-}
-
-
-struct pb_manager *
-pool_bufmgr_create(struct pb_manager *provider,
- size_t numBufs,
- size_t bufSize,
- const struct pb_desc *desc)
-{
- struct pool_pb_manager *pool;
- struct pool_buffer *pool_buf;
- size_t i;
-
- pool = (struct pool_pb_manager *)CALLOC(1, sizeof(*pool));
- if (!pool)
- return NULL;
-
- pool->base.destroy = pool_bufmgr_destroy;
- pool->base.create_buffer = pool_bufmgr_create_buffer;
-
- LIST_INITHEAD(&pool->free);
-
- pool->numTot = numBufs;
- pool->numFree = numBufs;
- pool->bufSize = bufSize;
- pool->bufAlign = desc->alignment;
-
- _glthread_INIT_MUTEX(pool->mutex);
-
- pool->buffer = provider->create_buffer(provider, numBufs*bufSize, desc);
- if (!pool->buffer)
- goto failure;
-
- pool->map = pb_map(pool->buffer,
- PIPE_BUFFER_USAGE_CPU_READ |
- PIPE_BUFFER_USAGE_CPU_WRITE);
- if(!pool->map)
- goto failure;
-
- pool->bufs = (struct pool_buffer *)CALLOC(numBufs, sizeof(*pool->bufs));
- if (!pool->bufs)
- goto failure;
-
- pool_buf = pool->bufs;
- for (i = 0; i < numBufs; ++i) {
- pool_buf->base.base.refcount = 0;
- pool_buf->base.base.alignment = 0;
- pool_buf->base.base.usage = 0;
- pool_buf->base.base.size = bufSize;
- pool_buf->base.vtbl = &pool_buffer_vtbl;
- pool_buf->mgr = pool;
- pool_buf->start = i * bufSize;
- LIST_ADDTAIL(&pool_buf->head, &pool->free);
- pool_buf++;
- }
-
- return SUPER(pool);
-
-failure:
- if(pool->bufs)
- FREE(pool->bufs);
- if(pool->map)
- pb_unmap(pool->buffer);
- if(pool->buffer)
- pb_reference(&pool->buffer, NULL);
- if(pool)
- FREE(pool);
- return NULL;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * \file
- * Implementation of client buffer (also designated as "user buffers"), which
- * are just state-tracker owned data masqueraded as buffers.
- *
- * \author José Fonseca <jrfonseca@tungstengraphics.com>
- */
-
-
-#include "pipe/p_winsys.h"
-#include "pipe/p_util.h"
-
-#include "pb_buffer.h"
-
-
-/**
- * User buffers are special buffers that initially reference memory
- * held by the user but which may if necessary copy that memory into
- * device memory behind the scenes, for submission to hardware.
- *
- * These are particularly useful when the referenced data is never
- * submitted to hardware at all, in the particular case of software
- * vertex processing.
- */
-struct pb_user_buffer
-{
- struct pb_buffer base;
- void *data;
-};
-
-
-extern const struct pb_vtbl pb_user_buffer_vtbl;
-
-
-static INLINE struct pb_user_buffer *
-pb_user_buffer(struct pb_buffer *buf)
-{
- assert(buf);
- assert(buf->vtbl == &pb_user_buffer_vtbl);
- return (struct pb_user_buffer *)buf;
-}
-
-
-static void
-pb_user_buffer_destroy(struct pb_buffer *buf)
-{
- assert(buf);
- FREE(buf);
-}
-
-
-static void *
-pb_user_buffer_map(struct pb_buffer *buf,
- unsigned flags)
-{
- return pb_user_buffer(buf)->data;
-}
-
-
-static void
-pb_user_buffer_unmap(struct pb_buffer *buf)
-{
- /* No-op */
-}
-
-
-static void
-pb_user_buffer_get_base_buffer(struct pb_buffer *buf,
- struct pb_buffer **base_buf,
- unsigned *offset)
-{
- *base_buf = buf;
- *offset = 0;
-}
-
-
-const struct pb_vtbl
-pb_user_buffer_vtbl = {
- pb_user_buffer_destroy,
- pb_user_buffer_map,
- pb_user_buffer_unmap,
- pb_user_buffer_get_base_buffer
-};
-
-
-static struct pipe_buffer *
-pb_winsys_user_buffer_create(struct pipe_winsys *winsys,
- void *data,
- unsigned bytes)
-{
- struct pb_user_buffer *buf = CALLOC_STRUCT(pb_user_buffer);
-
- if(!buf)
- return NULL;
-
- buf->base.base.refcount = 1;
- buf->base.base.size = bytes;
- buf->base.base.alignment = 0;
- buf->base.base.usage = 0;
-
- buf->base.vtbl = &pb_user_buffer_vtbl;
- buf->data = data;
-
- return &buf->base.base;
-}
-
-
-static void *
-pb_winsys_buffer_map(struct pipe_winsys *winsys,
- struct pipe_buffer *buf,
- unsigned flags)
-{
- (void)winsys;
- return pb_map(pb_buffer(buf), flags);
-}
-
-
-static void
-pb_winsys_buffer_unmap(struct pipe_winsys *winsys,
- struct pipe_buffer *buf)
-{
- (void)winsys;
- pb_unmap(pb_buffer(buf));
-}
-
-
-static void
-pb_winsys_buffer_destroy(struct pipe_winsys *winsys,
- struct pipe_buffer *buf)
-{
- (void)winsys;
- pb_destroy(pb_buffer(buf));
-}
-
-
-void
-pb_init_winsys(struct pipe_winsys *winsys)
-{
- winsys->user_buffer_create = pb_winsys_user_buffer_create;
- winsys->buffer_map = pb_winsys_buffer_map;
- winsys->buffer_unmap = pb_winsys_buffer_unmap;
- winsys->buffer_destroy = pb_winsys_buffer_destroy;
-}
+++ /dev/null
-default:
- cd ../.. ; make
-
+++ /dev/null
-default:
- cd ../../.. ; make
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * TGSI interpretor/executor.
- *
- * Flow control information:
- *
- * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
- * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
- * care since a condition may be true for some quad components but false
- * for other components.
- *
- * We basically execute all statements (even if they're in the part of
- * an IF/ELSE clause that's "not taken") and use a special mask to
- * control writing to destination registers. This is the ExecMask.
- * See store_dest().
- *
- * The ExecMask is computed from three other masks (CondMask, LoopMask and
- * ContMask) which are controlled by the flow control instructions (namely:
- * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
- *
- *
- * Authors:
- * Michal Krol
- * Brian Paul
- */
-
-#include "pipe/p_compiler.h"
-#include "pipe/p_state.h"
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_util.h"
-#include "tgsi_exec.h"
-
-#define TILE_TOP_LEFT 0
-#define TILE_TOP_RIGHT 1
-#define TILE_BOTTOM_LEFT 2
-#define TILE_BOTTOM_RIGHT 3
-
-/*
- * Shorthand locations of various utility registers (_I = Index, _C = Channel)
- */
-#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
-#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
-#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
-#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
-#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
-#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
-#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
-#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
-#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
-#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
-#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
-#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
-#define TEMP_128_I TGSI_EXEC_TEMP_128_I
-#define TEMP_128_C TGSI_EXEC_TEMP_128_C
-#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
-#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
-#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
-#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
-#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
-#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
-#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
-#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
-#define TEMP_R0 TGSI_EXEC_TEMP_R0
-
-#define FOR_EACH_CHANNEL(CHAN)\
- for (CHAN = 0; CHAN < 4; CHAN++)
-
-#define IS_CHANNEL_ENABLED(INST, CHAN)\
- ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
-
-#define IS_CHANNEL_ENABLED2(INST, CHAN)\
- ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
-
-#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
- FOR_EACH_CHANNEL( CHAN )\
- if (IS_CHANNEL_ENABLED( INST, CHAN ))
-
-#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
- FOR_EACH_CHANNEL( CHAN )\
- if (IS_CHANNEL_ENABLED2( INST, CHAN ))
-
-
-/** The execution mask depends on the conditional mask and the loop mask */
-#define UPDATE_EXEC_MASK(MACH) \
- MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
-
-
-#define CHAN_X 0
-#define CHAN_Y 1
-#define CHAN_Z 2
-#define CHAN_W 3
-
-
-
-static void
-tgsi_exec_prepare( struct tgsi_exec_machine *mach )
-{
- struct tgsi_exec_labels *labels = &mach->Labels;
- struct tgsi_parse_context parse;
- struct tgsi_full_instruction *instructions;
- struct tgsi_full_declaration *declarations;
- uint maxInstructions = 10, numInstructions = 0;
- uint maxDeclarations = 10, numDeclarations = 0;
- uint k;
- uint instno = 0;
-
- mach->ImmLimit = 0;
- labels->count = 0;
-
- declarations = (struct tgsi_full_declaration *)
- MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
-
- instructions = (struct tgsi_full_instruction *)
- MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
-
- k = tgsi_parse_init( &parse, mach->Tokens );
- if (k != TGSI_PARSE_OK) {
- debug_printf("Problem parsing!\n");
- return;
- }
-
- while( !tgsi_parse_end_of_tokens( &parse ) ) {
- uint pointer = parse.Position;
- uint i;
-
- tgsi_parse_token( &parse );
- switch( parse.FullToken.Token.Type ) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- /* save expanded declaration */
- if (numDeclarations == maxDeclarations) {
- declarations = REALLOC(declarations,
- maxDeclarations
- * sizeof(struct tgsi_full_declaration),
- (maxDeclarations + 10)
- * sizeof(struct tgsi_full_declaration));
- maxDeclarations += 10;
- }
- memcpy(declarations + numDeclarations,
- &parse.FullToken.FullDeclaration,
- sizeof(declarations[0]));
- numDeclarations++;
- break;
-
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- {
- uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
- assert( size % 4 == 0 );
- assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
-
- for( i = 0; i < size; i++ ) {
- mach->Imms[mach->ImmLimit + i / 4][i % 4] = parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
- }
- mach->ImmLimit += size / 4;
- }
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- assert( labels->count < 128 );
-
- labels->labels[labels->count][0] = instno;
- labels->labels[labels->count][1] = pointer;
- labels->count++;
-
- /* save expanded instruction */
- if (numInstructions == maxInstructions) {
- instructions = REALLOC(instructions,
- maxInstructions
- * sizeof(struct tgsi_full_instruction),
- (maxInstructions + 10)
- * sizeof(struct tgsi_full_instruction));
- maxInstructions += 10;
- }
- memcpy(instructions + numInstructions,
- &parse.FullToken.FullInstruction,
- sizeof(instructions[0]));
- numInstructions++;
- break;
-
- default:
- assert( 0 );
- }
- }
- tgsi_parse_free (&parse);
-
- if (mach->Declarations) {
- FREE( mach->Declarations );
- }
- mach->Declarations = declarations;
- mach->NumDeclarations = numDeclarations;
-
- if (mach->Instructions) {
- FREE( mach->Instructions );
- }
- mach->Instructions = instructions;
- mach->NumInstructions = numInstructions;
-}
-
-
-/**
- * Initialize machine state by expanding tokens to full instructions,
- * allocating temporary storage, setting up constants, etc.
- * After this, we can call tgsi_exec_machine_run() many times.
- */
-void
-tgsi_exec_machine_init(
- struct tgsi_exec_machine *mach,
- const struct tgsi_token *tokens,
- uint numSamplers,
- struct tgsi_sampler *samplers)
-{
- uint i, k;
- struct tgsi_parse_context parse;
-
-#if 0
- tgsi_dump(tokens, 0);
-#endif
-
- mach->Tokens = tokens;
-
- mach->Samplers = samplers;
-
- k = tgsi_parse_init (&parse, mach->Tokens);
- if (k != TGSI_PARSE_OK) {
- debug_printf( "Problem parsing!\n" );
- return;
- }
-
- mach->Processor = parse.FullHeader.Processor.Processor;
- tgsi_parse_free (&parse);
-
- mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
- mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
-
- /* Setup constants. */
- for( i = 0; i < 4; i++ ) {
- mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
- mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
- mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
- mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
- mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
- mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
- mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
- mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
- }
-
- tgsi_exec_prepare( mach );
-}
-
-
-void
-tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
-{
- if (mach->Instructions) {
- FREE(mach->Instructions);
- mach->Instructions = NULL;
- mach->NumInstructions = 0;
- }
- if (mach->Declarations) {
- FREE(mach->Declarations);
- mach->Declarations = NULL;
- mach->NumDeclarations = 0;
- }
-}
-
-
-static void
-micro_abs(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) fabs( (double) src->f[0] );
- dst->f[1] = (float) fabs( (double) src->f[1] );
- dst->f[2] = (float) fabs( (double) src->f[2] );
- dst->f[3] = (float) fabs( (double) src->f[3] );
-}
-
-static void
-micro_add(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->f[0] = src0->f[0] + src1->f[0];
- dst->f[1] = src0->f[1] + src1->f[1];
- dst->f[2] = src0->f[2] + src1->f[2];
- dst->f[3] = src0->f[3] + src1->f[3];
-}
-
-static void
-micro_iadd(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] + src1->i[0];
- dst->i[1] = src0->i[1] + src1->i[1];
- dst->i[2] = src0->i[2] + src1->i[2];
- dst->i[3] = src0->i[3] + src1->i[3];
-}
-
-static void
-micro_and(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] & src1->u[0];
- dst->u[1] = src0->u[1] & src1->u[1];
- dst->u[2] = src0->u[2] & src1->u[2];
- dst->u[3] = src0->u[3] & src1->u[3];
-}
-
-static void
-micro_ceil(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) ceil( (double) src->f[0] );
- dst->f[1] = (float) ceil( (double) src->f[1] );
- dst->f[2] = (float) ceil( (double) src->f[2] );
- dst->f[3] = (float) ceil( (double) src->f[3] );
-}
-
-static void
-micro_cos(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) cos( (double) src->f[0] );
- dst->f[1] = (float) cos( (double) src->f[1] );
- dst->f[2] = (float) cos( (double) src->f[2] );
- dst->f[3] = (float) cos( (double) src->f[3] );
-}
-
-static void
-micro_ddx(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] =
- dst->f[1] =
- dst->f[2] =
- dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
-}
-
-static void
-micro_ddy(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] =
- dst->f[1] =
- dst->f[2] =
- dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
-}
-
-static void
-micro_div(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->f[0] = src0->f[0] / src1->f[0];
- dst->f[1] = src0->f[1] / src1->f[1];
- dst->f[2] = src0->f[2] / src1->f[2];
- dst->f[3] = src0->f[3] / src1->f[3];
-}
-
-static void
-micro_udiv(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] / src1->u[0];
- dst->u[1] = src0->u[1] / src1->u[1];
- dst->u[2] = src0->u[2] / src1->u[2];
- dst->u[3] = src0->u[3] / src1->u[3];
-}
-
-static void
-micro_eq(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
- dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
- dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
- dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-static void
-micro_ieq(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
- dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
- dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
- dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
-}
-
-static void
-micro_exp2(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src)
-{
- dst->f[0] = (float) pow( 2.0, (double) src->f[0] );
- dst->f[1] = (float) pow( 2.0, (double) src->f[1] );
- dst->f[2] = (float) pow( 2.0, (double) src->f[2] );
- dst->f[3] = (float) pow( 2.0, (double) src->f[3] );
-}
-
-static void
-micro_f2it(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->i[0] = (int) src->f[0];
- dst->i[1] = (int) src->f[1];
- dst->i[2] = (int) src->f[2];
- dst->i[3] = (int) src->f[3];
-}
-
-static void
-micro_f2ut(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->u[0] = (uint) src->f[0];
- dst->u[1] = (uint) src->f[1];
- dst->u[2] = (uint) src->f[2];
- dst->u[3] = (uint) src->f[3];
-}
-
-static void
-micro_flr(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) floor( (double) src->f[0] );
- dst->f[1] = (float) floor( (double) src->f[1] );
- dst->f[2] = (float) floor( (double) src->f[2] );
- dst->f[3] = (float) floor( (double) src->f[3] );
-}
-
-static void
-micro_frc(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = src->f[0] - (float) floor( (double) src->f[0] );
- dst->f[1] = src->f[1] - (float) floor( (double) src->f[1] );
- dst->f[2] = src->f[2] - (float) floor( (double) src->f[2] );
- dst->f[3] = src->f[3] - (float) floor( (double) src->f[3] );
-}
-
-static void
-micro_ge(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
- dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
- dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
- dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-static void
-micro_i2f(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) src->i[0];
- dst->f[1] = (float) src->i[1];
- dst->f[2] = (float) src->i[2];
- dst->f[3] = (float) src->i[3];
-}
-
-static void
-micro_lg2(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) log( (double) src->f[0] ) * 1.442695f;
- dst->f[1] = (float) log( (double) src->f[1] ) * 1.442695f;
- dst->f[2] = (float) log( (double) src->f[2] ) * 1.442695f;
- dst->f[3] = (float) log( (double) src->f[3] ) * 1.442695f;
-}
-
-static void
-micro_lt(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
- dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
- dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
- dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
-}
-
-static void
-micro_ilt(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
- dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
- dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
- dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
-}
-
-static void
-micro_ult(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2,
- const union tgsi_exec_channel *src3 )
-{
- dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
- dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
- dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
- dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
-}
-
-static void
-micro_max(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
- dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
- dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
- dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
-}
-
-static void
-micro_imax(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
- dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
- dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
- dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
-}
-
-static void
-micro_umax(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
- dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
- dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
- dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
-}
-
-static void
-micro_min(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
- dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
- dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
- dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
-}
-
-static void
-micro_imin(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
- dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
- dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
- dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
-}
-
-static void
-micro_umin(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
- dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
- dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
- dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
-}
-
-static void
-micro_umod(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] % src1->u[0];
- dst->u[1] = src0->u[1] % src1->u[1];
- dst->u[2] = src0->u[2] % src1->u[2];
- dst->u[3] = src0->u[3] % src1->u[3];
-}
-
-static void
-micro_mul(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->f[0] = src0->f[0] * src1->f[0];
- dst->f[1] = src0->f[1] * src1->f[1];
- dst->f[2] = src0->f[2] * src1->f[2];
- dst->f[3] = src0->f[3] * src1->f[3];
-}
-
-static void
-micro_imul(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] * src1->i[0];
- dst->i[1] = src0->i[1] * src1->i[1];
- dst->i[2] = src0->i[2] * src1->i[2];
- dst->i[3] = src0->i[3] * src1->i[3];
-}
-
-static void
-micro_imul64(
- union tgsi_exec_channel *dst0,
- union tgsi_exec_channel *dst1,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst1->i[0] = src0->i[0] * src1->i[0];
- dst1->i[1] = src0->i[1] * src1->i[1];
- dst1->i[2] = src0->i[2] * src1->i[2];
- dst1->i[3] = src0->i[3] * src1->i[3];
- dst0->i[0] = 0;
- dst0->i[1] = 0;
- dst0->i[2] = 0;
- dst0->i[3] = 0;
-}
-
-static void
-micro_umul64(
- union tgsi_exec_channel *dst0,
- union tgsi_exec_channel *dst1,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst1->u[0] = src0->u[0] * src1->u[0];
- dst1->u[1] = src0->u[1] * src1->u[1];
- dst1->u[2] = src0->u[2] * src1->u[2];
- dst1->u[3] = src0->u[3] * src1->u[3];
- dst0->u[0] = 0;
- dst0->u[1] = 0;
- dst0->u[2] = 0;
- dst0->u[3] = 0;
-}
-
-static void
-micro_movc(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1,
- const union tgsi_exec_channel *src2 )
-{
- dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
- dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
- dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
- dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
-}
-
-static void
-micro_neg(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = -src->f[0];
- dst->f[1] = -src->f[1];
- dst->f[2] = -src->f[2];
- dst->f[3] = -src->f[3];
-}
-
-static void
-micro_ineg(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->i[0] = -src->i[0];
- dst->i[1] = -src->i[1];
- dst->i[2] = -src->i[2];
- dst->i[3] = -src->i[3];
-}
-
-static void
-micro_not(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->u[0] = ~src->u[0];
- dst->u[1] = ~src->u[1];
- dst->u[2] = ~src->u[2];
- dst->u[3] = ~src->u[3];
-}
-
-static void
-micro_or(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] | src1->u[0];
- dst->u[1] = src0->u[1] | src1->u[1];
- dst->u[2] = src0->u[2] | src1->u[2];
- dst->u[3] = src0->u[3] | src1->u[3];
-}
-
-static void
-micro_pow(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->f[0] = (float) pow( (double) src0->f[0], (double) src1->f[0] );
- dst->f[1] = (float) pow( (double) src0->f[1], (double) src1->f[1] );
- dst->f[2] = (float) pow( (double) src0->f[2], (double) src1->f[2] );
- dst->f[3] = (float) pow( (double) src0->f[3], (double) src1->f[3] );
-}
-
-static void
-micro_rnd(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) floor( (double) (src->f[0] + 0.5f) );
- dst->f[1] = (float) floor( (double) (src->f[1] + 0.5f) );
- dst->f[2] = (float) floor( (double) (src->f[2] + 0.5f) );
- dst->f[3] = (float) floor( (double) (src->f[3] + 0.5f) );
-}
-
-static void
-micro_shl(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] << src1->i[0];
- dst->i[1] = src0->i[1] << src1->i[1];
- dst->i[2] = src0->i[2] << src1->i[2];
- dst->i[3] = src0->i[3] << src1->i[3];
-}
-
-static void
-micro_ishr(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->i[0] = src0->i[0] >> src1->i[0];
- dst->i[1] = src0->i[1] >> src1->i[1];
- dst->i[2] = src0->i[2] >> src1->i[2];
- dst->i[3] = src0->i[3] >> src1->i[3];
-}
-
-static void
-micro_trunc(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0 )
-{
- dst->f[0] = (float) (int) src0->f[0];
- dst->f[1] = (float) (int) src0->f[1];
- dst->f[2] = (float) (int) src0->f[2];
- dst->f[3] = (float) (int) src0->f[3];
-}
-
-static void
-micro_ushr(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] >> src1->u[0];
- dst->u[1] = src0->u[1] >> src1->u[1];
- dst->u[2] = src0->u[2] >> src1->u[2];
- dst->u[3] = src0->u[3] >> src1->u[3];
-}
-
-static void
-micro_sin(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) sin( (double) src->f[0] );
- dst->f[1] = (float) sin( (double) src->f[1] );
- dst->f[2] = (float) sin( (double) src->f[2] );
- dst->f[3] = (float) sin( (double) src->f[3] );
-}
-
-static void
-micro_sqrt( union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) sqrt( (double) src->f[0] );
- dst->f[1] = (float) sqrt( (double) src->f[1] );
- dst->f[2] = (float) sqrt( (double) src->f[2] );
- dst->f[3] = (float) sqrt( (double) src->f[3] );
-}
-
-static void
-micro_sub(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->f[0] = src0->f[0] - src1->f[0];
- dst->f[1] = src0->f[1] - src1->f[1];
- dst->f[2] = src0->f[2] - src1->f[2];
- dst->f[3] = src0->f[3] - src1->f[3];
-}
-
-static void
-micro_u2f(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src )
-{
- dst->f[0] = (float) src->u[0];
- dst->f[1] = (float) src->u[1];
- dst->f[2] = (float) src->u[2];
- dst->f[3] = (float) src->u[3];
-}
-
-static void
-micro_xor(
- union tgsi_exec_channel *dst,
- const union tgsi_exec_channel *src0,
- const union tgsi_exec_channel *src1 )
-{
- dst->u[0] = src0->u[0] ^ src1->u[0];
- dst->u[1] = src0->u[1] ^ src1->u[1];
- dst->u[2] = src0->u[2] ^ src1->u[2];
- dst->u[3] = src0->u[3] ^ src1->u[3];
-}
-
-static void
-fetch_src_file_channel(
- const struct tgsi_exec_machine *mach,
- const uint file,
- const uint swizzle,
- const union tgsi_exec_channel *index,
- union tgsi_exec_channel *chan )
-{
- switch( swizzle ) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
- switch( file ) {
- case TGSI_FILE_CONSTANT:
- chan->f[0] = mach->Consts[index->i[0]][swizzle];
- chan->f[1] = mach->Consts[index->i[1]][swizzle];
- chan->f[2] = mach->Consts[index->i[2]][swizzle];
- chan->f[3] = mach->Consts[index->i[3]][swizzle];
- break;
-
- case TGSI_FILE_INPUT:
- chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
- break;
-
- case TGSI_FILE_TEMPORARY:
- chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
- break;
-
- case TGSI_FILE_IMMEDIATE:
- assert( index->i[0] < (int) mach->ImmLimit );
- chan->f[0] = mach->Imms[index->i[0]][swizzle];
- assert( index->i[1] < (int) mach->ImmLimit );
- chan->f[1] = mach->Imms[index->i[1]][swizzle];
- assert( index->i[2] < (int) mach->ImmLimit );
- chan->f[2] = mach->Imms[index->i[2]][swizzle];
- assert( index->i[3] < (int) mach->ImmLimit );
- chan->f[3] = mach->Imms[index->i[3]][swizzle];
- break;
-
- case TGSI_FILE_ADDRESS:
- chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
- break;
-
- case TGSI_FILE_OUTPUT:
- /* vertex/fragment output vars can be read too */
- chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
- chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
- chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
- chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
- break;
-
- default:
- assert( 0 );
- }
- break;
-
- case TGSI_EXTSWIZZLE_ZERO:
- *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
- break;
-
- default:
- assert( 0 );
- }
-}
-
-static void
-fetch_source(
- const struct tgsi_exec_machine *mach,
- union tgsi_exec_channel *chan,
- const struct tgsi_full_src_register *reg,
- const uint chan_index )
-{
- union tgsi_exec_channel index;
- uint swizzle;
-
- index.i[0] =
- index.i[1] =
- index.i[2] =
- index.i[3] = reg->SrcRegister.Index;
-
- if (reg->SrcRegister.Indirect) {
- union tgsi_exec_channel index2;
- union tgsi_exec_channel indir_index;
-
- index2.i[0] =
- index2.i[1] =
- index2.i[2] =
- index2.i[3] = reg->SrcRegisterInd.Index;
-
- swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X );
- fetch_src_file_channel(
- mach,
- reg->SrcRegisterInd.File,
- swizzle,
- &index2,
- &indir_index );
-
- index.i[0] += indir_index.i[0];
- index.i[1] += indir_index.i[1];
- index.i[2] += indir_index.i[2];
- index.i[3] += indir_index.i[3];
- }
-
- if( reg->SrcRegister.Dimension ) {
- switch( reg->SrcRegister.File ) {
- case TGSI_FILE_INPUT:
- index.i[0] *= 17;
- index.i[1] *= 17;
- index.i[2] *= 17;
- index.i[3] *= 17;
- break;
- case TGSI_FILE_CONSTANT:
- index.i[0] *= 4096;
- index.i[1] *= 4096;
- index.i[2] *= 4096;
- index.i[3] *= 4096;
- break;
- default:
- assert( 0 );
- }
-
- index.i[0] += reg->SrcRegisterDim.Index;
- index.i[1] += reg->SrcRegisterDim.Index;
- index.i[2] += reg->SrcRegisterDim.Index;
- index.i[3] += reg->SrcRegisterDim.Index;
-
- if (reg->SrcRegisterDim.Indirect) {
- union tgsi_exec_channel index2;
- union tgsi_exec_channel indir_index;
-
- index2.i[0] =
- index2.i[1] =
- index2.i[2] =
- index2.i[3] = reg->SrcRegisterDimInd.Index;
-
- swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X );
- fetch_src_file_channel(
- mach,
- reg->SrcRegisterDimInd.File,
- swizzle,
- &index2,
- &indir_index );
-
- index.i[0] += indir_index.i[0];
- index.i[1] += indir_index.i[1];
- index.i[2] += indir_index.i[2];
- index.i[3] += indir_index.i[3];
- }
- }
-
- swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
- fetch_src_file_channel(
- mach,
- reg->SrcRegister.File,
- swizzle,
- &index,
- chan );
-
- switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
- case TGSI_UTIL_SIGN_CLEAR:
- micro_abs( chan, chan );
- break;
-
- case TGSI_UTIL_SIGN_SET:
- micro_abs( chan, chan );
- micro_neg( chan, chan );
- break;
-
- case TGSI_UTIL_SIGN_TOGGLE:
- micro_neg( chan, chan );
- break;
-
- case TGSI_UTIL_SIGN_KEEP:
- break;
- }
-
- if (reg->SrcRegisterExtMod.Complement) {
- micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
- }
-}
-
-static void
-store_dest(
- struct tgsi_exec_machine *mach,
- const union tgsi_exec_channel *chan,
- const struct tgsi_full_dst_register *reg,
- const struct tgsi_full_instruction *inst,
- uint chan_index )
-{
- union tgsi_exec_channel *dst;
-
- switch( reg->DstRegister.File ) {
- case TGSI_FILE_NULL:
- return;
-
- case TGSI_FILE_OUTPUT:
- dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
- + reg->DstRegister.Index].xyzw[chan_index];
- break;
-
- case TGSI_FILE_TEMPORARY:
- dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
- break;
-
- case TGSI_FILE_ADDRESS:
- dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
- break;
-
- default:
- assert( 0 );
- return;
- }
-
- switch (inst->Instruction.Saturate)
- {
- case TGSI_SAT_NONE:
- if (mach->ExecMask & 0x1)
- dst->i[0] = chan->i[0];
- if (mach->ExecMask & 0x2)
- dst->i[1] = chan->i[1];
- if (mach->ExecMask & 0x4)
- dst->i[2] = chan->i[2];
- if (mach->ExecMask & 0x8)
- dst->i[3] = chan->i[3];
- break;
-
- case TGSI_SAT_ZERO_ONE:
- /* XXX need to obey ExecMask here */
- micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
- micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
- break;
-
- case TGSI_SAT_MINUS_PLUS_ONE:
- assert( 0 );
- break;
-
- default:
- assert( 0 );
- }
-}
-
-#define FETCH(VAL,INDEX,CHAN)\
- fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
-
-#define STORE(VAL,INDEX,CHAN)\
- store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
-
-
-/**
- * Execute ARB-style KIL which is predicated by a src register.
- * Kill fragment if any of the four values is less than zero.
- */
-static void
-exec_kilp(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
- uint uniquemask;
- uint chan_index;
- uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
- union tgsi_exec_channel r[1];
-
- /* This mask stores component bits that were already tested. Note that
- * we test if the value is less than zero, so 1.0 and 0.0 need not to be
- * tested. */
- uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
-
- for (chan_index = 0; chan_index < 4; chan_index++)
- {
- uint swizzle;
- uint i;
-
- /* unswizzle channel */
- swizzle = tgsi_util_get_full_src_register_extswizzle (
- &inst->FullSrcRegisters[0],
- chan_index);
-
- /* check if the component has not been already tested */
- if (uniquemask & (1 << swizzle))
- continue;
- uniquemask |= 1 << swizzle;
-
- FETCH(&r[0], 0, chan_index);
- for (i = 0; i < 4; i++)
- if (r[0].f[i] < 0.0f)
- kilmask |= 1 << i;
- }
-
- mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
-}
-
-
-/*
- * Fetch a texel using STR texture coordinates.
- */
-static void
-fetch_texel( struct tgsi_sampler *sampler,
- const union tgsi_exec_channel *s,
- const union tgsi_exec_channel *t,
- const union tgsi_exec_channel *p,
- float lodbias, /* XXX should be float[4] */
- union tgsi_exec_channel *r,
- union tgsi_exec_channel *g,
- union tgsi_exec_channel *b,
- union tgsi_exec_channel *a )
-{
- uint j;
- float rgba[NUM_CHANNELS][QUAD_SIZE];
-
- sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
-
- for (j = 0; j < 4; j++) {
- r->f[j] = rgba[0][j];
- g->f[j] = rgba[1][j];
- b->f[j] = rgba[2][j];
- a->f[j] = rgba[3][j];
- }
-}
-
-
-static void
-exec_tex(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst,
- boolean biasLod)
-{
- const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
- union tgsi_exec_channel r[8];
- uint chan_index;
- float lodBias;
-
- /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
-
- switch (inst->InstructionExtTexture.Texture) {
- case TGSI_TEXTURE_1D:
-
- FETCH(&r[0], 0, CHAN_X);
-
- switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
- case TGSI_EXTSWIZZLE_W:
- FETCH(&r[1], 0, CHAN_W);
- micro_div( &r[0], &r[0], &r[1] );
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- break;
-
- default:
- assert (0);
- }
-
- if (biasLod) {
- FETCH(&r[1], 0, CHAN_W);
- lodBias = r[2].f[0];
- }
- else
- lodBias = 0.0;
-
- fetch_texel(&mach->Samplers[unit],
- &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
- &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
- break;
-
- case TGSI_TEXTURE_2D:
- case TGSI_TEXTURE_RECT:
-
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 0, CHAN_Y);
- FETCH(&r[2], 0, CHAN_Z);
-
- switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
- case TGSI_EXTSWIZZLE_W:
- FETCH(&r[3], 0, CHAN_W);
- micro_div( &r[0], &r[0], &r[3] );
- micro_div( &r[1], &r[1], &r[3] );
- micro_div( &r[2], &r[2], &r[3] );
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- break;
-
- default:
- assert (0);
- }
-
- if (biasLod) {
- FETCH(&r[3], 0, CHAN_W);
- lodBias = r[3].f[0];
- }
- else
- lodBias = 0.0;
-
- fetch_texel(&mach->Samplers[unit],
- &r[0], &r[1], &r[2], lodBias, /* inputs */
- &r[0], &r[1], &r[2], &r[3]); /* outputs */
- break;
-
- case TGSI_TEXTURE_3D:
- case TGSI_TEXTURE_CUBE:
-
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 0, CHAN_Y);
- FETCH(&r[2], 0, CHAN_Z);
-
- switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
- case TGSI_EXTSWIZZLE_W:
- FETCH(&r[3], 0, CHAN_W);
- micro_div( &r[0], &r[0], &r[3] );
- micro_div( &r[1], &r[1], &r[3] );
- micro_div( &r[2], &r[2], &r[3] );
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- break;
-
- default:
- assert (0);
- }
-
- if (biasLod) {
- FETCH(&r[3], 0, CHAN_W);
- lodBias = r[3].f[0];
- }
- else
- lodBias = 0.0;
-
- fetch_texel(&mach->Samplers[unit],
- &r[0], &r[1], &r[2], lodBias,
- &r[0], &r[1], &r[2], &r[3]);
- break;
-
- default:
- assert (0);
- }
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[chan_index], 0, chan_index );
- }
-}
-
-
-/**
- * Evaluate a constant-valued coefficient at the position of the
- * current quad.
- */
-static void
-eval_constant_coef(
- struct tgsi_exec_machine *mach,
- unsigned attrib,
- unsigned chan )
-{
- unsigned i;
-
- for( i = 0; i < QUAD_SIZE; i++ ) {
- mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
- }
-}
-
-/**
- * Evaluate a linear-valued coefficient at the position of the
- * current quad.
- */
-static void
-eval_linear_coef(
- struct tgsi_exec_machine *mach,
- unsigned attrib,
- unsigned chan )
-{
- const float x = mach->QuadPos.xyzw[0].f[0];
- const float y = mach->QuadPos.xyzw[1].f[0];
- const float dadx = mach->InterpCoefs[attrib].dadx[chan];
- const float dady = mach->InterpCoefs[attrib].dady[chan];
- const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
- mach->Inputs[attrib].xyzw[chan].f[0] = a0;
- mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
- mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
- mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
-}
-
-/**
- * Evaluate a perspective-valued coefficient at the position of the
- * current quad.
- */
-static void
-eval_perspective_coef(
- struct tgsi_exec_machine *mach,
- unsigned attrib,
- unsigned chan )
-{
- const float x = mach->QuadPos.xyzw[0].f[0];
- const float y = mach->QuadPos.xyzw[1].f[0];
- const float dadx = mach->InterpCoefs[attrib].dadx[chan];
- const float dady = mach->InterpCoefs[attrib].dady[chan];
- const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
- const float *w = mach->QuadPos.xyzw[3].f;
- /* divide by W here */
- mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
- mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
- mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
- mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
-}
-
-
-typedef void (* eval_coef_func)(
- struct tgsi_exec_machine *mach,
- unsigned attrib,
- unsigned chan );
-
-static void
-exec_declaration(
- struct tgsi_exec_machine *mach,
- const struct tgsi_full_declaration *decl )
-{
- if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
- if( decl->Declaration.File == TGSI_FILE_INPUT ) {
- unsigned first, last, mask;
- eval_coef_func eval;
-
- assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
-
- first = decl->u.DeclarationRange.First;
- last = decl->u.DeclarationRange.Last;
- mask = decl->Declaration.UsageMask;
-
- switch( decl->Interpolation.Interpolate ) {
- case TGSI_INTERPOLATE_CONSTANT:
- eval = eval_constant_coef;
- break;
-
- case TGSI_INTERPOLATE_LINEAR:
- eval = eval_linear_coef;
- break;
-
- case TGSI_INTERPOLATE_PERSPECTIVE:
- eval = eval_perspective_coef;
- break;
-
- default:
- assert( 0 );
- }
-
- if( mask == TGSI_WRITEMASK_XYZW ) {
- unsigned i, j;
-
- for( i = first; i <= last; i++ ) {
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- eval( mach, i, j );
- }
- }
- }
- else {
- unsigned i, j;
-
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- if( mask & (1 << j) ) {
- for( i = first; i <= last; i++ ) {
- eval( mach, i, j );
- }
- }
- }
- }
- }
- }
-}
-
-static void
-exec_instruction(
- struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst,
- int *pc )
-{
- uint chan_index;
- union tgsi_exec_channel r[8];
-
- (*pc)++;
-
- switch (inst->Instruction.Opcode) {
- case TGSI_OPCODE_ARL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_f2it( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_MOV:
- /* TGSI_OPCODE_SWZ */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_LIT:
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[0], 0, CHAN_X );
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, CHAN_Y );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[1], 0, CHAN_Y );
- micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-
- FETCH( &r[2], 0, CHAN_W );
- micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
- micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
- micro_pow( &r[1], &r[1], &r[2] );
- micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, CHAN_Z );
- }
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_RCP:
- /* TGSI_OPCODE_RECIP */
- FETCH( &r[0], 0, CHAN_X );
- micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_RSQ:
- /* TGSI_OPCODE_RECIPSQRT */
- FETCH( &r[0], 0, CHAN_X );
- micro_sqrt( &r[0], &r[0] );
- micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_EXP:
- assert (0);
- break;
-
- case TGSI_OPCODE_LOG:
- assert (0);
- break;
-
- case TGSI_OPCODE_MUL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
- {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
-
- micro_mul( &r[0], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_ADD:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_add( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DP3:
- /* TGSI_OPCODE_DOT3 */
- FETCH( &r[0], 0, CHAN_X );
- FETCH( &r[1], 1, CHAN_X );
- micro_mul( &r[0], &r[0], &r[1] );
-
- FETCH( &r[1], 0, CHAN_Y );
- FETCH( &r[2], 1, CHAN_Y );
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH( &r[1], 0, CHAN_Z );
- FETCH( &r[2], 1, CHAN_Z );
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DP4:
- /* TGSI_OPCODE_DOT4 */
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 1, CHAN_X);
-
- micro_mul( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_Y);
- FETCH(&r[2], 1, CHAN_Y);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_Z);
- FETCH(&r[2], 1, CHAN_Z);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_W);
- FETCH(&r[2], 1, CHAN_W);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DST:
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- FETCH( &r[0], 0, CHAN_Y );
- FETCH( &r[1], 1, CHAN_Y);
- micro_mul( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, CHAN_Y );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[0], 0, CHAN_Z );
- STORE( &r[0], 0, CHAN_Z );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- FETCH( &r[0], 1, CHAN_W );
- STORE( &r[0], 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_MIN:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
-
- /* XXX use micro_min()?? */
- micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_MAX:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
-
- /* XXX use micro_max()?? */
- micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
-
- STORE(&r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SLT:
- /* TGSI_OPCODE_SETLT */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SGE:
- /* TGSI_OPCODE_SETGE */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_MAD:
- /* TGSI_OPCODE_MADD */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_mul( &r[0], &r[0], &r[1] );
- FETCH( &r[1], 2, chan_index );
- micro_add( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SUB:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
-
- micro_sub( &r[0], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- FETCH(&r[2], 2, chan_index);
-
- micro_sub( &r[1], &r[1], &r[2] );
- micro_mul( &r[0], &r[0], &r[1] );
- micro_add( &r[0], &r[0], &r[2] );
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_CND:
- assert (0);
- break;
-
- case TGSI_OPCODE_CND0:
- assert (0);
- break;
-
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
- assert (0);
- break;
-
- case TGSI_OPCODE_INDEX:
- assert (0);
- break;
-
- case TGSI_OPCODE_NEGATE:
- assert (0);
- break;
-
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_frc( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_CLAMP:
- assert (0);
- break;
-
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_flr( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_ROUND:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_rnd( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
- FETCH(&r[0], 0, CHAN_X);
-
- micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
- FETCH( &r[0], 0, CHAN_X );
- micro_lg2( &r[0], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 1, CHAN_X);
-
- micro_pow( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_CROSSPRODUCT:
- /* TGSI_OPCODE_XPD */
- FETCH(&r[0], 0, CHAN_Y);
- FETCH(&r[1], 1, CHAN_Z);
-
- micro_mul( &r[2], &r[0], &r[1] );
-
- FETCH(&r[3], 0, CHAN_Z);
- FETCH(&r[4], 1, CHAN_Y);
-
- micro_mul( &r[5], &r[3], &r[4] );
- micro_sub( &r[2], &r[2], &r[5] );
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &r[2], 0, CHAN_X );
- }
-
- FETCH(&r[2], 1, CHAN_X);
-
- micro_mul( &r[3], &r[3], &r[2] );
-
- FETCH(&r[5], 0, CHAN_X);
-
- micro_mul( &r[1], &r[1], &r[5] );
- micro_sub( &r[3], &r[3], &r[1] );
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- STORE( &r[3], 0, CHAN_Y );
- }
-
- micro_mul( &r[5], &r[5], &r[4] );
- micro_mul( &r[0], &r[0], &r[2] );
- micro_sub( &r[5], &r[5], &r[0] );
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- STORE( &r[5], 0, CHAN_Z );
- }
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_MULTIPLYMATRIX:
- assert (0);
- break;
-
- case TGSI_OPCODE_ABS:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
-
- micro_abs( &r[0], &r[0] );
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_RCC:
- assert (0);
- break;
-
- case TGSI_OPCODE_DPH:
- FETCH(&r[0], 0, CHAN_X);
- FETCH(&r[1], 1, CHAN_X);
-
- micro_mul( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_Y);
- FETCH(&r[2], 1, CHAN_Y);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 0, CHAN_Z);
- FETCH(&r[2], 1, CHAN_Z);
-
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FETCH(&r[1], 1, CHAN_W);
-
- micro_add( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_COS:
- FETCH(&r[0], 0, CHAN_X);
-
- micro_cos( &r[0], &r[0] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DDX:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_ddx( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DDY:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_ddy( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_KILP:
- exec_kilp (mach, inst);
- break;
-
- case TGSI_OPCODE_KIL:
- /* for enabled ExecMask bits, set the killed bit */
- mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
- break;
-
- case TGSI_OPCODE_PK2H:
- assert (0);
- break;
-
- case TGSI_OPCODE_PK2US:
- assert (0);
- break;
-
- case TGSI_OPCODE_PK4B:
- assert (0);
- break;
-
- case TGSI_OPCODE_PK4UB:
- assert (0);
- break;
-
- case TGSI_OPCODE_RFL:
- assert (0);
- break;
-
- case TGSI_OPCODE_SEQ:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_eq( &r[0], &r[0], &r[1],
- &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
- &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SFL:
- assert (0);
- break;
-
- case TGSI_OPCODE_SGT:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SIN:
- FETCH( &r[0], 0, CHAN_X );
- micro_sin( &r[0], &r[0] );
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SLE:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SNE:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_STR:
- assert (0);
- break;
-
- case TGSI_OPCODE_TEX:
- /* simple texture lookup */
- /* src[0] = texcoord */
- /* src[1] = sampler unit */
- exec_tex(mach, inst, FALSE);
- break;
-
- case TGSI_OPCODE_TXB:
- /* Texture lookup with lod bias */
- /* src[0] = texcoord (src[0].w = LOD bias) */
- /* src[1] = sampler unit */
- exec_tex(mach, inst, TRUE);
- break;
-
- case TGSI_OPCODE_TXD:
- /* Texture lookup with explict partial derivatives */
- /* src[0] = texcoord */
- /* src[1] = d[strq]/dx */
- /* src[2] = d[strq]/dy */
- /* src[3] = sampler unit */
- assert (0);
- break;
-
- case TGSI_OPCODE_TXL:
- /* Texture lookup with explit LOD */
- /* src[0] = texcoord (src[0].w = LOD) */
- /* src[1] = sampler unit */
- exec_tex(mach, inst, TRUE);
- break;
-
- case TGSI_OPCODE_UP2H:
- assert (0);
- break;
-
- case TGSI_OPCODE_UP2US:
- assert (0);
- break;
-
- case TGSI_OPCODE_UP4B:
- assert (0);
- break;
-
- case TGSI_OPCODE_UP4UB:
- assert (0);
- break;
-
- case TGSI_OPCODE_X2D:
- assert (0);
- break;
-
- case TGSI_OPCODE_ARA:
- assert (0);
- break;
-
- case TGSI_OPCODE_ARR:
- assert (0);
- break;
-
- case TGSI_OPCODE_BRA:
- assert (0);
- break;
-
- case TGSI_OPCODE_CAL:
- /* skip the call if no execution channels are enabled */
- if (mach->ExecMask) {
- /* do the call */
-
- /* push the Cond, Loop, Cont stacks */
- assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
- mach->CondStack[mach->CondStackTop++] = mach->CondMask;
- assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
- assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->ContStack[mach->ContStackTop++] = mach->ContMask;
-
- assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
- mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
-
- /* note that PC was already incremented above */
- mach->CallStack[mach->CallStackTop++] = *pc;
- *pc = inst->InstructionExtLabel.Label;
- }
- break;
-
- case TGSI_OPCODE_RET:
- mach->FuncMask &= ~mach->ExecMask;
- UPDATE_EXEC_MASK(mach);
-
- if (mach->ExecMask == 0x0) {
- /* really return now (otherwise, keep executing */
-
- if (mach->CallStackTop == 0) {
- /* returning from main() */
- *pc = -1;
- return;
- }
- *pc = mach->CallStack[--mach->CallStackTop];
-
- /* pop the Cond, Loop, Cont stacks */
- assert(mach->CondStackTop > 0);
- mach->CondMask = mach->CondStack[--mach->CondStackTop];
- assert(mach->LoopStackTop > 0);
- mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
- assert(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[--mach->ContStackTop];
- assert(mach->FuncStackTop > 0);
- mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
-
- UPDATE_EXEC_MASK(mach);
- }
- break;
-
- case TGSI_OPCODE_SSG:
- assert (0);
- break;
-
- case TGSI_OPCODE_CMP:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH(&r[0], 0, chan_index);
- FETCH(&r[1], 1, chan_index);
- FETCH(&r[2], 2, chan_index);
-
- micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
-
- STORE(&r[0], 0, chan_index);
- }
- break;
-
- case TGSI_OPCODE_SCS:
- if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- FETCH( &r[0], 0, CHAN_X );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
- micro_cos( &r[1], &r[0] );
- STORE( &r[1], 0, CHAN_X );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- micro_sin( &r[1], &r[0] );
- STORE( &r[1], 0, CHAN_Y );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
- }
- if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_NRM:
- assert (0);
- break;
-
- case TGSI_OPCODE_DIV:
- assert( 0 );
- break;
-
- case TGSI_OPCODE_DP2:
- FETCH( &r[0], 0, CHAN_X );
- FETCH( &r[1], 1, CHAN_X );
- micro_mul( &r[0], &r[0], &r[1] );
-
- FETCH( &r[1], 0, CHAN_Y );
- FETCH( &r[2], 1, CHAN_Y );
- micro_mul( &r[1], &r[1], &r[2] );
- micro_add( &r[0], &r[0], &r[1] );
-
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_IF:
- /* push CondMask */
- assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
- mach->CondStack[mach->CondStackTop++] = mach->CondMask;
- FETCH( &r[0], 0, CHAN_X );
- /* update CondMask */
- if( ! r[0].u[0] ) {
- mach->CondMask &= ~0x1;
- }
- if( ! r[0].u[1] ) {
- mach->CondMask &= ~0x2;
- }
- if( ! r[0].u[2] ) {
- mach->CondMask &= ~0x4;
- }
- if( ! r[0].u[3] ) {
- mach->CondMask &= ~0x8;
- }
- UPDATE_EXEC_MASK(mach);
- /* Todo: If CondMask==0, jump to ELSE */
- break;
-
- case TGSI_OPCODE_ELSE:
- /* invert CondMask wrt previous mask */
- {
- uint prevMask;
- assert(mach->CondStackTop > 0);
- prevMask = mach->CondStack[mach->CondStackTop - 1];
- mach->CondMask = ~mach->CondMask & prevMask;
- UPDATE_EXEC_MASK(mach);
- /* Todo: If CondMask==0, jump to ENDIF */
- }
- break;
-
- case TGSI_OPCODE_ENDIF:
- /* pop CondMask */
- assert(mach->CondStackTop > 0);
- mach->CondMask = mach->CondStack[--mach->CondStackTop];
- UPDATE_EXEC_MASK(mach);
- break;
-
- case TGSI_OPCODE_END:
- /* halt execution */
- *pc = -1;
- break;
-
- case TGSI_OPCODE_REP:
- assert (0);
- break;
-
- case TGSI_OPCODE_ENDREP:
- assert (0);
- break;
-
- case TGSI_OPCODE_PUSHA:
- assert (0);
- break;
-
- case TGSI_OPCODE_POPA:
- assert (0);
- break;
-
- case TGSI_OPCODE_CEIL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_ceil( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_I2F:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_i2f( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_NOT:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_not( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_TRUNC:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- micro_trunc( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SHL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_shl( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SHR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_ishr( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_AND:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_and( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_OR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_or( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_MOD:
- assert (0);
- break;
-
- case TGSI_OPCODE_XOR:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- FETCH( &r[1], 1, chan_index );
- micro_xor( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SAD:
- assert (0);
- break;
-
- case TGSI_OPCODE_TXF:
- assert (0);
- break;
-
- case TGSI_OPCODE_TXQ:
- assert (0);
- break;
-
- case TGSI_OPCODE_EMIT:
- mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
- mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
- break;
-
- case TGSI_OPCODE_ENDPRIM:
- mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
- mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
- break;
-
- case TGSI_OPCODE_LOOP:
- /* fall-through (for now) */
- case TGSI_OPCODE_BGNLOOP2:
- /* push LoopMask and ContMasks */
- assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
- assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
- mach->ContStack[mach->ContStackTop++] = mach->ContMask;
- break;
-
- case TGSI_OPCODE_ENDLOOP:
- /* fall-through (for now at least) */
- case TGSI_OPCODE_ENDLOOP2:
- /* Restore ContMask, but don't pop */
- assert(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
- if (mach->LoopMask) {
- /* repeat loop: jump to instruction just past BGNLOOP */
- *pc = inst->InstructionExtLabel.Label + 1;
- }
- else {
- /* exit loop: pop LoopMask */
- assert(mach->LoopStackTop > 0);
- mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
- /* pop ContMask */
- assert(mach->ContStackTop > 0);
- mach->ContMask = mach->ContStack[--mach->ContStackTop];
- }
- UPDATE_EXEC_MASK(mach);
- break;
-
- case TGSI_OPCODE_BRK:
- /* turn off loop channels for each enabled exec channel */
- mach->LoopMask &= ~mach->ExecMask;
- /* Todo: if mach->LoopMask == 0, jump to end of loop */
- UPDATE_EXEC_MASK(mach);
- break;
-
- case TGSI_OPCODE_CONT:
- /* turn off cont channels for each enabled exec channel */
- mach->ContMask &= ~mach->ExecMask;
- /* Todo: if mach->LoopMask == 0, jump to end of loop */
- UPDATE_EXEC_MASK(mach);
- break;
-
- case TGSI_OPCODE_BGNSUB:
- /* no-op */
- break;
-
- case TGSI_OPCODE_ENDSUB:
- /* no-op */
- break;
-
- case TGSI_OPCODE_NOISE1:
- assert( 0 );
- break;
-
- case TGSI_OPCODE_NOISE2:
- assert( 0 );
- break;
-
- case TGSI_OPCODE_NOISE3:
- assert( 0 );
- break;
-
- case TGSI_OPCODE_NOISE4:
- assert( 0 );
- break;
-
- case TGSI_OPCODE_NOP:
- break;
-
- default:
- assert( 0 );
- }
-}
-
-
-/**
- * Run TGSI interpreter.
- * \return bitmask of "alive" quad components
- */
-uint
-tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
-{
- uint i;
- int pc = 0;
-
- mach->CondMask = 0xf;
- mach->LoopMask = 0xf;
- mach->ContMask = 0xf;
- mach->FuncMask = 0xf;
- mach->ExecMask = 0xf;
-
- mach->CondStackTop = 0; /* temporarily subvert this assertion */
- assert(mach->CondStackTop == 0);
- assert(mach->LoopStackTop == 0);
- assert(mach->ContStackTop == 0);
- assert(mach->CallStackTop == 0);
-
- mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
- mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
-
- if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
- mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
- mach->Primitives[0] = 0;
- }
-
-
- /* execute declarations (interpolants) */
- for (i = 0; i < mach->NumDeclarations; i++) {
- exec_declaration( mach, mach->Declarations+i );
- }
-
- /* execute instructions, until pc is set to -1 */
- while (pc != -1) {
- assert(pc < mach->NumInstructions);
- exec_instruction( mach, mach->Instructions + pc, &pc );
- }
-
-#if 0
- /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
- if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
- /*
- * Scale back depth component.
- */
- for (i = 0; i < 4; i++)
- mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
- }
-#endif
-
- return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
-}
-
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#if !defined TGSI_EXEC_H
-#define TGSI_EXEC_H
-
-#include "pipe/p_compiler.h"
-
-#if defined __cplusplus
-extern "C" {
-#endif
-
-#define NUM_CHANNELS 4 /* R,G,B,A */
-#define QUAD_SIZE 4 /* 4 pixel/quad */
-
-/**
- * Registers may be treated as float, signed int or unsigned int.
- */
-union tgsi_exec_channel
-{
- float f[QUAD_SIZE];
- int i[QUAD_SIZE];
- unsigned u[QUAD_SIZE];
-};
-
-/**
- * A vector[RGBA] of channels[4 pixels]
- */
-struct tgsi_exec_vector
-{
- union tgsi_exec_channel xyzw[NUM_CHANNELS];
-};
-
-/**
- * For fragment programs, information for computing fragment input
- * values from plane equation of the triangle/line.
- */
-struct tgsi_interp_coef
-{
- float a0[NUM_CHANNELS]; /* in an xyzw layout */
- float dadx[NUM_CHANNELS];
- float dady[NUM_CHANNELS];
-};
-
-
-struct softpipe_tile_cache; /**< Opaque to TGSI */
-
-/**
- * Information for sampling textures, which must be implemented
- * by code outside the TGSI executor.
- */
-struct tgsi_sampler
-{
- const struct pipe_sampler_state *state;
- struct pipe_texture *texture;
- /** Get samples for four fragments in a quad */
- void (*get_samples)(struct tgsi_sampler *sampler,
- const float s[QUAD_SIZE],
- const float t[QUAD_SIZE],
- const float p[QUAD_SIZE],
- float lodbias,
- float rgba[NUM_CHANNELS][QUAD_SIZE]);
- void *pipe; /*XXX temporary*/
- struct softpipe_tile_cache *cache;
-};
-
-/**
- * For branching/calling subroutines.
- */
-struct tgsi_exec_labels
-{
- unsigned labels[128][2];
- unsigned count;
-};
-
-/*
- * Locations of various utility registers (_I = Index, _C = Channel)
- */
-#define TGSI_EXEC_TEMP_00000000_I 32
-#define TGSI_EXEC_TEMP_00000000_C 0
-
-#define TGSI_EXEC_TEMP_7FFFFFFF_I 32
-#define TGSI_EXEC_TEMP_7FFFFFFF_C 1
-
-#define TGSI_EXEC_TEMP_80000000_I 32
-#define TGSI_EXEC_TEMP_80000000_C 2
-
-#define TGSI_EXEC_TEMP_FFFFFFFF_I 32
-#define TGSI_EXEC_TEMP_FFFFFFFF_C 3
-
-#define TGSI_EXEC_TEMP_ONE_I 33
-#define TGSI_EXEC_TEMP_ONE_C 0
-
-#define TGSI_EXEC_TEMP_TWO_I 33
-#define TGSI_EXEC_TEMP_TWO_C 1
-
-#define TGSI_EXEC_TEMP_128_I 33
-#define TGSI_EXEC_TEMP_128_C 2
-
-#define TGSI_EXEC_TEMP_MINUS_128_I 33
-#define TGSI_EXEC_TEMP_MINUS_128_C 3
-
-#define TGSI_EXEC_TEMP_KILMASK_I 34
-#define TGSI_EXEC_TEMP_KILMASK_C 0
-
-#define TGSI_EXEC_TEMP_OUTPUT_I 34
-#define TGSI_EXEC_TEMP_OUTPUT_C 1
-
-#define TGSI_EXEC_TEMP_PRIMITIVE_I 34
-#define TGSI_EXEC_TEMP_PRIMITIVE_C 2
-
-#define TGSI_EXEC_TEMP_R0 35
-
-#define TGSI_EXEC_NUM_TEMPS (32 + 4)
-#define TGSI_EXEC_NUM_ADDRS 1
-#define TGSI_EXEC_NUM_IMMEDIATES 256
-
-#define TGSI_EXEC_MAX_COND_NESTING 10
-#define TGSI_EXEC_MAX_LOOP_NESTING 10
-#define TGSI_EXEC_MAX_CALL_NESTING 10
-
-/**
- * Run-time virtual machine state for executing TGSI shader.
- */
-struct tgsi_exec_machine
-{
- /*
- * 32 program temporaries
- * 4 internal temporaries
- * 1 address
- * 1 temporary of padding to align to 16 bytes
- */
- struct tgsi_exec_vector _Temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_ADDRS + 1];
-
- /*
- * This will point to _Temps after aligning to 16B boundary.
- */
- struct tgsi_exec_vector *Temps;
- struct tgsi_exec_vector *Addrs;
-
- struct tgsi_sampler *Samplers;
-
- float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
- unsigned ImmLimit;
- float (*Consts)[4];
- struct tgsi_exec_vector *Inputs;
- struct tgsi_exec_vector *Outputs;
- const struct tgsi_token *Tokens;
- unsigned Processor;
-
- /* GEOMETRY processor only. */
- unsigned *Primitives;
-
- /* FRAGMENT processor only. */
- const struct tgsi_interp_coef *InterpCoefs;
- struct tgsi_exec_vector QuadPos;
-
- /* Conditional execution masks */
- uint CondMask; /**< For IF/ELSE/ENDIF */
- uint LoopMask; /**< For BGNLOOP/ENDLOOP */
- uint ContMask; /**< For loop CONT statements */
- uint FuncMask; /**< For function calls */
- uint ExecMask; /**< = CondMask & LoopMask */
-
- /** Condition mask stack (for nested conditionals) */
- uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
- int CondStackTop;
-
- /** Loop mask stack (for nested loops) */
- uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
- int LoopStackTop;
-
- /** Loop continue mask stack (see comments in tgsi_exec.c) */
- uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
- int ContStackTop;
-
- /** Function execution mask stack (for executing subroutine code) */
- uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
- int FuncStackTop;
-
- /** Function call stack for saving/restoring the program counter */
- uint CallStack[TGSI_EXEC_MAX_CALL_NESTING];
- int CallStackTop;
-
- struct tgsi_full_instruction *Instructions;
- uint NumInstructions;
-
- struct tgsi_full_declaration *Declarations;
- uint NumDeclarations;
-
- struct tgsi_exec_labels Labels;
-};
-
-
-void
-tgsi_exec_machine_init(
- struct tgsi_exec_machine *mach,
- const struct tgsi_token *tokens,
- unsigned numSamplers,
- struct tgsi_sampler *samplers);
-
-uint
-tgsi_exec_machine_run(
- struct tgsi_exec_machine *mach );
-
-
-void
-tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach);
-
-
-#if defined __cplusplus
-} /* extern "C" */
-#endif
-
-#endif /* TGSI_EXEC_H */
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_util.h"
-#include "tgsi_exec.h"
-#include "tgsi_sse2.h"
-
-#include "x86/rtasm/x86sse.h"
-
-#if defined(__i386__) || defined(__386__)
-
-#define DUMP_SSE 0
-
-#if DUMP_SSE
-
-static void
-_print_reg(
- struct x86_reg reg )
-{
- if (reg.mod != mod_REG)
- debug_printf( "[" );
-
- switch( reg.file ) {
- case file_REG32:
- switch( reg.idx ) {
- case reg_AX:
- debug_printf( "EAX" );
- break;
- case reg_CX:
- debug_printf( "ECX" );
- break;
- case reg_DX:
- debug_printf( "EDX" );
- break;
- case reg_BX:
- debug_printf( "EBX" );
- break;
- case reg_SP:
- debug_printf( "ESP" );
- break;
- case reg_BP:
- debug_printf( "EBP" );
- break;
- case reg_SI:
- debug_printf( "ESI" );
- break;
- case reg_DI:
- debug_printf( "EDI" );
- break;
- }
- break;
- case file_MMX:
- assert( 0 );
- break;
- case file_XMM:
- debug_printf( "XMM%u", reg.idx );
- break;
- case file_x87:
- assert( 0 );
- break;
- }
-
- if (reg.mod == mod_DISP8 ||
- reg.mod == mod_DISP32)
- debug_printf("+%d", reg.disp);
-
- if (reg.mod != mod_REG)
- debug_printf( "]" );
-}
-
-static void
-_fill(
- const char *op )
-{
- unsigned count = 10 - strlen( op );
-
- while( count-- ) {
- debug_printf( " " );
- }
-}
-
-#define DUMP_START() debug_printf( "\nsse-dump start ----------------" )
-#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" )
-#define DUMP( OP ) debug_printf( "\n%s", OP )
-#define DUMP_I( OP, I ) do {\
- debug_printf( "\n%s", OP );\
- _fill( OP );\
- debug_printf( "%u", I ); } while( 0 )
-#define DUMP_R( OP, R0 ) do {\
- debug_printf( "\n%s", OP );\
- _fill( OP );\
- _print_reg( R0 ); } while( 0 )
-#define DUMP_RR( OP, R0, R1 ) do {\
- debug_printf( "\n%s", OP );\
- _fill( OP );\
- _print_reg( R0 );\
- debug_printf( ", " );\
- _print_reg( R1 ); } while( 0 )
-#define DUMP_RRI( OP, R0, R1, I ) do {\
- debug_printf( "\n%s", OP );\
- _fill( OP );\
- _print_reg( R0 );\
- debug_printf( ", " );\
- _print_reg( R1 );\
- debug_printf( ", " );\
- debug_printf( "%u", I ); } while( 0 )
-
-#else
-
-#define DUMP_START()
-#define DUMP_END()
-#define DUMP( OP )
-#define DUMP_I( OP, I )
-#define DUMP_R( OP, R0 )
-#define DUMP_RR( OP, R0, R1 )
-#define DUMP_RRI( OP, R0, R1, I )
-
-#endif
-
-#define FOR_EACH_CHANNEL( CHAN )\
- for( CHAN = 0; CHAN < 4; CHAN++ )
-
-#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
- ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
-
-#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
- if( IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
-
-#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
- FOR_EACH_CHANNEL( CHAN )\
- IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
-
-#define CHAN_X 0
-#define CHAN_Y 1
-#define CHAN_Z 2
-#define CHAN_W 3
-
-#define TEMP_R0 TGSI_EXEC_TEMP_R0
-
-/**
- * X86 utility functions.
- */
-
-static struct x86_reg
-make_xmm(
- unsigned xmm )
-{
- return x86_make_reg(
- file_XMM,
- (enum x86_reg_name) xmm );
-}
-
-/**
- * X86 register mapping helpers.
- */
-
-static struct x86_reg
-get_const_base( void )
-{
- return x86_make_reg(
- file_REG32,
- reg_CX );
-}
-
-static struct x86_reg
-get_input_base( void )
-{
- return x86_make_reg(
- file_REG32,
- reg_AX );
-}
-
-static struct x86_reg
-get_output_base( void )
-{
- return x86_make_reg(
- file_REG32,
- reg_DX );
-}
-
-static struct x86_reg
-get_temp_base( void )
-{
-#ifdef WIN32
- return x86_make_reg(
- file_REG32,
- reg_BX );
-#else
- return x86_make_reg(
- file_REG32,
- reg_SI );
-#endif
-}
-
-static struct x86_reg
-get_coef_base( void )
-{
- return get_output_base();
-}
-
-/**
- * Data access helpers.
- */
-
-static struct x86_reg
-get_argument(
- unsigned index )
-{
- return x86_make_disp(
- x86_make_reg( file_REG32, reg_SP ),
- (index + 1) * 4 );
-}
-
-static struct x86_reg
-get_const(
- unsigned vec,
- unsigned chan )
-{
- return x86_make_disp(
- get_const_base(),
- (vec * 4 + chan) * 4 );
-}
-
-static struct x86_reg
-get_input(
- unsigned vec,
- unsigned chan )
-{
- return x86_make_disp(
- get_input_base(),
- (vec * 4 + chan) * 16 );
-}
-
-static struct x86_reg
-get_output(
- unsigned vec,
- unsigned chan )
-{
- return x86_make_disp(
- get_output_base(),
- (vec * 4 + chan) * 16 );
-}
-
-static struct x86_reg
-get_temp(
- unsigned vec,
- unsigned chan )
-{
- return x86_make_disp(
- get_temp_base(),
- (vec * 4 + chan) * 16 );
-}
-
-static struct x86_reg
-get_coef(
- unsigned vec,
- unsigned chan,
- unsigned member )
-{
- return x86_make_disp(
- get_coef_base(),
- ((vec * 3 + member) * 4 + chan) * 4 );
-}
-
-/**
- * X86 rtasm wrappers.
- */
-
-static void
-emit_addps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "ADDPS", dst, src );
- sse_addps( func, dst, src );
-}
-
-static void
-emit_andnps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "ANDNPS", dst, src );
- sse_andnps( func, dst, src );
-}
-
-static void
-emit_andps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "ANDPS", dst, src );
- sse_andps( func, dst, src );
-}
-
-static void
-emit_call(
- struct x86_function *func,
- void (* addr)() )
-{
- struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
-
- DUMP_I( "CALL", addr );
- x86_mov_reg_imm( func, ecx, (unsigned long) addr );
- x86_call( func, ecx );
-}
-
-static void
-emit_cmpps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src,
- enum sse_cc cc )
-{
- DUMP_RRI( "CMPPS", dst, src, cc );
- sse_cmpps( func, dst, src, cc );
-}
-
-static void
-emit_cvttps2dq(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "CVTTPS2DQ", dst, src );
- sse2_cvttps2dq( func, dst, src );
-}
-
-static void
-emit_maxps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MAXPS", dst, src );
- sse_maxps( func, dst, src );
-}
-
-static void
-emit_minps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MINPS", dst, src );
- sse_minps( func, dst, src );
-}
-
-static void
-emit_mov(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MOV", dst, src );
- x86_mov( func, dst, src );
-}
-
-static void
-emit_movaps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MOVAPS", dst, src );
- sse_movaps( func, dst, src );
-}
-
-static void
-emit_movss(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MOVSS", dst, src );
- sse_movss( func, dst, src );
-}
-
-static void
-emit_movups(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MOVUPS", dst, src );
- sse_movups( func, dst, src );
-}
-
-static void
-emit_mulps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "MULPS", dst, src );
- sse_mulps( func, dst, src );
-}
-
-static void
-emit_or(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "OR", dst, src );
- x86_or( func, dst, src );
-}
-
-static void
-emit_orps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "ORPS", dst, src );
- sse_orps( func, dst, src );
-}
-
-static void
-emit_pmovmskb(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "PMOVMSKB", dst, src );
- sse_pmovmskb( func, dst, src );
-}
-
-static void
-emit_pop(
- struct x86_function *func,
- struct x86_reg dst )
-{
- DUMP_R( "POP", dst );
- x86_pop( func, dst );
-}
-
-static void
-emit_push(
- struct x86_function *func,
- struct x86_reg dst )
-{
- DUMP_R( "PUSH", dst );
- x86_push( func, dst );
-}
-
-static void
-emit_rcpps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "RCPPS", dst, src );
- sse2_rcpps( func, dst, src );
-}
-
-#ifdef WIN32
-static void
-emit_retw(
- struct x86_function *func,
- unsigned size )
-{
- DUMP_I( "RET", size );
- x86_retw( func, size );
-}
-#else
-static void
-emit_ret(
- struct x86_function *func )
-{
- DUMP( "RET" );
- x86_ret( func );
-}
-#endif
-
-static void
-emit_rsqrtps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "RSQRTPS", dst, src );
- sse_rsqrtps( func, dst, src );
-}
-
-static void
-emit_shufps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src,
- unsigned char shuf )
-{
- DUMP_RRI( "SHUFPS", dst, src, shuf );
- sse_shufps( func, dst, src, shuf );
-}
-
-static void
-emit_subps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "SUBPS", dst, src );
- sse_subps( func, dst, src );
-}
-
-static void
-emit_xorps(
- struct x86_function *func,
- struct x86_reg dst,
- struct x86_reg src )
-{
- DUMP_RR( "XORPS", dst, src );
- sse_xorps( func, dst, src );
-}
-
-/**
- * Data fetch helpers.
- */
-
-static void
-emit_const(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_movss(
- func,
- make_xmm( xmm ),
- get_const( vec, chan ) );
- emit_shufps(
- func,
- make_xmm( xmm ),
- make_xmm( xmm ),
- SHUF( 0, 0, 0, 0 ) );
-}
-
-static void
-emit_inputf(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_movups(
- func,
- make_xmm( xmm ),
- get_input( vec, chan ) );
-}
-
-static void
-emit_output(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_movups(
- func,
- get_output( vec, chan ),
- make_xmm( xmm ) );
-}
-
-static void
-emit_tempf(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_movaps(
- func,
- make_xmm( xmm ),
- get_temp( vec, chan ) );
-}
-
-static void
-emit_coef(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan,
- unsigned member )
-{
- emit_movss(
- func,
- make_xmm( xmm ),
- get_coef( vec, chan, member ) );
- emit_shufps(
- func,
- make_xmm( xmm ),
- make_xmm( xmm ),
- SHUF( 0, 0, 0, 0 ) );
-}
-
-/**
- * Data store helpers.
- */
-
-static void
-emit_inputs(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_movups(
- func,
- get_input( vec, chan ),
- make_xmm( xmm ) );
-}
-
-static void
-emit_temps(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_movaps(
- func,
- get_temp( vec, chan ),
- make_xmm( xmm ) );
-}
-
-static void
-emit_addrs(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_temps(
- func,
- xmm,
- vec + TGSI_EXEC_NUM_TEMPS,
- chan );
-}
-
-/**
- * Coefficent fetch helpers.
- */
-
-static void
-emit_coef_a0(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_coef(
- func,
- xmm,
- vec,
- chan,
- 0 );
-}
-
-static void
-emit_coef_dadx(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_coef(
- func,
- xmm,
- vec,
- chan,
- 1 );
-}
-
-static void
-emit_coef_dady(
- struct x86_function *func,
- unsigned xmm,
- unsigned vec,
- unsigned chan )
-{
- emit_coef(
- func,
- xmm,
- vec,
- chan,
- 2 );
-}
-
-/**
- * Function call helpers.
- */
-
-static void
-emit_push_gp(
- struct x86_function *func )
-{
- emit_push(
- func,
- get_const_base() );
- emit_push(
- func,
- get_input_base() );
- emit_push(
- func,
- get_output_base() );
-
- /* It is important on non-win32 platforms that temp base is pushed last.
- */
- emit_push(
- func,
- get_temp_base() );
-}
-
-static void
-emit_pop_gp(
- struct x86_function *func )
-{
- /* Restore GP registers in a reverse order.
- */
- emit_pop(
- func,
- get_temp_base() );
- emit_pop(
- func,
- get_output_base() );
- emit_pop(
- func,
- get_input_base() );
- emit_pop(
- func,
- get_const_base() );
-}
-
-static void
-emit_func_call_dst(
- struct x86_function *func,
- unsigned xmm_dst,
- void (*code)() )
-{
- emit_movaps(
- func,
- get_temp( TEMP_R0, 0 ),
- make_xmm( xmm_dst ) );
-
- emit_push_gp(
- func );
-
-#ifdef WIN32
- emit_push(
- func,
- get_temp( TEMP_R0, 0 ) );
-#endif
-
- emit_call(
- func,
- code );
-
- emit_pop_gp(
- func );
-
- emit_movaps(
- func,
- make_xmm( xmm_dst ),
- get_temp( TEMP_R0, 0 ) );
-}
-
-static void
-emit_func_call_dst_src(
- struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src,
- void (*code)() )
-{
- emit_movaps(
- func,
- get_temp( TEMP_R0, 1 ),
- make_xmm( xmm_src ) );
-
- emit_func_call_dst(
- func,
- xmm_dst,
- code );
-}
-
-/**
- * Low-level instruction translators.
- */
-
-static void
-emit_abs(
- struct x86_function *func,
- unsigned xmm )
-{
- emit_andps(
- func,
- make_xmm( xmm ),
- get_temp(
- TGSI_EXEC_TEMP_7FFFFFFF_I,
- TGSI_EXEC_TEMP_7FFFFFFF_C ) );
-}
-
-static void
-emit_add(
- struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
-{
- emit_addps(
- func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
-}
-
-static void XSTDCALL
-cos4f(
- float *store )
-{
-#ifdef WIN32
- store[0] = (float) cos( (double) store[0] );
- store[1] = (float) cos( (double) store[1] );
- store[2] = (float) cos( (double) store[2] );
- store[3] = (float) cos( (double) store[3] );
-#else
- const unsigned X = TEMP_R0 * 16;
- store[X + 0] = cosf( store[X + 0] );
- store[X + 1] = cosf( store[X + 1] );
- store[X + 2] = cosf( store[X + 2] );
- store[X + 3] = cosf( store[X + 3] );
-#endif
-}
-
-static void
-emit_cos(
- struct x86_function *func,
- unsigned xmm_dst )
-{
- emit_func_call_dst(
- func,
- xmm_dst,
- cos4f );
-}
-
-static void XSTDCALL
-ex24f(
- float *store )
-{
-#ifdef WIN32
- store[0] = (float) pow( 2.0, (double) store[0] );
- store[1] = (float) pow( 2.0, (double) store[1] );
- store[2] = (float) pow( 2.0, (double) store[2] );
- store[3] = (float) pow( 2.0, (double) store[3] );
-#else
- const unsigned X = TEMP_R0 * 16;
- store[X + 0] = powf( 2.0f, store[X + 0] );
- store[X + 1] = powf( 2.0f, store[X + 1] );
- store[X + 2] = powf( 2.0f, store[X + 2] );
- store[X + 3] = powf( 2.0f, store[X + 3] );
-#endif
-}
-
-static void
-emit_ex2(
- struct x86_function *func,
- unsigned xmm_dst )
-{
- emit_func_call_dst(
- func,
- xmm_dst,
- ex24f );
-}
-
-static void
-emit_f2it(
- struct x86_function *func,
- unsigned xmm )
-{
- emit_cvttps2dq(
- func,
- make_xmm( xmm ),
- make_xmm( xmm ) );
-}
-
-static void XSTDCALL
-flr4f(
- float *store )
-{
-#ifdef WIN32
- const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
- store[X + 0] = (float) floor( (double) store[X + 0] );
- store[X + 1] = (float) floor( (double) store[X + 1] );
- store[X + 2] = (float) floor( (double) store[X + 2] );
- store[X + 3] = (float) floor( (double) store[X + 3] );
-}
-
-static void
-emit_flr(
- struct x86_function *func,
- unsigned xmm_dst )
-{
- emit_func_call_dst(
- func,
- xmm_dst,
- flr4f );
-}
-
-static void XSTDCALL
-frc4f(
- float *store )
-{
-#ifdef WIN32
- const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
- store[X + 0] -= (float) floor( (double) store[X + 0] );
- store[X + 1] -= (float) floor( (double) store[X + 1] );
- store[X + 2] -= (float) floor( (double) store[X + 2] );
- store[X + 3] -= (float) floor( (double) store[X + 3] );
-}
-
-static void
-emit_frc(
- struct x86_function *func,
- unsigned xmm_dst )
-{
- emit_func_call_dst(
- func,
- xmm_dst,
- frc4f );
-}
-
-static void XSTDCALL
-lg24f(
- float *store )
-{
-#ifdef WIN32
- const unsigned X = 0;
-#else
- const unsigned X = TEMP_R0 * 16;
-#endif
- store[X + 0] = LOG2( store[X + 0] );
- store[X + 1] = LOG2( store[X + 1] );
- store[X + 2] = LOG2( store[X + 2] );
- store[X + 3] = LOG2( store[X + 3] );
-}
-
-static void
-emit_lg2(
- struct x86_function *func,
- unsigned xmm_dst )
-{
- emit_func_call_dst(
- func,
- xmm_dst,
- lg24f );
-}
-
-static void
-emit_MOV(
- struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
-{
- emit_movups(
- func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
-}
-
-static void
-emit_mul (struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src)
-{
- emit_mulps(
- func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
-}
-
-static void
-emit_neg(
- struct x86_function *func,
- unsigned xmm )
-{
- emit_xorps(
- func,
- make_xmm( xmm ),
- get_temp(
- TGSI_EXEC_TEMP_80000000_I,
- TGSI_EXEC_TEMP_80000000_C ) );
-}
-
-static void XSTDCALL
-pow4f(
- float *store )
-{
-#ifdef WIN32
- store[0] = (float) pow( (double) store[0], (double) store[4] );
- store[1] = (float) pow( (double) store[1], (double) store[5] );
- store[2] = (float) pow( (double) store[2], (double) store[6] );
- store[3] = (float) pow( (double) store[3], (double) store[7] );
-#else
- const unsigned X = TEMP_R0 * 16;
- store[X + 0] = powf( store[X + 0], store[X + 4] );
- store[X + 1] = powf( store[X + 1], store[X + 5] );
- store[X + 2] = powf( store[X + 2], store[X + 6] );
- store[X + 3] = powf( store[X + 3], store[X + 7] );
-#endif
-}
-
-static void
-emit_pow(
- struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
-{
- emit_func_call_dst_src(
- func,
- xmm_dst,
- xmm_src,
- pow4f );
-}
-
-static void
-emit_rcp (
- struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
-{
- emit_rcpps(
- func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
-}
-
-static void
-emit_rsqrt(
- struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
-{
- emit_rsqrtps(
- func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
-}
-
-static void
-emit_setsign(
- struct x86_function *func,
- unsigned xmm )
-{
- emit_orps(
- func,
- make_xmm( xmm ),
- get_temp(
- TGSI_EXEC_TEMP_80000000_I,
- TGSI_EXEC_TEMP_80000000_C ) );
-}
-
-static void XSTDCALL
-sin4f(
- float *store )
-{
-#ifdef WIN32
- store[0] = (float) sin( (double) store[0] );
- store[1] = (float) sin( (double) store[1] );
- store[2] = (float) sin( (double) store[2] );
- store[3] = (float) sin( (double) store[3] );
-#else
- const unsigned X = TEMP_R0 * 16;
- store[X + 0] = sinf( store[X + 0] );
- store[X + 1] = sinf( store[X + 1] );
- store[X + 2] = sinf( store[X + 2] );
- store[X + 3] = sinf( store[X + 3] );
-#endif
-}
-
-static void
-emit_sin (struct x86_function *func,
- unsigned xmm_dst)
-{
- emit_func_call_dst(
- func,
- xmm_dst,
- sin4f );
-}
-
-static void
-emit_sub(
- struct x86_function *func,
- unsigned xmm_dst,
- unsigned xmm_src )
-{
- emit_subps(
- func,
- make_xmm( xmm_dst ),
- make_xmm( xmm_src ) );
-}
-
-/**
- * Register fetch.
- */
-
-static void
-emit_fetch(
- struct x86_function *func,
- unsigned xmm,
- const struct tgsi_full_src_register *reg,
- const unsigned chan_index )
-{
- unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
-
- switch( swizzle ) {
- case TGSI_EXTSWIZZLE_X:
- case TGSI_EXTSWIZZLE_Y:
- case TGSI_EXTSWIZZLE_Z:
- case TGSI_EXTSWIZZLE_W:
- switch( reg->SrcRegister.File ) {
- case TGSI_FILE_CONSTANT:
- emit_const(
- func,
- xmm,
- reg->SrcRegister.Index,
- swizzle );
- break;
-
- case TGSI_FILE_INPUT:
- emit_inputf(
- func,
- xmm,
- reg->SrcRegister.Index,
- swizzle );
- break;
-
- case TGSI_FILE_TEMPORARY:
- emit_tempf(
- func,
- xmm,
- reg->SrcRegister.Index,
- swizzle );
- break;
-
- default:
- assert( 0 );
- }
- break;
-
- case TGSI_EXTSWIZZLE_ZERO:
- emit_tempf(
- func,
- xmm,
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C );
- break;
-
- case TGSI_EXTSWIZZLE_ONE:
- emit_tempf(
- func,
- xmm,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C );
- break;
-
- default:
- assert( 0 );
- }
-
- switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
- case TGSI_UTIL_SIGN_CLEAR:
- emit_abs( func, xmm );
- break;
-
- case TGSI_UTIL_SIGN_SET:
- emit_setsign( func, xmm );
- break;
-
- case TGSI_UTIL_SIGN_TOGGLE:
- emit_neg( func, xmm );
- break;
-
- case TGSI_UTIL_SIGN_KEEP:
- break;
- }
-}
-
-#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
- emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
-
-/**
- * Register store.
- */
-
-static void
-emit_store(
- struct x86_function *func,
- unsigned xmm,
- const struct tgsi_full_dst_register *reg,
- const struct tgsi_full_instruction *inst,
- unsigned chan_index )
-{
- switch( reg->DstRegister.File ) {
- case TGSI_FILE_OUTPUT:
- emit_output(
- func,
- xmm,
- reg->DstRegister.Index,
- chan_index );
- break;
-
- case TGSI_FILE_TEMPORARY:
- emit_temps(
- func,
- xmm,
- reg->DstRegister.Index,
- chan_index );
- break;
-
- case TGSI_FILE_ADDRESS:
- emit_addrs(
- func,
- xmm,
- reg->DstRegister.Index,
- chan_index );
- break;
-
- default:
- assert( 0 );
- }
-
- switch( inst->Instruction.Saturate ) {
- case TGSI_SAT_NONE:
- break;
-
- case TGSI_SAT_ZERO_ONE:
-// assert( 0 );
- break;
-
- case TGSI_SAT_MINUS_PLUS_ONE:
- assert( 0 );
- break;
- }
-}
-
-#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
- emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
-
-/**
- * High-level instruction translators.
- */
-
-static void
-emit_kil(
- struct x86_function *func,
- const struct tgsi_full_src_register *reg )
-{
- unsigned uniquemask;
- unsigned registers[4];
- unsigned nextregister = 0;
- unsigned firstchan = ~0;
- unsigned chan_index;
-
- /* This mask stores component bits that were already tested. Note that
- * we test if the value is less than zero, so 1.0 and 0.0 need not to be
- * tested. */
- uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
-
- FOR_EACH_CHANNEL( chan_index ) {
- unsigned swizzle;
-
- /* unswizzle channel */
- swizzle = tgsi_util_get_full_src_register_extswizzle(
- reg,
- chan_index );
-
- /* check if the component has not been already tested */
- if( !(uniquemask & (1 << swizzle)) ) {
- uniquemask |= 1 << swizzle;
-
- /* allocate register */
- registers[chan_index] = nextregister;
- emit_fetch(
- func,
- nextregister,
- reg,
- chan_index );
- nextregister++;
-
- /* mark the first channel used */
- if( firstchan == ~0 ) {
- firstchan = chan_index;
- }
- }
- }
-
- emit_push(
- func,
- x86_make_reg( file_REG32, reg_AX ) );
- emit_push(
- func,
- x86_make_reg( file_REG32, reg_DX ) );
-
- FOR_EACH_CHANNEL( chan_index ) {
- if( uniquemask & (1 << chan_index) ) {
- emit_cmpps(
- func,
- make_xmm( registers[chan_index] ),
- get_temp(
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C ),
- cc_LessThan );
-
- if( chan_index == firstchan ) {
- emit_pmovmskb(
- func,
- x86_make_reg( file_REG32, reg_AX ),
- make_xmm( registers[chan_index] ) );
- }
- else {
- emit_pmovmskb(
- func,
- x86_make_reg( file_REG32, reg_DX ),
- make_xmm( registers[chan_index] ) );
- emit_or(
- func,
- x86_make_reg( file_REG32, reg_AX ),
- x86_make_reg( file_REG32, reg_DX ) );
- }
- }
- }
-
- emit_or(
- func,
- get_temp(
- TGSI_EXEC_TEMP_KILMASK_I,
- TGSI_EXEC_TEMP_KILMASK_C ),
- x86_make_reg( file_REG32, reg_AX ) );
-
- emit_pop(
- func,
- x86_make_reg( file_REG32, reg_DX ) );
- emit_pop(
- func,
- x86_make_reg( file_REG32, reg_AX ) );
-}
-
-static void
-emit_setcc(
- struct x86_function *func,
- struct tgsi_full_instruction *inst,
- enum sse_cc cc )
-{
- unsigned chan_index;
-
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- emit_cmpps(
- func,
- make_xmm( 0 ),
- make_xmm( 1 ),
- cc );
- emit_andps(
- func,
- make_xmm( 0 ),
- get_temp(
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C ) );
- STORE( func, *inst, 0, 0, chan_index );
- }
-}
-
-static void
-emit_cmp(
- struct x86_function *func,
- struct tgsi_full_instruction *inst )
-{
- unsigned chan_index;
-
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- FETCH( func, *inst, 2, 2, chan_index );
- emit_cmpps(
- func,
- make_xmm( 0 ),
- get_temp(
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C ),
- cc_LessThan );
- emit_andps(
- func,
- make_xmm( 1 ),
- make_xmm( 0 ) );
- emit_andnps(
- func,
- make_xmm( 0 ),
- make_xmm( 2 ) );
- emit_orps(
- func,
- make_xmm( 0 ),
- make_xmm( 1 ) );
- STORE( func, *inst, 0, 0, chan_index );
- }
-}
-
-static int
-emit_instruction(
- struct x86_function *func,
- struct tgsi_full_instruction *inst )
-{
- unsigned chan_index;
-
- switch( inst->Instruction.Opcode ) {
- case TGSI_OPCODE_ARL:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- emit_f2it( func, 0 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_MOV:
- /* TGSI_OPCODE_SWZ */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_LIT:
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
- emit_tempf(
- func,
- 0,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C);
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
- STORE( func, *inst, 0, 0, CHAN_X );
- }
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
- STORE( func, *inst, 0, 0, CHAN_W );
- }
- }
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_maxps(
- func,
- make_xmm( 0 ),
- get_temp(
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C ) );
- STORE( func, *inst, 0, 0, CHAN_Y );
- }
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- FETCH( func, *inst, 1, 0, CHAN_Y );
- emit_maxps(
- func,
- make_xmm( 1 ),
- get_temp(
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C ) );
- FETCH( func, *inst, 2, 0, CHAN_W );
- emit_minps(
- func,
- make_xmm( 2 ),
- get_temp(
- TGSI_EXEC_TEMP_128_I,
- TGSI_EXEC_TEMP_128_C ) );
- emit_maxps(
- func,
- make_xmm( 2 ),
- get_temp(
- TGSI_EXEC_TEMP_MINUS_128_I,
- TGSI_EXEC_TEMP_MINUS_128_C ) );
- emit_pow( func, 1, 2 );
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_xorps(
- func,
- make_xmm( 2 ),
- make_xmm( 2 ) );
- emit_cmpps(
- func,
- make_xmm( 2 ),
- make_xmm( 0 ),
- cc_LessThanEqual );
- emit_andps(
- func,
- make_xmm( 2 ),
- make_xmm( 1 ) );
- STORE( func, *inst, 2, 0, CHAN_Z );
- }
- }
- break;
-
- case TGSI_OPCODE_RCP:
- /* TGSI_OPCODE_RECIP */
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_rcp( func, 0, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_RSQ:
- /* TGSI_OPCODE_RECIPSQRT */
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_rsqrt( func, 0, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_EXP:
- return 0;
- break;
-
- case TGSI_OPCODE_LOG:
- return 0;
- break;
-
- case TGSI_OPCODE_MUL:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- emit_mul( func, 0, 1 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_ADD:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- emit_add( func, 0, 1 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DP3:
- /* TGSI_OPCODE_DOT3 */
- FETCH( func, *inst, 0, 0, CHAN_X );
- FETCH( func, *inst, 1, 1, CHAN_X );
- emit_mul( func, 0, 1 );
- FETCH( func, *inst, 1, 0, CHAN_Y );
- FETCH( func, *inst, 2, 1, CHAN_Y );
- emit_mul( func, 1, 2 );
- emit_add( func, 0, 1 );
- FETCH( func, *inst, 1, 0, CHAN_Z );
- FETCH( func, *inst, 2, 1, CHAN_Z );
- emit_mul( func, 1, 2 );
- emit_add( func, 0, 1 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DP4:
- /* TGSI_OPCODE_DOT4 */
- FETCH( func, *inst, 0, 0, CHAN_X );
- FETCH( func, *inst, 1, 1, CHAN_X );
- emit_mul( func, 0, 1 );
- FETCH( func, *inst, 1, 0, CHAN_Y );
- FETCH( func, *inst, 2, 1, CHAN_Y );
- emit_mul( func, 1, 2 );
- emit_add( func, 0, 1 );
- FETCH( func, *inst, 1, 0, CHAN_Z );
- FETCH( func, *inst, 2, 1, CHAN_Z );
- emit_mul(func, 1, 2 );
- emit_add(func, 0, 1 );
- FETCH( func, *inst, 1, 0, CHAN_W );
- FETCH( func, *inst, 2, 1, CHAN_W );
- emit_mul( func, 1, 2 );
- emit_add( func, 0, 1 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DST:
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
- emit_tempf(
- func,
- 0,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C );
- STORE( func, *inst, 0, 0, CHAN_X );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
- FETCH( func, *inst, 0, 0, CHAN_Y );
- FETCH( func, *inst, 1, 1, CHAN_Y );
- emit_mul( func, 0, 1 );
- STORE( func, *inst, 0, 0, CHAN_Y );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
- FETCH( func, *inst, 0, 0, CHAN_Z );
- STORE( func, *inst, 0, 0, CHAN_Z );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
- FETCH( func, *inst, 0, 1, CHAN_W );
- STORE( func, *inst, 0, 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_MIN:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- emit_minps(
- func,
- make_xmm( 0 ),
- make_xmm( 1 ) );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_MAX:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- emit_maxps(
- func,
- make_xmm( 0 ),
- make_xmm( 1 ) );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SLT:
- /* TGSI_OPCODE_SETLT */
- emit_setcc( func, inst, cc_LessThan );
- break;
-
- case TGSI_OPCODE_SGE:
- /* TGSI_OPCODE_SETGE */
- emit_setcc( func, inst, cc_NotLessThan );
- break;
-
- case TGSI_OPCODE_MAD:
- /* TGSI_OPCODE_MADD */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- FETCH( func, *inst, 2, 2, chan_index );
- emit_mul( func, 0, 1 );
- emit_add( func, 0, 2 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SUB:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- emit_sub( func, 0, 1 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- FETCH( func, *inst, 1, 1, chan_index );
- FETCH( func, *inst, 2, 2, chan_index );
- emit_sub( func, 1, 2 );
- emit_mul( func, 0, 1 );
- emit_add( func, 0, 2 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_CND:
- return 0;
- break;
-
- case TGSI_OPCODE_CND0:
- return 0;
- break;
-
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
- return 0;
- break;
-
- case TGSI_OPCODE_INDEX:
- return 0;
- break;
-
- case TGSI_OPCODE_NEGATE:
- return 0;
- break;
-
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- emit_frc( func, 0 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_CLAMP:
- return 0;
- break;
-
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- emit_flr( func, 0 );
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_ROUND:
- return 0;
- break;
-
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_ex2( func, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_lg2( func, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
- FETCH( func, *inst, 0, 0, CHAN_X );
- FETCH( func, *inst, 1, 1, CHAN_X );
- emit_pow( func, 0, 1 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_CROSSPRODUCT:
- /* TGSI_OPCODE_XPD */
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
- FETCH( func, *inst, 1, 1, CHAN_Z );
- FETCH( func, *inst, 3, 0, CHAN_Z );
- }
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- FETCH( func, *inst, 0, 0, CHAN_Y );
- FETCH( func, *inst, 4, 1, CHAN_Y );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
- emit_MOV( func, 2, 0 );
- emit_mul( func, 2, 1 );
- emit_MOV( func, 5, 3 );
- emit_mul( func, 5, 4 );
- emit_sub( func, 2, 5 );
- STORE( func, *inst, 2, 0, CHAN_X );
- }
- if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
- IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
- FETCH( func, *inst, 2, 1, CHAN_X );
- FETCH( func, *inst, 5, 0, CHAN_X );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
- emit_mul( func, 3, 2 );
- emit_mul( func, 1, 5 );
- emit_sub( func, 3, 1 );
- STORE( func, *inst, 3, 0, CHAN_Y );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
- emit_mul( func, 5, 4 );
- emit_mul( func, 0, 2 );
- emit_sub( func, 5, 0 );
- STORE( func, *inst, 5, 0, CHAN_Z );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
- FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C );
- STORE( func, *inst, 0, 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_MULTIPLYMATRIX:
- return 0;
- break;
-
- case TGSI_OPCODE_ABS:
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( func, *inst, 0, 0, chan_index );
- emit_abs( func, 0) ;
-
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_RCC:
- return 0;
- break;
-
- case TGSI_OPCODE_DPH:
- FETCH( func, *inst, 0, 0, CHAN_X );
- FETCH( func, *inst, 1, 1, CHAN_X );
- emit_mul( func, 0, 1 );
- FETCH( func, *inst, 1, 0, CHAN_Y );
- FETCH( func, *inst, 2, 1, CHAN_Y );
- emit_mul( func, 1, 2 );
- emit_add( func, 0, 1 );
- FETCH( func, *inst, 1, 0, CHAN_Z );
- FETCH( func, *inst, 2, 1, CHAN_Z );
- emit_mul( func, 1, 2 );
- emit_add( func, 0, 1 );
- FETCH( func, *inst, 1, 1, CHAN_W );
- emit_add( func, 0, 1 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_COS:
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_cos( func, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_DDX:
- return 0;
- break;
-
- case TGSI_OPCODE_DDY:
- return 0;
- break;
-
- case TGSI_OPCODE_KIL:
- emit_kil( func, &inst->FullSrcRegisters[0] );
- break;
-
- case TGSI_OPCODE_PK2H:
- return 0;
- break;
-
- case TGSI_OPCODE_PK2US:
- return 0;
- break;
-
- case TGSI_OPCODE_PK4B:
- return 0;
- break;
-
- case TGSI_OPCODE_PK4UB:
- return 0;
- break;
-
- case TGSI_OPCODE_RFL:
- return 0;
- break;
-
- case TGSI_OPCODE_SEQ:
- return 0;
- break;
-
- case TGSI_OPCODE_SFL:
- return 0;
- break;
-
- case TGSI_OPCODE_SGT:
- return 0;
- break;
-
- case TGSI_OPCODE_SIN:
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_sin( func, 0 );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_SLE:
- return 0;
- break;
-
- case TGSI_OPCODE_SNE:
- return 0;
- break;
-
- case TGSI_OPCODE_STR:
- return 0;
- break;
-
- case TGSI_OPCODE_TEX:
- emit_tempf(
- func,
- 0,
- TGSI_EXEC_TEMP_ONE_I,
- TGSI_EXEC_TEMP_ONE_C );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- break;
-
- case TGSI_OPCODE_TXD:
- return 0;
- break;
-
- case TGSI_OPCODE_UP2H:
- return 0;
- break;
-
- case TGSI_OPCODE_UP2US:
- return 0;
- break;
-
- case TGSI_OPCODE_UP4B:
- return 0;
- break;
-
- case TGSI_OPCODE_UP4UB:
- return 0;
- break;
-
- case TGSI_OPCODE_X2D:
- return 0;
- break;
-
- case TGSI_OPCODE_ARA:
- return 0;
- break;
-
- case TGSI_OPCODE_ARR:
- return 0;
- break;
-
- case TGSI_OPCODE_BRA:
- return 0;
- break;
-
- case TGSI_OPCODE_CAL:
- return 0;
- break;
-
- case TGSI_OPCODE_RET:
- case TGSI_OPCODE_END:
-#ifdef WIN32
- emit_retw( func, 16 );
-#else
- emit_ret( func );
-#endif
- break;
-
- case TGSI_OPCODE_SSG:
- return 0;
- break;
-
- case TGSI_OPCODE_CMP:
- emit_cmp (func, inst);
- break;
-
- case TGSI_OPCODE_SCS:
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
- FETCH( func, *inst, 0, 0, CHAN_X );
- emit_cos( func, 0 );
- STORE( func, *inst, 0, 0, CHAN_X );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
- FETCH( func, *inst, 0, 0, CHAN_Y );
- emit_sin( func, 0 );
- STORE( func, *inst, 0, 0, CHAN_Y );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
- FETCH( func, *inst, 0, TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C );
- STORE( func, *inst, 0, 0, CHAN_Z );
- }
- IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
- FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C );
- STORE( func, *inst, 0, 0, CHAN_W );
- }
- break;
-
- case TGSI_OPCODE_TXB:
- return 0;
- break;
-
- case TGSI_OPCODE_NRM:
- return 0;
- break;
-
- case TGSI_OPCODE_DIV:
- return 0;
- break;
-
- case TGSI_OPCODE_DP2:
- return 0;
- break;
-
- case TGSI_OPCODE_TXL:
- return 0;
- break;
-
- case TGSI_OPCODE_BRK:
- return 0;
- break;
-
- case TGSI_OPCODE_IF:
- return 0;
- break;
-
- case TGSI_OPCODE_LOOP:
- return 0;
- break;
-
- case TGSI_OPCODE_REP:
- return 0;
- break;
-
- case TGSI_OPCODE_ELSE:
- return 0;
- break;
-
- case TGSI_OPCODE_ENDIF:
- return 0;
- break;
-
- case TGSI_OPCODE_ENDLOOP:
- return 0;
- break;
-
- case TGSI_OPCODE_ENDREP:
- return 0;
- break;
-
- case TGSI_OPCODE_PUSHA:
- return 0;
- break;
-
- case TGSI_OPCODE_POPA:
- return 0;
- break;
-
- case TGSI_OPCODE_CEIL:
- return 0;
- break;
-
- case TGSI_OPCODE_I2F:
- return 0;
- break;
-
- case TGSI_OPCODE_NOT:
- return 0;
- break;
-
- case TGSI_OPCODE_TRUNC:
- return 0;
- break;
-
- case TGSI_OPCODE_SHL:
- return 0;
- break;
-
- case TGSI_OPCODE_SHR:
- return 0;
- break;
-
- case TGSI_OPCODE_AND:
- return 0;
- break;
-
- case TGSI_OPCODE_OR:
- return 0;
- break;
-
- case TGSI_OPCODE_MOD:
- return 0;
- break;
-
- case TGSI_OPCODE_XOR:
- return 0;
- break;
-
- case TGSI_OPCODE_SAD:
- return 0;
- break;
-
- case TGSI_OPCODE_TXF:
- return 0;
- break;
-
- case TGSI_OPCODE_TXQ:
- return 0;
- break;
-
- case TGSI_OPCODE_CONT:
- return 0;
- break;
-
- case TGSI_OPCODE_EMIT:
- return 0;
- break;
-
- case TGSI_OPCODE_ENDPRIM:
- return 0;
- break;
-
- default:
- return 0;
- }
-
- return 1;
-}
-
-static void
-emit_declaration(
- struct x86_function *func,
- struct tgsi_full_declaration *decl )
-{
- if( decl->Declaration.File == TGSI_FILE_INPUT ) {
- unsigned first, last, mask;
- unsigned i, j;
-
- assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
-
- first = decl->u.DeclarationRange.First;
- last = decl->u.DeclarationRange.Last;
- mask = decl->Declaration.UsageMask;
-
- /* Do not touch WPOS.xy */
- if( first == 0 ) {
- mask &= ~TGSI_WRITEMASK_XY;
- if( mask == TGSI_WRITEMASK_NONE ) {
- first++;
- }
- }
-
- for( i = first; i <= last; i++ ) {
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- if( mask & (1 << j) ) {
- switch( decl->Interpolation.Interpolate ) {
- case TGSI_INTERPOLATE_CONSTANT:
- emit_coef_a0( func, 0, i, j );
- emit_inputs( func, 0, i, j );
- break;
-
- case TGSI_INTERPOLATE_LINEAR:
- emit_inputf( func, 0, 0, TGSI_SWIZZLE_X );
- emit_coef_dadx( func, 1, i, j );
- emit_inputf( func, 2, 0, TGSI_SWIZZLE_Y );
- emit_coef_dady( func, 3, i, j );
- emit_mul( func, 0, 1 ); /* x * dadx */
- emit_coef_a0( func, 4, i, j );
- emit_mul( func, 2, 3 ); /* y * dady */
- emit_add( func, 0, 4 ); /* x * dadx + a0 */
- emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */
- emit_inputs( func, 0, i, j );
- break;
-
- case TGSI_INTERPOLATE_PERSPECTIVE:
- emit_inputf( func, 0, 0, TGSI_SWIZZLE_X );
- emit_coef_dadx( func, 1, i, j );
- emit_inputf( func, 2, 0, TGSI_SWIZZLE_Y );
- emit_coef_dady( func, 3, i, j );
- emit_mul( func, 0, 1 ); /* x * dadx */
- emit_inputf( func, 4, 0, TGSI_SWIZZLE_W );
- emit_coef_a0( func, 5, i, j );
- emit_rcp( func, 4, 4 ); /* 1.0 / w */
- emit_mul( func, 2, 3 ); /* y * dady */
- emit_add( func, 0, 5 ); /* x * dadx + a0 */
- emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */
- emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */
- emit_inputs( func, 0, i, j );
- break;
-
- default:
- assert( 0 );
- break;
- }
- }
- }
- }
- }
-}
-
-unsigned
-tgsi_emit_sse2(
- struct tgsi_token *tokens,
- struct x86_function *func )
-{
- struct tgsi_parse_context parse;
- unsigned ok = 1;
-
- DUMP_START();
-
- func->csr = func->store;
-
- emit_mov(
- func,
- get_input_base(),
- get_argument( 0 ) );
- emit_mov(
- func,
- get_output_base(),
- get_argument( 1 ) );
- emit_mov(
- func,
- get_const_base(),
- get_argument( 2 ) );
- emit_mov(
- func,
- get_temp_base(),
- get_argument( 3 ) );
-
- tgsi_parse_init( &parse, tokens );
-
- while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
- tgsi_parse_token( &parse );
-
- switch( parse.FullToken.Token.Type ) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- ok = emit_instruction(
- func,
- &parse.FullToken.FullInstruction );
-
- if (!ok) {
- debug_printf("failed to translate tgsi opcode %d\n",
- parse.FullToken.FullInstruction.Instruction.Opcode );
- }
- break;
-
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- /* XXX implement this */
- ok = 0;
- debug_printf("failed to emit immediate value\n");
- break;
-
- default:
- assert( 0 );
- ok = 0;
- break;
- }
- }
-
- tgsi_parse_free( &parse );
-
- DUMP_END();
-
- return ok;
-}
-
-/**
- * Fragment shaders are responsible for interpolating shader inputs. Because on
- * x86 we have only 4 GP registers, and here we have 5 shader arguments (input,
- * output, const, temp and coef), the code is split into two phases --
- * DECLARATION and INSTRUCTION phase.
- * GP register holding the output argument is aliased with the coeff argument,
- * as outputs are not needed in the DECLARATION phase.
- */
-unsigned
-tgsi_emit_sse2_fs(
- struct tgsi_token *tokens,
- struct x86_function *func )
-{
- struct tgsi_parse_context parse;
- boolean instruction_phase = FALSE;
-
- DUMP_START();
-
- func->csr = func->store;
-
- /* DECLARATION phase, do not load output argument. */
- emit_mov(
- func,
- get_input_base(),
- get_argument( 0 ) );
- emit_mov(
- func,
- get_const_base(),
- get_argument( 2 ) );
- emit_mov(
- func,
- get_temp_base(),
- get_argument( 3 ) );
- emit_mov(
- func,
- get_coef_base(),
- get_argument( 4 ) );
-
- tgsi_parse_init( &parse, tokens );
-
- while( !tgsi_parse_end_of_tokens( &parse ) ) {
- tgsi_parse_token( &parse );
-
- switch( parse.FullToken.Token.Type ) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- emit_declaration(
- func,
- &parse.FullToken.FullDeclaration );
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- if( !instruction_phase ) {
- /* INSTRUCTION phase, overwrite coeff with output. */
- instruction_phase = TRUE;
- emit_mov(
- func,
- get_output_base(),
- get_argument( 1 ) );
- }
- emit_instruction(
- func,
- &parse.FullToken.FullInstruction );
- break;
-
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- /* XXX implement this */
- assert(0);
- break;
-
- default:
- assert( 0 );
- }
- }
-
- tgsi_parse_free( &parse );
-
- DUMP_END();
-
- return 1;
-}
-
-#endif /* i386 */
+++ /dev/null
-#if !defined TGSI_SSE2_H
-#define TGSI_SSE2_H
-
-#if defined __cplusplus
-extern "C" {
-#endif // defined __cplusplus
-
-struct tgsi_token;
-struct x86_function;
-
-unsigned
-tgsi_emit_sse2(
- struct tgsi_token *tokens,
- struct x86_function *function );
-
-unsigned
-tgsi_emit_sse2_fs(
- struct tgsi_token *tokens,
- struct x86_function *function );
-
-#if defined __cplusplus
-} // extern "C"
-#endif // defined __cplusplus
-
-#endif // !defined TGSI_SSE2_H
-
+++ /dev/null
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi_build.h"
-#include "tgsi_parse.h"
-
-/*
- * version
- */
-
-struct tgsi_version
-tgsi_build_version( void )
-{
- struct tgsi_version version;
-
- version.MajorVersion = 1;
- version.MinorVersion = 1;
- version.Padding = 0;
-
- return version;
-}
-
-/*
- * header
- */
-
-struct tgsi_header
-tgsi_build_header( void )
-{
- struct tgsi_header header;
-
- header.HeaderSize = 1;
- header.BodySize = 0;
-
- return header;
-}
-
-static void
-header_headersize_grow( struct tgsi_header *header )
-{
- assert( header->HeaderSize < 0xFF );
- assert( header->BodySize == 0 );
-
- header->HeaderSize++;
-}
-
-static void
-header_bodysize_grow( struct tgsi_header *header )
-{
- assert( header->BodySize < 0xFFFFFF );
-
- header->BodySize++;
-}
-
-struct tgsi_processor
-tgsi_default_processor( void )
-{
- struct tgsi_processor processor;
-
- processor.Processor = TGSI_PROCESSOR_FRAGMENT;
- processor.Padding = 0;
-
- return processor;
-}
-
-struct tgsi_processor
-tgsi_build_processor(
- unsigned type,
- struct tgsi_header *header )
-{
- struct tgsi_processor processor;
-
- processor = tgsi_default_processor();
- processor.Processor = type;
-
- header_headersize_grow( header );
-
- return processor;
-}
-
-/*
- * declaration
- */
-
-struct tgsi_declaration
-tgsi_default_declaration( void )
-{
- struct tgsi_declaration declaration;
-
- declaration.Type = TGSI_TOKEN_TYPE_DECLARATION;
- declaration.Size = 1;
- declaration.File = TGSI_FILE_NULL;
- declaration.Declare = TGSI_DECLARE_RANGE;
- declaration.UsageMask = TGSI_WRITEMASK_XYZW;
- declaration.Interpolate = 0;
- declaration.Semantic = 0;
- declaration.Padding = 0;
- declaration.Extended = 0;
-
- return declaration;
-}
-
-struct tgsi_declaration
-tgsi_build_declaration(
- unsigned file,
- unsigned declare,
- unsigned usage_mask,
- unsigned interpolate,
- unsigned semantic,
- struct tgsi_header *header )
-{
- struct tgsi_declaration declaration;
-
- assert( file <= TGSI_FILE_IMMEDIATE );
- assert( declare <= TGSI_DECLARE_MASK );
-
- declaration = tgsi_default_declaration();
- declaration.File = file;
- declaration.Declare = declare;
- declaration.UsageMask = usage_mask;
- declaration.Interpolate = interpolate;
- declaration.Semantic = semantic;
-
- header_bodysize_grow( header );
-
- return declaration;
-}
-
-static void
-declaration_grow(
- struct tgsi_declaration *declaration,
- struct tgsi_header *header )
-{
- assert( declaration->Size < 0xFF );
-
- declaration->Size++;
-
- header_bodysize_grow( header );
-}
-
-struct tgsi_full_declaration
-tgsi_default_full_declaration( void )
-{
- struct tgsi_full_declaration full_declaration;
-
- full_declaration.Declaration = tgsi_default_declaration();
- full_declaration.Interpolation = tgsi_default_declaration_interpolation();
- full_declaration.Semantic = tgsi_default_declaration_semantic();
-
- return full_declaration;
-}
-
-unsigned
-tgsi_build_full_declaration(
- const struct tgsi_full_declaration *full_decl,
- struct tgsi_token *tokens,
- struct tgsi_header *header,
- unsigned maxsize )
-{
- unsigned size = 0;
- struct tgsi_declaration *declaration;
-
- if( maxsize <= size )
- return 0;
- declaration = (struct tgsi_declaration *) &tokens[size];
- size++;
-
- *declaration = tgsi_build_declaration(
- full_decl->Declaration.File,
- full_decl->Declaration.Declare,
- full_decl->Declaration.UsageMask,
- full_decl->Declaration.Interpolate,
- full_decl->Declaration.Semantic,
- header );
-
- switch( full_decl->Declaration.Declare ) {
- case TGSI_DECLARE_RANGE:
- {
- struct tgsi_declaration_range *dr;
-
- if( maxsize <= size )
- return 0;
- dr = (struct tgsi_declaration_range *) &tokens[size];
- size++;
-
- *dr = tgsi_build_declaration_range(
- full_decl->u.DeclarationRange.First,
- full_decl->u.DeclarationRange.Last,
- declaration,
- header );
- break;
- }
-
- case TGSI_DECLARE_MASK:
- {
- struct tgsi_declaration_mask *dm;
-
- if( maxsize <= size )
- return 0;
- dm = (struct tgsi_declaration_mask *) &tokens[size];
- size++;
-
- *dm = tgsi_build_declaration_mask(
- full_decl->u.DeclarationMask.Mask,
- declaration,
- header );
- break;
- }
-
- default:
- assert( 0 );
- }
-
- if( full_decl->Declaration.Interpolate ) {
- struct tgsi_declaration_interpolation *di;
-
- if( maxsize <= size )
- return 0;
- di = (struct tgsi_declaration_interpolation *) &tokens[size];
- size++;
-
- *di = tgsi_build_declaration_interpolation(
- full_decl->Interpolation.Interpolate,
- declaration,
- header );
- }
-
- if( full_decl->Declaration.Semantic ) {
- struct tgsi_declaration_semantic *ds;
-
- if( maxsize <= size )
- return 0;
- ds = (struct tgsi_declaration_semantic *) &tokens[size];
- size++;
-
- *ds = tgsi_build_declaration_semantic(
- full_decl->Semantic.SemanticName,
- full_decl->Semantic.SemanticIndex,
- declaration,
- header );
- }
-
- return size;
-}
-
-struct tgsi_declaration_range
-tgsi_build_declaration_range(
- unsigned first,
- unsigned last,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header )
-{
- struct tgsi_declaration_range declaration_range;
-
- assert( last >= first );
- assert( last <= 0xFFFF );
-
- declaration_range.First = first;
- declaration_range.Last = last;
-
- declaration_grow( declaration, header );
-
- return declaration_range;
-}
-
-struct tgsi_declaration_mask
-tgsi_build_declaration_mask(
- unsigned mask,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header )
-{
- struct tgsi_declaration_mask declaration_mask;
-
- declaration_mask.Mask = mask;
-
- declaration_grow( declaration, header );
-
- return declaration_mask;
-}
-
-struct tgsi_declaration_interpolation
-tgsi_default_declaration_interpolation( void )
-{
- struct tgsi_declaration_interpolation di;
-
- di.Interpolate = TGSI_INTERPOLATE_CONSTANT;
- di.Padding = 0;
-
- return di;
-}
-
-struct tgsi_declaration_interpolation
-tgsi_build_declaration_interpolation(
- unsigned interpolate,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header )
-{
- struct tgsi_declaration_interpolation di;
-
- assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE );
-
- di = tgsi_default_declaration_interpolation();
- di.Interpolate = interpolate;
-
- declaration_grow( declaration, header );
-
- return di;
-}
-
-struct tgsi_declaration_semantic
-tgsi_default_declaration_semantic( void )
-{
- struct tgsi_declaration_semantic ds;
-
- ds.SemanticName = TGSI_SEMANTIC_POSITION;
- ds.SemanticIndex = 0;
- ds.Padding = 0;
-
- return ds;
-}
-
-struct tgsi_declaration_semantic
-tgsi_build_declaration_semantic(
- unsigned semantic_name,
- unsigned semantic_index,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header )
-{
- struct tgsi_declaration_semantic ds;
-
- assert( semantic_name <= TGSI_SEMANTIC_COUNT );
- assert( semantic_index <= 0xFFFF );
-
- ds = tgsi_default_declaration_semantic();
- ds.SemanticName = semantic_name;
- ds.SemanticIndex = semantic_index;
-
- declaration_grow( declaration, header );
-
- return ds;
-}
-
-/*
- * immediate
- */
-
-struct tgsi_immediate
-tgsi_default_immediate( void )
-{
- struct tgsi_immediate immediate;
-
- immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
- immediate.Size = 1;
- immediate.DataType = TGSI_IMM_FLOAT32;
- immediate.Padding = 0;
- immediate.Extended = 0;
-
- return immediate;
-}
-
-struct tgsi_immediate
-tgsi_build_immediate(
- struct tgsi_header *header )
-{
- struct tgsi_immediate immediate;
-
- immediate = tgsi_default_immediate();
-
- header_bodysize_grow( header );
-
- return immediate;
-}
-
-struct tgsi_full_immediate
-tgsi_default_full_immediate( void )
-{
- struct tgsi_full_immediate fullimm;
-
- fullimm.Immediate = tgsi_default_immediate();
- fullimm.u.Pointer = (void *) 0;
-
- return fullimm;
-}
-
-static void
-immediate_grow(
- struct tgsi_immediate *immediate,
- struct tgsi_header *header )
-{
- assert( immediate->Size < 0xFF );
-
- immediate->Size++;
-
- header_bodysize_grow( header );
-}
-
-struct tgsi_immediate_float32
-tgsi_build_immediate_float32(
- float value,
- struct tgsi_immediate *immediate,
- struct tgsi_header *header )
-{
- struct tgsi_immediate_float32 immediate_float32;
-
- immediate_float32.Float = value;
-
- immediate_grow( immediate, header );
-
- return immediate_float32;
-}
-
-unsigned
-tgsi_build_full_immediate(
- const struct tgsi_full_immediate *full_imm,
- struct tgsi_token *tokens,
- struct tgsi_header *header,
- unsigned maxsize )
-{
- unsigned size = 0, i;
- struct tgsi_immediate *immediate;
-
- if( maxsize <= size )
- return 0;
- immediate = (struct tgsi_immediate *) &tokens[size];
- size++;
-
- *immediate = tgsi_build_immediate( header );
-
- for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) {
- struct tgsi_immediate_float32 *if32;
-
- if( maxsize <= size )
- return 0;
- if32 = (struct tgsi_immediate_float32 *) &tokens[size];
- size++;
-
- *if32 = tgsi_build_immediate_float32(
- full_imm->u.ImmediateFloat32[i].Float,
- immediate,
- header );
- }
-
- return size;
-}
-
-/*
- * instruction
- */
-
-struct tgsi_instruction
-tgsi_default_instruction( void )
-{
- struct tgsi_instruction instruction;
-
- instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
- instruction.Size = 1;
- instruction.Opcode = TGSI_OPCODE_MOV;
- instruction.Saturate = TGSI_SAT_NONE;
- instruction.NumDstRegs = 1;
- instruction.NumSrcRegs = 1;
- instruction.Padding = 0;
- instruction.Extended = 0;
-
- return instruction;
-}
-
-struct tgsi_instruction
-tgsi_build_instruction(
- unsigned opcode,
- unsigned saturate,
- unsigned num_dst_regs,
- unsigned num_src_regs,
- struct tgsi_header *header )
-{
- struct tgsi_instruction instruction;
-
- assert (opcode <= TGSI_OPCODE_LAST);
- assert (saturate <= TGSI_SAT_MINUS_PLUS_ONE);
- assert (num_dst_regs <= 3);
- assert (num_src_regs <= 15);
-
- instruction = tgsi_default_instruction();
- instruction.Opcode = opcode;
- instruction.Saturate = saturate;
- instruction.NumDstRegs = num_dst_regs;
- instruction.NumSrcRegs = num_src_regs;
-
- header_bodysize_grow( header );
-
- return instruction;
-}
-
-static void
-instruction_grow(
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- assert (instruction->Size < 0xFF);
-
- instruction->Size++;
-
- header_bodysize_grow( header );
-}
-
-struct tgsi_full_instruction
-tgsi_default_full_instruction( void )
-{
- struct tgsi_full_instruction full_instruction;
- unsigned i;
-
- full_instruction.Instruction = tgsi_default_instruction();
- full_instruction.InstructionExtNv = tgsi_default_instruction_ext_nv();
- full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label();
- full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture();
- for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) {
- full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register();
- }
- for( i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) {
- full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register();
- }
-
- return full_instruction;
-}
-
-unsigned
-tgsi_build_full_instruction(
- const struct tgsi_full_instruction *full_inst,
- struct tgsi_token *tokens,
- struct tgsi_header *header,
- unsigned maxsize )
-{
- unsigned size = 0;
- unsigned i;
- struct tgsi_instruction *instruction;
- struct tgsi_token *prev_token;
-
- if( maxsize <= size )
- return 0;
- instruction = (struct tgsi_instruction *) &tokens[size];
- size++;
-
- *instruction = tgsi_build_instruction(
- full_inst->Instruction.Opcode,
- full_inst->Instruction.Saturate,
- full_inst->Instruction.NumDstRegs,
- full_inst->Instruction.NumSrcRegs,
- header );
- prev_token = (struct tgsi_token *) instruction;
-
- if( tgsi_compare_instruction_ext_nv(
- full_inst->InstructionExtNv,
- tgsi_default_instruction_ext_nv() ) ) {
- struct tgsi_instruction_ext_nv *instruction_ext_nv;
-
- if( maxsize <= size )
- return 0;
- instruction_ext_nv =
- (struct tgsi_instruction_ext_nv *) &tokens[size];
- size++;
-
- *instruction_ext_nv = tgsi_build_instruction_ext_nv(
- full_inst->InstructionExtNv.Precision,
- full_inst->InstructionExtNv.CondDstIndex,
- full_inst->InstructionExtNv.CondFlowIndex,
- full_inst->InstructionExtNv.CondMask,
- full_inst->InstructionExtNv.CondSwizzleX,
- full_inst->InstructionExtNv.CondSwizzleY,
- full_inst->InstructionExtNv.CondSwizzleZ,
- full_inst->InstructionExtNv.CondSwizzleW,
- full_inst->InstructionExtNv.CondDstUpdate,
- full_inst->InstructionExtNv.CondFlowEnable,
- prev_token,
- instruction,
- header );
- prev_token = (struct tgsi_token *) instruction_ext_nv;
- }
-
- if( tgsi_compare_instruction_ext_label(
- full_inst->InstructionExtLabel,
- tgsi_default_instruction_ext_label() ) ) {
- struct tgsi_instruction_ext_label *instruction_ext_label;
-
- if( maxsize <= size )
- return 0;
- instruction_ext_label =
- (struct tgsi_instruction_ext_label *) &tokens[size];
- size++;
-
- *instruction_ext_label = tgsi_build_instruction_ext_label(
- full_inst->InstructionExtLabel.Label,
- prev_token,
- instruction,
- header );
- prev_token = (struct tgsi_token *) instruction_ext_label;
- }
-
- if( tgsi_compare_instruction_ext_texture(
- full_inst->InstructionExtTexture,
- tgsi_default_instruction_ext_texture() ) ) {
- struct tgsi_instruction_ext_texture *instruction_ext_texture;
-
- if( maxsize <= size )
- return 0;
- instruction_ext_texture =
- (struct tgsi_instruction_ext_texture *) &tokens[size];
- size++;
-
- *instruction_ext_texture = tgsi_build_instruction_ext_texture(
- full_inst->InstructionExtTexture.Texture,
- prev_token,
- instruction,
- header );
- prev_token = (struct tgsi_token *) instruction_ext_texture;
- }
-
- for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) {
- const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i];
- struct tgsi_dst_register *dst_register;
- struct tgsi_token *prev_token;
-
- if( maxsize <= size )
- return 0;
- dst_register = (struct tgsi_dst_register *) &tokens[size];
- size++;
-
- *dst_register = tgsi_build_dst_register(
- reg->DstRegister.File,
- reg->DstRegister.WriteMask,
- reg->DstRegister.Index,
- instruction,
- header );
- prev_token = (struct tgsi_token *) dst_register;
-
- if( tgsi_compare_dst_register_ext_concode(
- reg->DstRegisterExtConcode,
- tgsi_default_dst_register_ext_concode() ) ) {
- struct tgsi_dst_register_ext_concode *dst_register_ext_concode;
-
- if( maxsize <= size )
- return 0;
- dst_register_ext_concode =
- (struct tgsi_dst_register_ext_concode *) &tokens[size];
- size++;
-
- *dst_register_ext_concode = tgsi_build_dst_register_ext_concode(
- reg->DstRegisterExtConcode.CondMask,
- reg->DstRegisterExtConcode.CondSwizzleX,
- reg->DstRegisterExtConcode.CondSwizzleY,
- reg->DstRegisterExtConcode.CondSwizzleZ,
- reg->DstRegisterExtConcode.CondSwizzleW,
- reg->DstRegisterExtConcode.CondSrcIndex,
- prev_token,
- instruction,
- header );
- prev_token = (struct tgsi_token *) dst_register_ext_concode;
- }
-
- if( tgsi_compare_dst_register_ext_modulate(
- reg->DstRegisterExtModulate,
- tgsi_default_dst_register_ext_modulate() ) ) {
- struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate;
-
- if( maxsize <= size )
- return 0;
- dst_register_ext_modulate =
- (struct tgsi_dst_register_ext_modulate *) &tokens[size];
- size++;
-
- *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate(
- reg->DstRegisterExtModulate.Modulate,
- prev_token,
- instruction,
- header );
- prev_token = (struct tgsi_token *) dst_register_ext_modulate;
- }
- }
-
- for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) {
- const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i];
- struct tgsi_src_register *src_register;
- struct tgsi_token *prev_token;
-
- if( maxsize <= size )
- return 0;
- src_register = (struct tgsi_src_register *) &tokens[size];
- size++;
-
- *src_register = tgsi_build_src_register(
- reg->SrcRegister.File,
- reg->SrcRegister.SwizzleX,
- reg->SrcRegister.SwizzleY,
- reg->SrcRegister.SwizzleZ,
- reg->SrcRegister.SwizzleW,
- reg->SrcRegister.Negate,
- reg->SrcRegister.Indirect,
- reg->SrcRegister.Dimension,
- reg->SrcRegister.Index,
- instruction,
- header );
- prev_token = (struct tgsi_token *) src_register;
-
- if( tgsi_compare_src_register_ext_swz(
- reg->SrcRegisterExtSwz,
- tgsi_default_src_register_ext_swz() ) ) {
- struct tgsi_src_register_ext_swz *src_register_ext_swz;
-
- if( maxsize <= size )
- return 0;
- src_register_ext_swz =
- (struct tgsi_src_register_ext_swz *) &tokens[size];
- size++;
-
- *src_register_ext_swz = tgsi_build_src_register_ext_swz(
- reg->SrcRegisterExtSwz.ExtSwizzleX,
- reg->SrcRegisterExtSwz.ExtSwizzleY,
- reg->SrcRegisterExtSwz.ExtSwizzleZ,
- reg->SrcRegisterExtSwz.ExtSwizzleW,
- reg->SrcRegisterExtSwz.NegateX,
- reg->SrcRegisterExtSwz.NegateY,
- reg->SrcRegisterExtSwz.NegateZ,
- reg->SrcRegisterExtSwz.NegateW,
- reg->SrcRegisterExtSwz.ExtDivide,
- prev_token,
- instruction,
- header );
- prev_token = (struct tgsi_token *) src_register_ext_swz;
- }
-
- if( tgsi_compare_src_register_ext_mod(
- reg->SrcRegisterExtMod,
- tgsi_default_src_register_ext_mod() ) ) {
- struct tgsi_src_register_ext_mod *src_register_ext_mod;
-
- if( maxsize <= size )
- return 0;
- src_register_ext_mod =
- (struct tgsi_src_register_ext_mod *) &tokens[size];
- size++;
-
- *src_register_ext_mod = tgsi_build_src_register_ext_mod(
- reg->SrcRegisterExtMod.Complement,
- reg->SrcRegisterExtMod.Bias,
- reg->SrcRegisterExtMod.Scale2X,
- reg->SrcRegisterExtMod.Absolute,
- reg->SrcRegisterExtMod.Negate,
- prev_token,
- instruction,
- header );
- prev_token = (struct tgsi_token *) src_register_ext_mod;
- }
-
- if( reg->SrcRegister.Indirect ) {
- struct tgsi_src_register *ind;
-
- if( maxsize <= size )
- return 0;
- ind = (struct tgsi_src_register *) &tokens[size];
- size++;
-
- *ind = tgsi_build_src_register(
- reg->SrcRegisterInd.File,
- reg->SrcRegisterInd.SwizzleX,
- reg->SrcRegisterInd.SwizzleY,
- reg->SrcRegisterInd.SwizzleZ,
- reg->SrcRegisterInd.SwizzleW,
- reg->SrcRegisterInd.Negate,
- reg->SrcRegisterInd.Indirect,
- reg->SrcRegisterInd.Dimension,
- reg->SrcRegisterInd.Index,
- instruction,
- header );
- }
-
- if( reg->SrcRegister.Dimension ) {
- struct tgsi_dimension *dim;
-
- assert( !reg->SrcRegisterDim.Dimension );
-
- if( maxsize <= size )
- return 0;
- dim = (struct tgsi_dimension *) &tokens[size];
- size++;
-
- *dim = tgsi_build_dimension(
- reg->SrcRegisterDim.Indirect,
- reg->SrcRegisterDim.Index,
- instruction,
- header );
-
- if( reg->SrcRegisterDim.Indirect ) {
- struct tgsi_src_register *ind;
-
- if( maxsize <= size )
- return 0;
- ind = (struct tgsi_src_register *) &tokens[size];
- size++;
-
- *ind = tgsi_build_src_register(
- reg->SrcRegisterDimInd.File,
- reg->SrcRegisterDimInd.SwizzleX,
- reg->SrcRegisterDimInd.SwizzleY,
- reg->SrcRegisterDimInd.SwizzleZ,
- reg->SrcRegisterDimInd.SwizzleW,
- reg->SrcRegisterDimInd.Negate,
- reg->SrcRegisterDimInd.Indirect,
- reg->SrcRegisterDimInd.Dimension,
- reg->SrcRegisterDimInd.Index,
- instruction,
- header );
- }
- }
- }
-
- return size;
-}
-
-struct tgsi_instruction_ext_nv
-tgsi_default_instruction_ext_nv( void )
-{
- struct tgsi_instruction_ext_nv instruction_ext_nv;
-
- instruction_ext_nv.Type = TGSI_INSTRUCTION_EXT_TYPE_NV;
- instruction_ext_nv.Precision = TGSI_PRECISION_DEFAULT;
- instruction_ext_nv.CondDstIndex = 0;
- instruction_ext_nv.CondFlowIndex = 0;
- instruction_ext_nv.CondMask = TGSI_CC_TR;
- instruction_ext_nv.CondSwizzleX = TGSI_SWIZZLE_X;
- instruction_ext_nv.CondSwizzleY = TGSI_SWIZZLE_Y;
- instruction_ext_nv.CondSwizzleZ = TGSI_SWIZZLE_Z;
- instruction_ext_nv.CondSwizzleW = TGSI_SWIZZLE_W;
- instruction_ext_nv.CondDstUpdate = 0;
- instruction_ext_nv.CondFlowEnable = 0;
- instruction_ext_nv.Padding = 0;
- instruction_ext_nv.Extended = 0;
-
- return instruction_ext_nv;
-}
-
-union token_u32
-{
- unsigned u32;
-};
-
-unsigned
-tgsi_compare_instruction_ext_nv(
- struct tgsi_instruction_ext_nv a,
- struct tgsi_instruction_ext_nv b )
-{
- a.Padding = b.Padding = 0;
- a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
-}
-
-struct tgsi_instruction_ext_nv
-tgsi_build_instruction_ext_nv(
- unsigned precision,
- unsigned cond_dst_index,
- unsigned cond_flow_index,
- unsigned cond_mask,
- unsigned cond_swizzle_x,
- unsigned cond_swizzle_y,
- unsigned cond_swizzle_z,
- unsigned cond_swizzle_w,
- unsigned cond_dst_update,
- unsigned cond_flow_update,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_instruction_ext_nv instruction_ext_nv;
-
- instruction_ext_nv = tgsi_default_instruction_ext_nv();
- instruction_ext_nv.Precision = precision;
- instruction_ext_nv.CondDstIndex = cond_dst_index;
- instruction_ext_nv.CondFlowIndex = cond_flow_index;
- instruction_ext_nv.CondMask = cond_mask;
- instruction_ext_nv.CondSwizzleX = cond_swizzle_x;
- instruction_ext_nv.CondSwizzleY = cond_swizzle_y;
- instruction_ext_nv.CondSwizzleZ = cond_swizzle_z;
- instruction_ext_nv.CondSwizzleW = cond_swizzle_w;
- instruction_ext_nv.CondDstUpdate = cond_dst_update;
- instruction_ext_nv.CondFlowEnable = cond_flow_update;
-
- prev_token->Extended = 1;
- instruction_grow( instruction, header );
-
- return instruction_ext_nv;
-}
-
-struct tgsi_instruction_ext_label
-tgsi_default_instruction_ext_label( void )
-{
- struct tgsi_instruction_ext_label instruction_ext_label;
-
- instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL;
- instruction_ext_label.Label = 0;
- instruction_ext_label.Padding = 0;
- instruction_ext_label.Extended = 0;
-
- return instruction_ext_label;
-}
-
-unsigned
-tgsi_compare_instruction_ext_label(
- struct tgsi_instruction_ext_label a,
- struct tgsi_instruction_ext_label b )
-{
- a.Padding = b.Padding = 0;
- a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
-}
-
-struct tgsi_instruction_ext_label
-tgsi_build_instruction_ext_label(
- unsigned label,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_instruction_ext_label instruction_ext_label;
-
- instruction_ext_label = tgsi_default_instruction_ext_label();
- instruction_ext_label.Label = label;
-
- prev_token->Extended = 1;
- instruction_grow( instruction, header );
-
- return instruction_ext_label;
-}
-
-struct tgsi_instruction_ext_texture
-tgsi_default_instruction_ext_texture( void )
-{
- struct tgsi_instruction_ext_texture instruction_ext_texture;
-
- instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE;
- instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN;
- instruction_ext_texture.Padding = 0;
- instruction_ext_texture.Extended = 0;
-
- return instruction_ext_texture;
-}
-
-unsigned
-tgsi_compare_instruction_ext_texture(
- struct tgsi_instruction_ext_texture a,
- struct tgsi_instruction_ext_texture b )
-{
- a.Padding = b.Padding = 0;
- a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
-}
-
-struct tgsi_instruction_ext_texture
-tgsi_build_instruction_ext_texture(
- unsigned texture,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_instruction_ext_texture instruction_ext_texture;
-
- instruction_ext_texture = tgsi_default_instruction_ext_texture();
- instruction_ext_texture.Texture = texture;
-
- prev_token->Extended = 1;
- instruction_grow( instruction, header );
-
- return instruction_ext_texture;
-}
-
-struct tgsi_src_register
-tgsi_default_src_register( void )
-{
- struct tgsi_src_register src_register;
-
- src_register.File = TGSI_FILE_NULL;
- src_register.SwizzleX = TGSI_SWIZZLE_X;
- src_register.SwizzleY = TGSI_SWIZZLE_Y;
- src_register.SwizzleZ = TGSI_SWIZZLE_Z;
- src_register.SwizzleW = TGSI_SWIZZLE_W;
- src_register.Negate = 0;
- src_register.Indirect = 0;
- src_register.Dimension = 0;
- src_register.Index = 0;
- src_register.Extended = 0;
-
- return src_register;
-}
-
-struct tgsi_src_register
-tgsi_build_src_register(
- unsigned file,
- unsigned swizzle_x,
- unsigned swizzle_y,
- unsigned swizzle_z,
- unsigned swizzle_w,
- unsigned negate,
- unsigned indirect,
- unsigned dimension,
- int index,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_src_register src_register;
-
- assert( file <= TGSI_FILE_IMMEDIATE );
- assert( swizzle_x <= TGSI_SWIZZLE_W );
- assert( swizzle_y <= TGSI_SWIZZLE_W );
- assert( swizzle_z <= TGSI_SWIZZLE_W );
- assert( swizzle_w <= TGSI_SWIZZLE_W );
- assert( negate <= 1 );
- assert( index >= -0x8000 && index <= 0x7FFF );
-
- src_register = tgsi_default_src_register();
- src_register.File = file;
- src_register.SwizzleX = swizzle_x;
- src_register.SwizzleY = swizzle_y;
- src_register.SwizzleZ = swizzle_z;
- src_register.SwizzleW = swizzle_w;
- src_register.Negate = negate;
- src_register.Indirect = indirect;
- src_register.Dimension = dimension;
- src_register.Index = index;
-
- instruction_grow( instruction, header );
-
- return src_register;
-}
-
-struct tgsi_full_src_register
-tgsi_default_full_src_register( void )
-{
- struct tgsi_full_src_register full_src_register;
-
- full_src_register.SrcRegister = tgsi_default_src_register();
- full_src_register.SrcRegisterExtSwz = tgsi_default_src_register_ext_swz();
- full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod();
- full_src_register.SrcRegisterInd = tgsi_default_src_register();
- full_src_register.SrcRegisterDim = tgsi_default_dimension();
- full_src_register.SrcRegisterDimInd = tgsi_default_src_register();
-
- return full_src_register;
-}
-
-struct tgsi_src_register_ext_swz
-tgsi_default_src_register_ext_swz( void )
-{
- struct tgsi_src_register_ext_swz src_register_ext_swz;
-
- src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ;
- src_register_ext_swz.ExtSwizzleX = TGSI_EXTSWIZZLE_X;
- src_register_ext_swz.ExtSwizzleY = TGSI_EXTSWIZZLE_Y;
- src_register_ext_swz.ExtSwizzleZ = TGSI_EXTSWIZZLE_Z;
- src_register_ext_swz.ExtSwizzleW = TGSI_EXTSWIZZLE_W;
- src_register_ext_swz.NegateX = 0;
- src_register_ext_swz.NegateY = 0;
- src_register_ext_swz.NegateZ = 0;
- src_register_ext_swz.NegateW = 0;
- src_register_ext_swz.ExtDivide = TGSI_EXTSWIZZLE_ONE;
- src_register_ext_swz.Padding = 0;
- src_register_ext_swz.Extended = 0;
-
- return src_register_ext_swz;
-}
-
-unsigned
-tgsi_compare_src_register_ext_swz(
- struct tgsi_src_register_ext_swz a,
- struct tgsi_src_register_ext_swz b )
-{
- a.Padding = b.Padding = 0;
- a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
-}
-
-struct tgsi_src_register_ext_swz
-tgsi_build_src_register_ext_swz(
- unsigned ext_swizzle_x,
- unsigned ext_swizzle_y,
- unsigned ext_swizzle_z,
- unsigned ext_swizzle_w,
- unsigned negate_x,
- unsigned negate_y,
- unsigned negate_z,
- unsigned negate_w,
- unsigned ext_divide,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_src_register_ext_swz src_register_ext_swz;
-
- assert( ext_swizzle_x <= TGSI_EXTSWIZZLE_ONE );
- assert( ext_swizzle_y <= TGSI_EXTSWIZZLE_ONE );
- assert( ext_swizzle_z <= TGSI_EXTSWIZZLE_ONE );
- assert( ext_swizzle_w <= TGSI_EXTSWIZZLE_ONE );
- assert( negate_x <= 1 );
- assert( negate_y <= 1 );
- assert( negate_z <= 1 );
- assert( negate_w <= 1 );
- assert( ext_divide <= TGSI_EXTSWIZZLE_ONE );
-
- src_register_ext_swz = tgsi_default_src_register_ext_swz();
- src_register_ext_swz.ExtSwizzleX = ext_swizzle_x;
- src_register_ext_swz.ExtSwizzleY = ext_swizzle_y;
- src_register_ext_swz.ExtSwizzleZ = ext_swizzle_z;
- src_register_ext_swz.ExtSwizzleW = ext_swizzle_w;
- src_register_ext_swz.NegateX = negate_x;
- src_register_ext_swz.NegateY = negate_y;
- src_register_ext_swz.NegateZ = negate_z;
- src_register_ext_swz.NegateW = negate_w;
- src_register_ext_swz.ExtDivide = ext_divide;
-
- prev_token->Extended = 1;
- instruction_grow( instruction, header );
-
- return src_register_ext_swz;
-}
-
-struct tgsi_src_register_ext_mod
-tgsi_default_src_register_ext_mod( void )
-{
- struct tgsi_src_register_ext_mod src_register_ext_mod;
-
- src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD;
- src_register_ext_mod.Complement = 0;
- src_register_ext_mod.Bias = 0;
- src_register_ext_mod.Scale2X = 0;
- src_register_ext_mod.Absolute = 0;
- src_register_ext_mod.Negate = 0;
- src_register_ext_mod.Padding = 0;
- src_register_ext_mod.Extended = 0;
-
- return src_register_ext_mod;
-}
-
-unsigned
-tgsi_compare_src_register_ext_mod(
- struct tgsi_src_register_ext_mod a,
- struct tgsi_src_register_ext_mod b )
-{
- a.Padding = b.Padding = 0;
- a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
-}
-
-struct tgsi_src_register_ext_mod
-tgsi_build_src_register_ext_mod(
- unsigned complement,
- unsigned bias,
- unsigned scale_2x,
- unsigned absolute,
- unsigned negate,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_src_register_ext_mod src_register_ext_mod;
-
- assert( complement <= 1 );
- assert( bias <= 1 );
- assert( scale_2x <= 1 );
- assert( absolute <= 1 );
- assert( negate <= 1 );
-
- src_register_ext_mod = tgsi_default_src_register_ext_mod();
- src_register_ext_mod.Complement = complement;
- src_register_ext_mod.Bias = bias;
- src_register_ext_mod.Scale2X = scale_2x;
- src_register_ext_mod.Absolute = absolute;
- src_register_ext_mod.Negate = negate;
-
- prev_token->Extended = 1;
- instruction_grow( instruction, header );
-
- return src_register_ext_mod;
-}
-
-struct tgsi_dimension
-tgsi_default_dimension( void )
-{
- struct tgsi_dimension dimension;
-
- dimension.Indirect = 0;
- dimension.Dimension = 0;
- dimension.Padding = 0;
- dimension.Index = 0;
- dimension.Extended = 0;
-
- return dimension;
-}
-
-struct tgsi_dimension
-tgsi_build_dimension(
- unsigned indirect,
- unsigned index,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_dimension dimension;
-
- dimension = tgsi_default_dimension();
- dimension.Indirect = indirect;
- dimension.Index = index;
-
- instruction_grow( instruction, header );
-
- return dimension;
-}
-
-struct tgsi_dst_register
-tgsi_default_dst_register( void )
-{
- struct tgsi_dst_register dst_register;
-
- dst_register.File = TGSI_FILE_NULL;
- dst_register.WriteMask = TGSI_WRITEMASK_XYZW;
- dst_register.Indirect = 0;
- dst_register.Dimension = 0;
- dst_register.Index = 0;
- dst_register.Padding = 0;
- dst_register.Extended = 0;
-
- return dst_register;
-}
-
-struct tgsi_dst_register
-tgsi_build_dst_register(
- unsigned file,
- unsigned mask,
- int index,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_dst_register dst_register;
-
- assert( file <= TGSI_FILE_IMMEDIATE );
- assert( mask <= TGSI_WRITEMASK_XYZW );
- assert( index >= -32768 && index <= 32767 );
-
- dst_register = tgsi_default_dst_register();
- dst_register.File = file;
- dst_register.WriteMask = mask;
- dst_register.Index = index;
-
- instruction_grow( instruction, header );
-
- return dst_register;
-}
-
-struct tgsi_full_dst_register
-tgsi_default_full_dst_register( void )
-{
- struct tgsi_full_dst_register full_dst_register;
-
- full_dst_register.DstRegister = tgsi_default_dst_register();
- full_dst_register.DstRegisterExtConcode =
- tgsi_default_dst_register_ext_concode();
- full_dst_register.DstRegisterExtModulate =
- tgsi_default_dst_register_ext_modulate();
-
- return full_dst_register;
-}
-
-struct tgsi_dst_register_ext_concode
-tgsi_default_dst_register_ext_concode( void )
-{
- struct tgsi_dst_register_ext_concode dst_register_ext_concode;
-
- dst_register_ext_concode.Type = TGSI_DST_REGISTER_EXT_TYPE_CONDCODE;
- dst_register_ext_concode.CondMask = TGSI_CC_TR;
- dst_register_ext_concode.CondSwizzleX = TGSI_SWIZZLE_X;
- dst_register_ext_concode.CondSwizzleY = TGSI_SWIZZLE_Y;
- dst_register_ext_concode.CondSwizzleZ = TGSI_SWIZZLE_Z;
- dst_register_ext_concode.CondSwizzleW = TGSI_SWIZZLE_W;
- dst_register_ext_concode.CondSrcIndex = 0;
- dst_register_ext_concode.Padding = 0;
- dst_register_ext_concode.Extended = 0;
-
- return dst_register_ext_concode;
-}
-
-unsigned
-tgsi_compare_dst_register_ext_concode(
- struct tgsi_dst_register_ext_concode a,
- struct tgsi_dst_register_ext_concode b )
-{
- a.Padding = b.Padding = 0;
- a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
-}
-
-struct tgsi_dst_register_ext_concode
-tgsi_build_dst_register_ext_concode(
- unsigned cc,
- unsigned swizzle_x,
- unsigned swizzle_y,
- unsigned swizzle_z,
- unsigned swizzle_w,
- int index,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_dst_register_ext_concode dst_register_ext_concode;
-
- assert( cc <= TGSI_CC_FL );
- assert( swizzle_x <= TGSI_SWIZZLE_W );
- assert( swizzle_y <= TGSI_SWIZZLE_W );
- assert( swizzle_z <= TGSI_SWIZZLE_W );
- assert( swizzle_w <= TGSI_SWIZZLE_W );
- assert( index >= -32768 && index <= 32767 );
-
- dst_register_ext_concode = tgsi_default_dst_register_ext_concode();
- dst_register_ext_concode.CondMask = cc;
- dst_register_ext_concode.CondSwizzleX = swizzle_x;
- dst_register_ext_concode.CondSwizzleY = swizzle_y;
- dst_register_ext_concode.CondSwizzleZ = swizzle_z;
- dst_register_ext_concode.CondSwizzleW = swizzle_w;
- dst_register_ext_concode.CondSrcIndex = index;
-
- prev_token->Extended = 1;
- instruction_grow( instruction, header );
-
- return dst_register_ext_concode;
-}
-
-struct tgsi_dst_register_ext_modulate
-tgsi_default_dst_register_ext_modulate( void )
-{
- struct tgsi_dst_register_ext_modulate dst_register_ext_modulate;
-
- dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE;
- dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X;
- dst_register_ext_modulate.Padding = 0;
- dst_register_ext_modulate.Extended = 0;
-
- return dst_register_ext_modulate;
-}
-
-unsigned
-tgsi_compare_dst_register_ext_modulate(
- struct tgsi_dst_register_ext_modulate a,
- struct tgsi_dst_register_ext_modulate b )
-{
- a.Padding = b.Padding = 0;
- a.Extended = b.Extended = 0;
- return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
-}
-
-struct tgsi_dst_register_ext_modulate
-tgsi_build_dst_register_ext_modulate(
- unsigned modulate,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header )
-{
- struct tgsi_dst_register_ext_modulate dst_register_ext_modulate;
-
- assert( modulate <= TGSI_MODULATE_EIGHTH );
-
- dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate();
- dst_register_ext_modulate.Modulate = modulate;
-
- prev_token->Extended = 1;
- instruction_grow( instruction, header );
-
- return dst_register_ext_modulate;
-}
-
+++ /dev/null
-#if !defined TGSI_BUILD_H
-#define TGSI_BUILD_H
-
-#if defined __cplusplus
-extern "C" {
-#endif // defined __cplusplus
-
-/*
- * version
- */
-
-struct tgsi_version
-tgsi_build_version( void );
-
-/*
- * header
- */
-
-struct tgsi_header
-tgsi_build_header( void );
-
-struct tgsi_processor
-tgsi_default_processor( void );
-
-struct tgsi_processor
-tgsi_build_processor(
- unsigned processor,
- struct tgsi_header *header );
-
-/*
- * declaration
- */
-
-struct tgsi_declaration
-tgsi_default_declaration( void );
-
-struct tgsi_declaration
-tgsi_build_declaration(
- unsigned file,
- unsigned declare,
- unsigned usage_mask,
- unsigned interpolate,
- unsigned semantic,
- struct tgsi_header *header );
-
-struct tgsi_full_declaration
-tgsi_default_full_declaration( void );
-
-unsigned
-tgsi_build_full_declaration(
- const struct tgsi_full_declaration *full_decl,
- struct tgsi_token *tokens,
- struct tgsi_header *header,
- unsigned maxsize );
-
-struct tgsi_declaration_range
-tgsi_build_declaration_range(
- unsigned first,
- unsigned last,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header );
-
-struct tgsi_declaration_mask
-tgsi_build_declaration_mask(
- unsigned mask,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header );
-
-struct tgsi_declaration_interpolation
-tgsi_default_declaration_interpolation( void );
-
-struct tgsi_declaration_interpolation
-tgsi_build_declaration_interpolation(
- unsigned interpolate,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header );
-
-struct tgsi_declaration_semantic
-tgsi_default_declaration_semantic( void );
-
-struct tgsi_declaration_semantic
-tgsi_build_declaration_semantic(
- unsigned semantic_name,
- unsigned semantic_index,
- struct tgsi_declaration *declaration,
- struct tgsi_header *header );
-
-/*
- * immediate
- */
-
-struct tgsi_immediate
-tgsi_default_immediate( void );
-
-struct tgsi_immediate
-tgsi_build_immediate(
- struct tgsi_header *header );
-
-struct tgsi_full_immediate
-tgsi_default_full_immediate( void );
-
-struct tgsi_immediate_float32
-tgsi_build_immediate_float32(
- float value,
- struct tgsi_immediate *immediate,
- struct tgsi_header *header );
-
-unsigned
-tgsi_build_full_immediate(
- const struct tgsi_full_immediate *full_imm,
- struct tgsi_token *tokens,
- struct tgsi_header *header,
- unsigned maxsize );
-
-/*
- * instruction
- */
-
-struct tgsi_instruction
-tgsi_default_instruction( void );
-
-struct tgsi_instruction
-tgsi_build_instruction(
- unsigned opcode,
- unsigned saturate,
- unsigned num_dst_regs,
- unsigned num_src_regs,
- struct tgsi_header *header );
-
-struct tgsi_full_instruction
-tgsi_default_full_instruction( void );
-
-unsigned
-tgsi_build_full_instruction(
- const struct tgsi_full_instruction *full_inst,
- struct tgsi_token *tokens,
- struct tgsi_header *header,
- unsigned maxsize );
-
-struct tgsi_instruction_ext_nv
-tgsi_default_instruction_ext_nv( void );
-
-unsigned
-tgsi_compare_instruction_ext_nv(
- struct tgsi_instruction_ext_nv a,
- struct tgsi_instruction_ext_nv b );
-
-struct tgsi_instruction_ext_nv
-tgsi_build_instruction_ext_nv(
- unsigned precision,
- unsigned cond_dst_index,
- unsigned cond_flow_index,
- unsigned cond_mask,
- unsigned cond_swizzle_x,
- unsigned cond_swizzle_y,
- unsigned cond_swizzle_z,
- unsigned cond_swizzle_w,
- unsigned cond_dst_update,
- unsigned cond_flow_update,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-struct tgsi_instruction_ext_label
-tgsi_default_instruction_ext_label( void );
-
-unsigned
-tgsi_compare_instruction_ext_label(
- struct tgsi_instruction_ext_label a,
- struct tgsi_instruction_ext_label b );
-
-struct tgsi_instruction_ext_label
-tgsi_build_instruction_ext_label(
- unsigned label,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-struct tgsi_instruction_ext_texture
-tgsi_default_instruction_ext_texture( void );
-
-unsigned
-tgsi_compare_instruction_ext_texture(
- struct tgsi_instruction_ext_texture a,
- struct tgsi_instruction_ext_texture b );
-
-struct tgsi_instruction_ext_texture
-tgsi_build_instruction_ext_texture(
- unsigned texture,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-struct tgsi_src_register
-tgsi_default_src_register( void );
-
-struct tgsi_src_register
-tgsi_build_src_register(
- unsigned file,
- unsigned swizzle_x,
- unsigned swizzle_y,
- unsigned swizzle_z,
- unsigned swizzle_w,
- unsigned negate,
- unsigned indirect,
- unsigned dimension,
- int index,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-struct tgsi_full_src_register
-tgsi_default_full_src_register( void );
-
-struct tgsi_src_register_ext_swz
-tgsi_default_src_register_ext_swz( void );
-
-unsigned
-tgsi_compare_src_register_ext_swz(
- struct tgsi_src_register_ext_swz a,
- struct tgsi_src_register_ext_swz b );
-
-struct tgsi_src_register_ext_swz
-tgsi_build_src_register_ext_swz(
- unsigned ext_swizzle_x,
- unsigned ext_swizzle_y,
- unsigned ext_swizzle_z,
- unsigned ext_swizzle_w,
- unsigned negate_x,
- unsigned negate_y,
- unsigned negate_z,
- unsigned negate_w,
- unsigned ext_divide,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-struct tgsi_src_register_ext_mod
-tgsi_default_src_register_ext_mod( void );
-
-unsigned
-tgsi_compare_src_register_ext_mod(
- struct tgsi_src_register_ext_mod a,
- struct tgsi_src_register_ext_mod b );
-
-struct tgsi_src_register_ext_mod
-tgsi_build_src_register_ext_mod(
- unsigned complement,
- unsigned bias,
- unsigned scale_2x,
- unsigned absolute,
- unsigned negate,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-struct tgsi_dimension
-tgsi_default_dimension( void );
-
-struct tgsi_dimension
-tgsi_build_dimension(
- unsigned indirect,
- unsigned index,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-struct tgsi_dst_register
-tgsi_default_dst_register( void );
-
-struct tgsi_dst_register
-tgsi_build_dst_register(
- unsigned file,
- unsigned mask,
- int index,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-struct tgsi_full_dst_register
-tgsi_default_full_dst_register( void );
-
-struct tgsi_dst_register_ext_concode
-tgsi_default_dst_register_ext_concode( void );
-
-unsigned
-tgsi_compare_dst_register_ext_concode(
- struct tgsi_dst_register_ext_concode a,
- struct tgsi_dst_register_ext_concode b );
-
-struct tgsi_dst_register_ext_concode
-tgsi_build_dst_register_ext_concode(
- unsigned cc,
- unsigned swizzle_x,
- unsigned swizzle_y,
- unsigned swizzle_z,
- unsigned swizzle_w,
- int index,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-struct tgsi_dst_register_ext_modulate
-tgsi_default_dst_register_ext_modulate( void );
-
-unsigned
-tgsi_compare_dst_register_ext_modulate(
- struct tgsi_dst_register_ext_modulate a,
- struct tgsi_dst_register_ext_modulate b );
-
-struct tgsi_dst_register_ext_modulate
-tgsi_build_dst_register_ext_modulate(
- unsigned modulate,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header );
-
-#if defined __cplusplus
-} // extern "C"
-#endif // defined __cplusplus
-
-#endif // !defined TGSI_BUILD_H
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include <stdio.h>
-
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi_dump.h"
-#include "tgsi_parse.h"
-#include "tgsi_build.h"
-
-struct gen_dump
-{
- unsigned tabs;
- void (* write)(
- struct gen_dump *dump,
- const void *data,
- unsigned size );
-};
-
-struct text_dump
-{
- struct gen_dump base;
- char *text;
- unsigned length;
- unsigned capacity;
-};
-
-static void
-_text_dump_write(
- struct gen_dump *dump,
- const void *data,
- unsigned size )
-{
- struct text_dump *td = (struct text_dump *) dump;
- unsigned new_length = td->length + size;
-
- if( new_length >= td->capacity ) {
- unsigned new_capacity = td->capacity;
-
- do {
- if( new_capacity == 0 ) {
- new_capacity = 256;
- }
- else {
- new_capacity *= 2;
- }
- } while( new_length >= new_capacity );
- td->text = (char *) REALLOC(
- td->text,
- td->capacity,
- new_capacity );
- td->capacity = new_capacity;
- }
- memcpy(
- &td->text[td->length],
- data,
- size );
- td->length = new_length;
- td->text[td->length] = '\0';
-}
-
-struct file_dump
-{
- struct gen_dump base;
- FILE *file;
-};
-
-static void
-_file_dump_write(
- struct gen_dump *dump,
- const void *data,
- unsigned size )
-{
- struct file_dump *fd = (struct file_dump *) dump;
-
-#if 0
- fwrite( data, 1, size, fd->file );
-#else
- {
- unsigned i;
-
- for (i = 0; i < size; i++ ) {
- fprintf( fd->file, "%c", ((const char *) data)[i] );
- }
- }
-#endif
-}
-
-static void
-gen_dump_str(
- struct gen_dump *dump,
- const char *str )
-{
- unsigned i;
- size_t len = strlen( str );
-
- for (i = 0; i < len; i++) {
- dump->write( dump, &str[i], 1 );
- if (str[i] == '\n') {
- unsigned i;
-
- for (i = 0; i < dump->tabs; i++) {
- dump->write( dump, " ", 4 );
- }
- }
- }
-}
-
-static void
-gen_dump_chr(
- struct gen_dump *dump,
- const char chr )
-{
- dump->write( dump, &chr, 1 );
-}
-
-static void
-gen_dump_uix(
- struct gen_dump *dump,
- const unsigned ui )
-{
- char str[36];
-
- sprintf( str, "0x%x", ui );
- gen_dump_str( dump, str );
-}
-
-static void
-gen_dump_uid(
- struct gen_dump *dump,
- const unsigned ui )
-{
- char str[16];
-
- sprintf( str, "%u", ui );
- gen_dump_str( dump, str );
-}
-
-static void
-gen_dump_sid(
- struct gen_dump *dump,
- const int si )
-{
- char str[16];
-
- sprintf( str, "%d", si );
- gen_dump_str( dump, str );
-}
-
-static void
-gen_dump_flt(
- struct gen_dump *dump,
- const float flt )
-{
- char str[48];
-
- sprintf( str, "%10.4f", flt );
- gen_dump_str( dump, str );
-}
-
-static void
-gen_dump_enum(
- struct gen_dump *dump,
- const unsigned e,
- const char **enums,
- const unsigned enums_count )
-{
- if (e >= enums_count) {
- gen_dump_uid( dump, e );
- }
- else {
- gen_dump_str( dump, enums[e] );
- }
-}
-
-static void
-gen_dump_tab(
- struct gen_dump *dump )
-{
- ++dump->tabs;
-}
-
-static void
-gen_dump_untab(
- struct gen_dump *dump )
-{
- assert( dump->tabs > 0 );
-
- --dump->tabs;
-}
-
-#define TXT(S) gen_dump_str( dump, S )
-#define CHR(C) gen_dump_chr( dump, C )
-#define UIX(I) gen_dump_uix( dump, I )
-#define UID(I) gen_dump_uid( dump, I )
-#define SID(I) gen_dump_sid( dump, I )
-#define FLT(F) gen_dump_flt( dump, F )
-#define TAB() gen_dump_tab( dump )
-#define UNT() gen_dump_untab( dump )
-#define ENM(E,ENUMS) gen_dump_enum( dump, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
-
-static const char *TGSI_PROCESSOR_TYPES[] =
-{
- "PROCESSOR_FRAGMENT",
- "PROCESSOR_VERTEX",
- "PROCESSOR_GEOMETRY"
-};
-
-static const char *TGSI_PROCESSOR_TYPES_SHORT[] =
-{
- "FRAG",
- "VERT",
- "GEOM"
-};
-
-static const char *TGSI_TOKEN_TYPES[] =
-{
- "TOKEN_TYPE_DECLARATION",
- "TOKEN_TYPE_IMMEDIATE",
- "TOKEN_TYPE_INSTRUCTION"
-};
-
-static const char *TGSI_FILES[] =
-{
- "FILE_NULL",
- "FILE_CONSTANT",
- "FILE_INPUT",
- "FILE_OUTPUT",
- "FILE_TEMPORARY",
- "FILE_SAMPLER",
- "FILE_ADDRESS",
- "FILE_IMMEDIATE"
-};
-
-static const char *TGSI_FILES_SHORT[] =
-{
- "NULL",
- "CONST",
- "IN",
- "OUT",
- "TEMP",
- "SAMP",
- "ADDR",
- "IMM"
-};
-
-static const char *TGSI_DECLARES[] =
-{
- "DECLARE_RANGE",
- "DECLARE_MASK"
-};
-
-static const char *TGSI_INTERPOLATES[] =
-{
- "INTERPOLATE_CONSTANT",
- "INTERPOLATE_LINEAR",
- "INTERPOLATE_PERSPECTIVE",
- "INTERPOLATE_ATTRIB"
-};
-
-static const char *TGSI_INTERPOLATES_SHORT[] =
-{
- "CONSTANT",
- "LINEAR",
- "PERSPECTIVE",
- "ATTRIB"
-};
-
-static const char *TGSI_SEMANTICS[] =
-{
- "SEMANTIC_POSITION",
- "SEMANTIC_COLOR",
- "SEMANTIC_BCOLOR",
- "SEMANTIC_FOG",
- "SEMANTIC_PSIZE",
- "SEMANTIC_GENERIC,"
-};
-
-static const char *TGSI_SEMANTICS_SHORT[] =
-{
- "POSITION",
- "COLOR",
- "BCOLOR",
- "FOG",
- "PSIZE",
- "GENERIC",
-};
-
-static const char *TGSI_IMMS[] =
-{
- "IMM_FLOAT32"
-};
-
-static const char *TGSI_IMMS_SHORT[] =
-{
- "FLT32"
-};
-
-static const char *TGSI_OPCODES[] =
-{
- "OPCODE_ARL",
- "OPCODE_MOV",
- "OPCODE_LIT",
- "OPCODE_RCP",
- "OPCODE_RSQ",
- "OPCODE_EXP",
- "OPCODE_LOG",
- "OPCODE_MUL",
- "OPCODE_ADD",
- "OPCODE_DP3",
- "OPCODE_DP4",
- "OPCODE_DST",
- "OPCODE_MIN",
- "OPCODE_MAX",
- "OPCODE_SLT",
- "OPCODE_SGE",
- "OPCODE_MAD",
- "OPCODE_SUB",
- "OPCODE_LERP",
- "OPCODE_CND",
- "OPCODE_CND0",
- "OPCODE_DOT2ADD",
- "OPCODE_INDEX",
- "OPCODE_NEGATE",
- "OPCODE_FRAC",
- "OPCODE_CLAMP",
- "OPCODE_FLOOR",
- "OPCODE_ROUND",
- "OPCODE_EXPBASE2",
- "OPCODE_LOGBASE2",
- "OPCODE_POWER",
- "OPCODE_CROSSPRODUCT",
- "OPCODE_MULTIPLYMATRIX",
- "OPCODE_ABS",
- "OPCODE_RCC",
- "OPCODE_DPH",
- "OPCODE_COS",
- "OPCODE_DDX",
- "OPCODE_DDY",
- "OPCODE_KILP",
- "OPCODE_PK2H",
- "OPCODE_PK2US",
- "OPCODE_PK4B",
- "OPCODE_PK4UB",
- "OPCODE_RFL",
- "OPCODE_SEQ",
- "OPCODE_SFL",
- "OPCODE_SGT",
- "OPCODE_SIN",
- "OPCODE_SLE",
- "OPCODE_SNE",
- "OPCODE_STR",
- "OPCODE_TEX",
- "OPCODE_TXD",
- "OPCODE_UP2H",
- "OPCODE_UP2US",
- "OPCODE_UP4B",
- "OPCODE_UP4UB",
- "OPCODE_X2D",
- "OPCODE_ARA",
- "OPCODE_ARR",
- "OPCODE_BRA",
- "OPCODE_CAL",
- "OPCODE_RET",
- "OPCODE_SSG",
- "OPCODE_CMP",
- "OPCODE_SCS",
- "OPCODE_TXB",
- "OPCODE_NRM",
- "OPCODE_DIV",
- "OPCODE_DP2",
- "OPCODE_TXL",
- "OPCODE_BRK",
- "OPCODE_IF",
- "OPCODE_LOOP",
- "OPCODE_REP",
- "OPCODE_ELSE",
- "OPCODE_ENDIF",
- "OPCODE_ENDLOOP",
- "OPCODE_ENDREP",
- "OPCODE_PUSHA",
- "OPCODE_POPA",
- "OPCODE_CEIL",
- "OPCODE_I2F",
- "OPCODE_NOT",
- "OPCODE_TRUNC",
- "OPCODE_SHL",
- "OPCODE_SHR",
- "OPCODE_AND",
- "OPCODE_OR",
- "OPCODE_MOD",
- "OPCODE_XOR",
- "OPCODE_SAD",
- "OPCODE_TXF",
- "OPCODE_TXQ",
- "OPCODE_CONT",
- "OPCODE_EMIT",
- "OPCODE_ENDPRIM",
- "OPCODE_BGNLOOP2",
- "OPCODE_BGNSUB",
- "OPCODE_ENDLOOP2",
- "OPCODE_ENDSUB",
- "OPCODE_NOISE1",
- "OPCODE_NOISE2",
- "OPCODE_NOISE3",
- "OPCODE_NOISE4",
- "OPCODE_NOP",
- "OPCODE_TEXBEM",
- "OPCODE_TEXBEML",
- "OPCODE_TEXREG2AR",
- "OPCODE_TEXM3X2PAD",
- "OPCODE_TEXM3X2TEX",
- "OPCODE_TEXM3X3PAD",
- "OPCODE_TEXM3X3TEX",
- "OPCODE_TEXM3X3SPEC",
- "OPCODE_TEXM3X3VSPEC",
- "OPCODE_TEXREG2GB",
- "OPCODE_TEXREG2RGB",
- "OPCODE_TEXDP3TEX",
- "OPCODE_TEXDP3",
- "OPCODE_TEXM3X3",
- "OPCODE_TEXM3X2DEPTH",
- "OPCODE_TEXDEPTH",
- "OPCODE_BEM",
- "OPCODE_M4X3",
- "OPCODE_M3X4",
- "OPCODE_M3X3",
- "OPCODE_M3X2",
- "OPCODE_NRM4",
- "OPCODE_CALLNZ",
- "OPCODE_IFC",
- "OPCODE_BREAKC",
- "OPCODE_KIL",
- "OPCODE_END"
-};
-
-static const char *TGSI_OPCODES_SHORT[] =
-{
- "ARL",
- "MOV",
- "LIT",
- "RCP",
- "RSQ",
- "EXP",
- "LOG",
- "MUL",
- "ADD",
- "DP3",
- "DP4",
- "DST",
- "MIN",
- "MAX",
- "SLT",
- "SGE",
- "MAD",
- "SUB",
- "LERP",
- "CND",
- "CND0",
- "DOT2ADD",
- "INDEX",
- "NEGATE",
- "FRAC",
- "CLAMP",
- "FLOOR",
- "ROUND",
- "EXPBASE2",
- "LOGBASE2",
- "POWER",
- "CROSSPRODUCT",
- "MULTIPLYMATRIX",
- "ABS",
- "RCC",
- "DPH",
- "COS",
- "DDX",
- "DDY",
- "KILP",
- "PK2H",
- "PK2US",
- "PK4B",
- "PK4UB",
- "RFL",
- "SEQ",
- "SFL",
- "SGT",
- "SIN",
- "SLE",
- "SNE",
- "STR",
- "TEX",
- "TXD",
- "UP2H",
- "UP2US",
- "UP4B",
- "UP4UB",
- "X2D",
- "ARA",
- "ARR",
- "BRA",
- "CAL",
- "RET",
- "SSG",
- "CMP",
- "SCS",
- "TXB",
- "NRM",
- "DIV",
- "DP2",
- "TXL",
- "BRK",
- "IF",
- "LOOP",
- "REP",
- "ELSE",
- "ENDIF",
- "ENDLOOP",
- "ENDREP",
- "PUSHA",
- "POPA",
- "CEIL",
- "I2F",
- "NOT",
- "TRUNC",
- "SHL",
- "SHR",
- "AND",
- "OR",
- "MOD",
- "XOR",
- "SAD",
- "TXF",
- "TXQ",
- "CONT",
- "EMIT",
- "ENDPRIM",
- "BGNLOOP2",
- "BGNSUB",
- "ENDLOOP2",
- "ENDSUB",
- "NOISE1",
- "NOISE2",
- "NOISE3",
- "NOISE4",
- "NOP",
- "TEXBEM",
- "TEXBEML",
- "TEXREG2AR",
- "TEXM3X2PAD",
- "TEXM3X2TEX",
- "TEXM3X3PAD",
- "TEXM3X3TEX",
- "TEXM3X3SPEC",
- "TEXM3X3VSPEC",
- "TEXREG2GB",
- "TEXREG2RGB",
- "TEXDP3TEX",
- "TEXDP3",
- "TEXM3X3",
- "TEXM3X2DEPTH",
- "TEXDEPTH",
- "BEM",
- "M4X3",
- "M3X4",
- "M3X3",
- "M3X2",
- "NRM4",
- "CALLNZ",
- "IFC",
- "BREAKC",
- "KIL",
- "END"
-};
-
-static const char *TGSI_SATS[] =
-{
- "SAT_NONE",
- "SAT_ZERO_ONE",
- "SAT_MINUS_PLUS_ONE"
-};
-
-static const char *TGSI_INSTRUCTION_EXTS[] =
-{
- "INSTRUCTION_EXT_TYPE_NV",
- "INSTRUCTION_EXT_TYPE_LABEL",
- "INSTRUCTION_EXT_TYPE_TEXTURE"
-};
-
-static const char *TGSI_PRECISIONS[] =
-{
- "PRECISION_DEFAULT",
- "TGSI_PRECISION_FLOAT32",
- "TGSI_PRECISION_FLOAT16",
- "TGSI_PRECISION_FIXED12"
-};
-
-static const char *TGSI_CCS[] =
-{
- "CC_GT",
- "CC_EQ",
- "CC_LT",
- "CC_UN",
- "CC_GE",
- "CC_LE",
- "CC_NE",
- "CC_TR",
- "CC_FL"
-};
-
-static const char *TGSI_SWIZZLES[] =
-{
- "SWIZZLE_X",
- "SWIZZLE_Y",
- "SWIZZLE_Z",
- "SWIZZLE_W"
-};
-
-static const char *TGSI_SWIZZLES_SHORT[] =
-{
- "x",
- "y",
- "z",
- "w"
-};
-
-static const char *TGSI_TEXTURES[] =
-{
- "TEXTURE_UNKNOWN",
- "TEXTURE_1D",
- "TEXTURE_2D",
- "TEXTURE_3D",
- "TEXTURE_CUBE",
- "TEXTURE_RECT",
- "TEXTURE_SHADOW1D",
- "TEXTURE_SHADOW2D",
- "TEXTURE_SHADOWRECT"
-};
-
-static const char *TGSI_SRC_REGISTER_EXTS[] =
-{
- "SRC_REGISTER_EXT_TYPE_SWZ",
- "SRC_REGISTER_EXT_TYPE_MOD"
-};
-
-static const char *TGSI_EXTSWIZZLES[] =
-{
- "EXTSWIZZLE_X",
- "EXTSWIZZLE_Y",
- "EXTSWIZZLE_Z",
- "EXTSWIZZLE_W",
- "EXTSWIZZLE_ZERO",
- "EXTSWIZZLE_ONE"
-};
-
-static const char *TGSI_EXTSWIZZLES_SHORT[] =
-{
- "x",
- "y",
- "z",
- "w",
- "0",
- "1"
-};
-
-static const char *TGSI_WRITEMASKS[] =
-{
- "0",
- "WRITEMASK_X",
- "WRITEMASK_Y",
- "WRITEMASK_XY",
- "WRITEMASK_Z",
- "WRITEMASK_XZ",
- "WRITEMASK_YZ",
- "WRITEMASK_XYZ",
- "WRITEMASK_W",
- "WRITEMASK_XW",
- "WRITEMASK_YW",
- "WRITEMASK_XYW",
- "WRITEMASK_ZW",
- "WRITEMASK_XZW",
- "WRITEMASK_YZW",
- "WRITEMASK_XYZW"
-};
-
-static const char *TGSI_DST_REGISTER_EXTS[] =
-{
- "DST_REGISTER_EXT_TYPE_CONDCODE",
- "DST_REGISTER_EXT_TYPE_MODULATE"
-};
-
-static const char *TGSI_MODULATES[] =
-{
- "MODULATE_1X",
- "MODULATE_2X",
- "MODULATE_4X",
- "MODULATE_8X",
- "MODULATE_HALF",
- "MODULATE_QUARTER",
- "MODULATE_EIGHTH"
-};
-
-static void
-dump_declaration_short(
- struct gen_dump *dump,
- struct tgsi_full_declaration *decl )
-{
- TXT( "\nDCL " );
- ENM( decl->Declaration.File, TGSI_FILES_SHORT );
-
- switch( decl->Declaration.Declare ) {
- case TGSI_DECLARE_RANGE:
- CHR( '[' );
- UID( decl->u.DeclarationRange.First );
- if( decl->u.DeclarationRange.First != decl->u.DeclarationRange.Last ) {
- TXT( ".." );
- UID( decl->u.DeclarationRange.Last );
- }
- CHR( ']' );
- break;
- default:
- assert( 0 );
- }
-
- if( decl->Declaration.UsageMask != TGSI_WRITEMASK_XYZW ) {
- CHR( '.' );
- if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) {
- CHR( 'x' );
- }
- if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) {
- CHR( 'y' );
- }
- if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) {
- CHR( 'z' );
- }
- if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) {
- CHR( 'w' );
- }
- }
-
- if( decl->Declaration.Interpolate ) {
- TXT( ", " );
- ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT );
- }
-
- if( decl->Declaration.Semantic ) {
- TXT( ", " );
- ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT );
- CHR( '[' );
- UID( decl->Semantic.SemanticIndex );
- CHR( ']' );
- }
-}
-
-static void
-dump_declaration_verbose(
- struct gen_dump *dump,
- struct tgsi_full_declaration *decl,
- unsigned ignored,
- unsigned deflt,
- struct tgsi_full_declaration *fd )
-{
- TXT( "\nFile : " );
- ENM( decl->Declaration.File, TGSI_FILES );
- TXT( "\nDeclare : " );
- ENM( decl->Declaration.Declare, TGSI_DECLARES );
- if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) {
- TXT( "\nUsageMask : " );
- if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) {
- CHR( 'X' );
- }
- if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) {
- CHR( 'Y' );
- }
- if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) {
- CHR( 'Z' );
- }
- if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) {
- CHR( 'W' );
- }
- }
- if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) {
- TXT( "\nInterpolate: " );
- UID( decl->Declaration.Interpolate );
- }
- if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) {
- TXT( "\nSemantic : " );
- UID( decl->Declaration.Semantic );
- }
- if( ignored ) {
- TXT( "\nPadding : " );
- UIX( decl->Declaration.Padding );
- }
-
- CHR( '\n' );
- switch( decl->Declaration.Declare ) {
- case TGSI_DECLARE_RANGE:
- TXT( "\nFirst: " );
- UID( decl->u.DeclarationRange.First );
- TXT( "\nLast : " );
- UID( decl->u.DeclarationRange.Last );
- break;
-
- case TGSI_DECLARE_MASK:
- TXT( "\nMask: " );
- UIX( decl->u.DeclarationMask.Mask );
- break;
-
- default:
- assert( 0 );
- }
-
- if( decl->Declaration.Interpolate ) {
- CHR( '\n' );
- TXT( "\nInterpolate: " );
- ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES );
- if( ignored ) {
- TXT( "\nPadding : " );
- UIX( decl->Interpolation.Padding );
- }
- }
-
- if( decl->Declaration.Semantic ) {
- CHR( '\n' );
- TXT( "\nSemanticName : " );
- ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS );
- TXT( "\nSemanticIndex: " );
- UID( decl->Semantic.SemanticIndex );
- if( ignored ) {
- TXT( "\nPadding : " );
- UIX( decl->Semantic.Padding );
- }
- }
-}
-
-static void
-dump_immediate_short(
- struct gen_dump *dump,
- struct tgsi_full_immediate *imm )
-{
- unsigned i;
-
- TXT( "\nIMM " );
- ENM( imm->Immediate.DataType, TGSI_IMMS_SHORT );
-
- TXT( " { " );
- for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
- switch( imm->Immediate.DataType ) {
- case TGSI_IMM_FLOAT32:
- FLT( imm->u.ImmediateFloat32[i].Float );
- break;
-
- default:
- assert( 0 );
- }
-
- if( i < imm->Immediate.Size - 2 ) {
- TXT( ", " );
- }
- }
- TXT( " }" );
-}
-
-static void
-dump_immediate_verbose(
- struct gen_dump *dump,
- struct tgsi_full_immediate *imm,
- unsigned ignored )
-{
- unsigned i;
-
- TXT( "\nDataType : " );
- ENM( imm->Immediate.DataType, TGSI_IMMS );
- if( ignored ) {
- TXT( "\nPadding : " );
- UIX( imm->Immediate.Padding );
- }
-
- for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
- CHR( '\n' );
- switch( imm->Immediate.DataType ) {
- case TGSI_IMM_FLOAT32:
- TXT( "\nFloat: " );
- FLT( imm->u.ImmediateFloat32[i].Float );
- break;
-
- default:
- assert( 0 );
- }
- }
-}
-
-static void
-dump_instruction_short(
- struct gen_dump *dump,
- struct tgsi_full_instruction *inst,
- unsigned instno )
-{
- unsigned i;
- boolean first_reg = TRUE;
-
- CHR( '\n' );
- UID( instno );
- CHR( ':' );
- ENM( inst->Instruction.Opcode, TGSI_OPCODES_SHORT );
-
- switch( inst->Instruction.Saturate ) {
- case TGSI_SAT_NONE:
- break;
- case TGSI_SAT_ZERO_ONE:
- TXT( "_SAT" );
- break;
- case TGSI_SAT_MINUS_PLUS_ONE:
- TXT( "_SAT[-1,1]" );
- break;
- default:
- assert( 0 );
- }
-
- for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
- struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
-
- if( !first_reg ) {
- CHR( ',' );
- }
- CHR( ' ' );
-
- ENM( dst->DstRegister.File, TGSI_FILES_SHORT );
-
- CHR( '[' );
- SID( dst->DstRegister.Index );
- CHR( ']' );
-
- if( dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW ) {
- CHR( '.' );
- if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_X ) {
- CHR( 'x' );
- }
- if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_Y ) {
- CHR( 'y' );
- }
- if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_Z ) {
- CHR( 'z' );
- }
- if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_W ) {
- CHR( 'w' );
- }
- }
-
- first_reg = FALSE;
- }
-
- for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
- struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
-
- if( !first_reg ) {
- CHR( ',' );
- }
- CHR( ' ' );
-
- if( src->SrcRegisterExtMod.Complement ) {
- TXT( "(1 - " );
- }
- if( src->SrcRegisterExtMod.Negate ) {
- CHR( '-' );
- }
- if( src->SrcRegisterExtMod.Absolute ) {
- CHR( '|' );
- }
- if( src->SrcRegister.Negate ) {
- CHR( '-' );
- }
-
- ENM( src->SrcRegister.File, TGSI_FILES_SHORT );
-
- CHR( '[' );
- SID( src->SrcRegister.Index );
- CHR( ']' );
-
- if (src->SrcRegister.Extended) {
- if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
- src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
- src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
- src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) {
- CHR( '.' );
- ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT );
- ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT );
- ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT );
- ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT );
- }
- }
- else if( src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
- src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
- src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
- src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ) {
- CHR( '.' );
- ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES_SHORT );
- ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES_SHORT );
- ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES_SHORT );
- ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES_SHORT );
- }
-
- if( src->SrcRegisterExtMod.Absolute ) {
- CHR( '|' );
- }
- if( src->SrcRegisterExtMod.Complement ) {
- CHR( ')' );
- }
-
- first_reg = FALSE;
- }
-
- switch( inst->Instruction.Opcode ) {
- case TGSI_OPCODE_IF:
- case TGSI_OPCODE_ELSE:
- case TGSI_OPCODE_BGNLOOP2:
- case TGSI_OPCODE_ENDLOOP2:
- case TGSI_OPCODE_CAL:
- TXT( " :" );
- UID( inst->InstructionExtLabel.Label );
- break;
- }
-}
-
-static void
-dump_instruction_verbose(
- struct gen_dump *dump,
- struct tgsi_full_instruction *inst,
- unsigned ignored,
- unsigned deflt,
- struct tgsi_full_instruction *fi )
-{
- unsigned i;
-
- TXT( "\nOpcode : " );
- ENM( inst->Instruction.Opcode, TGSI_OPCODES );
- if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) {
- TXT( "\nSaturate : " );
- ENM( inst->Instruction.Saturate, TGSI_SATS );
- }
- if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) {
- TXT( "\nNumDstRegs : " );
- UID( inst->Instruction.NumDstRegs );
- }
- if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) {
- TXT( "\nNumSrcRegs : " );
- UID( inst->Instruction.NumSrcRegs );
- }
- if( ignored ) {
- TXT( "\nPadding : " );
- UIX( inst->Instruction.Padding );
- }
-
- if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) {
- CHR( '\n' );
- TXT( "\nType : " );
- ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS );
- if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) {
- TXT( "\nPrecision : " );
- ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS );
- }
- if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) {
- TXT( "\nCondDstIndex : " );
- UID( inst->InstructionExtNv.CondDstIndex );
- }
- if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) {
- TXT( "\nCondFlowIndex : " );
- UID( inst->InstructionExtNv.CondFlowIndex );
- }
- if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) {
- TXT( "\nCondMask : " );
- ENM( inst->InstructionExtNv.CondMask, TGSI_CCS );
- }
- if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) {
- TXT( "\nCondSwizzleX : " );
- ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES );
- }
- if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) {
- TXT( "\nCondSwizzleY : " );
- ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES );
- }
- if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) {
- TXT( "\nCondSwizzleZ : " );
- ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES );
- }
- if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) {
- TXT( "\nCondSwizzleW : " );
- ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES );
- }
- if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) {
- TXT( "\nCondDstUpdate : " );
- UID( inst->InstructionExtNv.CondDstUpdate );
- }
- if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) {
- TXT( "\nCondFlowEnable: " );
- UID( inst->InstructionExtNv.CondFlowEnable );
- }
- if( ignored ) {
- TXT( "\nPadding : " );
- UIX( inst->InstructionExtNv.Padding );
- if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) {
- TXT( "\nExtended : " );
- UID( inst->InstructionExtNv.Extended );
- }
- }
- }
-
- if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) {
- CHR( '\n' );
- TXT( "\nType : " );
- ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS );
- if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) {
- TXT( "\nLabel : " );
- UID( inst->InstructionExtLabel.Label );
- }
- if( ignored ) {
- TXT( "\nPadding : " );
- UIX( inst->InstructionExtLabel.Padding );
- if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) {
- TXT( "\nExtended: " );
- UID( inst->InstructionExtLabel.Extended );
- }
- }
- }
-
- if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) {
- CHR( '\n' );
- TXT( "\nType : " );
- ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS );
- if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) {
- TXT( "\nTexture : " );
- ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES );
- }
- if( ignored ) {
- TXT( "\nPadding : " );
- UIX( inst->InstructionExtTexture.Padding );
- if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) {
- TXT( "\nExtended: " );
- UID( inst->InstructionExtTexture.Extended );
- }
- }
- }
-
- for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
- struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
- struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i];
-
- CHR( '\n' );
- TXT( "\nFile : " );
- ENM( dst->DstRegister.File, TGSI_FILES );
- if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) {
- TXT( "\nWriteMask: " );
- ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS );
- }
- if( ignored ) {
- if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) {
- TXT( "\nIndirect : " );
- UID( dst->DstRegister.Indirect );
- }
- if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) {
- TXT( "\nDimension: " );
- UID( dst->DstRegister.Dimension );
- }
- }
- if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) {
- TXT( "\nIndex : " );
- SID( dst->DstRegister.Index );
- }
- if( ignored ) {
- TXT( "\nPadding : " );
- UIX( dst->DstRegister.Padding );
- if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) {
- TXT( "\nExtended : " );
- UID( dst->DstRegister.Extended );
- }
- }
-
- if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) {
- CHR( '\n' );
- TXT( "\nType : " );
- ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS );
- if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) {
- TXT( "\nCondMask : " );
- ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS );
- }
- if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) {
- TXT( "\nCondSwizzleX: " );
- ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES );
- }
- if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) {
- TXT( "\nCondSwizzleY: " );
- ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES );
- }
- if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) {
- TXT( "\nCondSwizzleZ: " );
- ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES );
- }
- if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) {
- TXT( "\nCondSwizzleW: " );
- ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES );
- }
- if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) {
- TXT( "\nCondSrcIndex: " );
- UID( dst->DstRegisterExtConcode.CondSrcIndex );
- }
- if( ignored ) {
- TXT( "\nPadding : " );
- UIX( dst->DstRegisterExtConcode.Padding );
- if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) {
- TXT( "\nExtended : " );
- UID( dst->DstRegisterExtConcode.Extended );
- }
- }
- }
-
- if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) {
- CHR( '\n' );
- TXT( "\nType : " );
- ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS );
- if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) {
- TXT( "\nModulate: " );
- ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES );
- }
- if( ignored ) {
- TXT( "\nPadding : " );
- UIX( dst->DstRegisterExtModulate.Padding );
- if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) {
- TXT( "\nExtended: " );
- UID( dst->DstRegisterExtModulate.Extended );
- }
- }
- }
- }
-
- for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
- struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
- struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i];
-
- CHR( '\n' );
- TXT( "\nFile : ");
- ENM( src->SrcRegister.File, TGSI_FILES );
- if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) {
- TXT( "\nSwizzleX : " );
- ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES );
- }
- if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) {
- TXT( "\nSwizzleY : " );
- ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES );
- }
- if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) {
- TXT( "\nSwizzleZ : " );
- ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES );
- }
- if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) {
- TXT( "\nSwizzleW : " );
- ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES );
- }
- if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) {
- TXT( "\nNegate : " );
- UID( src->SrcRegister.Negate );
- }
- if( ignored ) {
- if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) {
- TXT( "\nIndirect : " );
- UID( src->SrcRegister.Indirect );
- }
- if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) {
- TXT( "\nDimension: " );
- UID( src->SrcRegister.Dimension );
- }
- }
- if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) {
- TXT( "\nIndex : " );
- SID( src->SrcRegister.Index );
- }
- if( ignored ) {
- if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) {
- TXT( "\nExtended : " );
- UID( src->SrcRegister.Extended );
- }
- }
-
- if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) {
- CHR( '\n' );
- TXT( "\nType : " );
- ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS );
- if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) {
- TXT( "\nExtSwizzleX: " );
- ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES );
- }
- if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) {
- TXT( "\nExtSwizzleY: " );
- ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES );
- }
- if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) {
- TXT( "\nExtSwizzleZ: " );
- ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES );
- }
- if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) {
- TXT( "\nExtSwizzleW: " );
- ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES );
- }
- if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) {
- TXT( "\nNegateX : " );
- UID( src->SrcRegisterExtSwz.NegateX );
- }
- if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) {
- TXT( "\nNegateY : " );
- UID( src->SrcRegisterExtSwz.NegateY );
- }
- if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) {
- TXT( "\nNegateZ : " );
- UID( src->SrcRegisterExtSwz.NegateZ );
- }
- if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) {
- TXT( "\nNegateW : " );
- UID( src->SrcRegisterExtSwz.NegateW );
- }
- if( deflt || fs->SrcRegisterExtSwz.ExtDivide != src->SrcRegisterExtSwz.ExtDivide ) {
- TXT( "\nExtDivide : " );
- ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES );
- }
- if( ignored ) {
- TXT( "\nPadding : " );
- UIX( src->SrcRegisterExtSwz.Padding );
- if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) {
- TXT( "\nExtended : " );
- UID( src->SrcRegisterExtSwz.Extended );
- }
- }
- }
-
- if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) {
- CHR( '\n' );
- TXT( "\nType : " );
- ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS );
- if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) {
- TXT( "\nComplement: " );
- UID( src->SrcRegisterExtMod.Complement );
- }
- if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) {
- TXT( "\nBias : " );
- UID( src->SrcRegisterExtMod.Bias );
- }
- if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) {
- TXT( "\nScale2X : " );
- UID( src->SrcRegisterExtMod.Scale2X );
- }
- if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) {
- TXT( "\nAbsolute : " );
- UID( src->SrcRegisterExtMod.Absolute );
- }
- if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) {
- TXT( "\nNegate : " );
- UID( src->SrcRegisterExtMod.Negate );
- }
- if( ignored ) {
- TXT( "\nPadding : " );
- UIX( src->SrcRegisterExtMod.Padding );
- if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) {
- TXT( "\nExtended : " );
- UID( src->SrcRegisterExtMod.Extended );
- }
- }
- }
- }
-}
-
-static void
-dump_gen(
- struct gen_dump *dump,
- const struct tgsi_token *tokens,
- unsigned flags )
-{
- struct tgsi_parse_context parse;
- struct tgsi_full_instruction fi;
- struct tgsi_full_declaration fd;
- unsigned verbose = flags & TGSI_DUMP_VERBOSE;
- unsigned ignored = !(flags & TGSI_DUMP_NO_IGNORED);
- unsigned deflt = !(flags & TGSI_DUMP_NO_DEFAULT);
- unsigned instno = 0;
-
- dump->tabs = 0;
-
- /* sanity check */
- assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
-
- tgsi_parse_init( &parse, tokens );
-
- TXT( "tgsi-dump begin -----------------" );
-
- CHR( '\n' );
- ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT );
- CHR( ' ' );
- UID( parse.FullVersion.Version.MajorVersion );
- CHR( '.' );
- UID( parse.FullVersion.Version.MinorVersion );
-
- if( verbose ) {
- TXT( "\nMajorVersion: " );
- UID( parse.FullVersion.Version.MajorVersion );
- TXT( "\nMinorVersion: " );
- UID( parse.FullVersion.Version.MinorVersion );
- CHR( '\n' );
-
- TXT( "\nHeaderSize: " );
- UID( parse.FullHeader.Header.HeaderSize );
- TXT( "\nBodySize : " );
- UID( parse.FullHeader.Header.BodySize );
- TXT( "\nProcessor : " );
- ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES );
- CHR( '\n' );
- }
-
- fi = tgsi_default_full_instruction();
- fd = tgsi_default_full_declaration();
-
- while( !tgsi_parse_end_of_tokens( &parse ) ) {
- tgsi_parse_token( &parse );
-
- switch( parse.FullToken.Token.Type ) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- dump_declaration_short(
- dump,
- &parse.FullToken.FullDeclaration );
- break;
-
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- dump_immediate_short(
- dump,
- &parse.FullToken.FullImmediate );
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- dump_instruction_short(
- dump,
- &parse.FullToken.FullInstruction,
- instno );
- instno++;
- break;
-
- default:
- assert( 0 );
- }
-
- if( verbose ) {
- TXT( "\nType : " );
- ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES );
- if( ignored ) {
- TXT( "\nSize : " );
- UID( parse.FullToken.Token.Size );
- if( deflt || parse.FullToken.Token.Extended ) {
- TXT( "\nExtended : " );
- UID( parse.FullToken.Token.Extended );
- }
- }
-
- switch( parse.FullToken.Token.Type ) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- dump_declaration_verbose(
- dump,
- &parse.FullToken.FullDeclaration,
- ignored,
- deflt,
- &fd );
- break;
-
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- dump_immediate_verbose(
- dump,
- &parse.FullToken.FullImmediate,
- ignored );
- break;
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- dump_instruction_verbose(
- dump,
- &parse.FullToken.FullInstruction,
- ignored,
- deflt,
- &fi );
- break;
-
- default:
- assert( 0 );
- }
-
- CHR( '\n' );
- }
- }
-
- TXT( "\ntgsi-dump end -------------------\n" );
-
- tgsi_parse_free( &parse );
-}
-
-
-static void
-sanity_checks(void)
-{
- assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
- assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0);
-}
-
-
-void
-tgsi_dump(
- const struct tgsi_token *tokens,
- unsigned flags )
-{
- struct file_dump dump;
-
- sanity_checks();
-
- dump.base.write = _file_dump_write;
-#if 0
- {
- static unsigned counter = 0;
- char buffer[64];
- sprintf( buffer, "tgsi-dump-%.4u.txt", counter++ );
- dump.file = fopen( buffer, "wt" );
- }
-#else
- dump.file = stderr;
-#endif
-
- dump_gen(
- &dump.base,
- tokens,
- flags );
-
-#if 0
- fclose( dump.file );
-#endif
-}
-
-void
-tgsi_dump_str(
- char **str,
- const struct tgsi_token *tokens,
- unsigned flags )
-{
- struct text_dump dump;
-
- dump.base.write = _text_dump_write;
- dump.text = NULL;
- dump.length = 0;
- dump.capacity = 0;
-
- dump_gen(
- &dump.base,
- tokens,
- flags );
-
- *str = dump.text;
-}
+++ /dev/null
-#if !defined TGSI_DUMP_H
-#define TGSI_DUMP_H
-
-#if defined __cplusplus
-extern "C" {
-#endif // defined __cplusplus
-
-#define TGSI_DUMP_VERBOSE 1
-#define TGSI_DUMP_NO_IGNORED 2
-#define TGSI_DUMP_NO_DEFAULT 4
-
-void
-tgsi_dump(
- const struct tgsi_token *tokens,
- unsigned flags );
-
-void
-tgsi_dump_str(
- char **str,
- const struct tgsi_token *tokens,
- unsigned flags );
-
-#if defined __cplusplus
-} // extern "C"
-#endif // defined __cplusplus
-
-#endif // !defined TGSI_DUMP_H
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi_parse.h"
-#include "tgsi_build.h"
-
-void
-tgsi_full_token_init(
- union tgsi_full_token *full_token )
-{
- full_token->Token.Type = TGSI_TOKEN_TYPE_DECLARATION;
-}
-
-void
-tgsi_full_token_free(
- union tgsi_full_token *full_token )
-{
- if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) {
- FREE( full_token->FullImmediate.u.Pointer );
- }
-}
-
-unsigned
-tgsi_parse_init(
- struct tgsi_parse_context *ctx,
- const struct tgsi_token *tokens )
-{
- ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0];
- if( ctx->FullVersion.Version.MajorVersion > 1 ) {
- return TGSI_PARSE_ERROR;
- }
-
- ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[1];
- if( ctx->FullHeader.Header.HeaderSize >= 2 ) {
- ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2];
- }
- else {
- ctx->FullHeader.Processor = tgsi_default_processor();
- }
-
- ctx->Tokens = tokens;
- ctx->Position = 1 + ctx->FullHeader.Header.HeaderSize;
-
- tgsi_full_token_init( &ctx->FullToken );
-
- return TGSI_PARSE_OK;
-}
-
-void
-tgsi_parse_free(
- struct tgsi_parse_context *ctx )
-{
- tgsi_full_token_free( &ctx->FullToken );
-}
-
-boolean
-tgsi_parse_end_of_tokens(
- struct tgsi_parse_context *ctx )
-{
- return ctx->Position >=
- 1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize;
-}
-
-static void
-next_token(
- struct tgsi_parse_context *ctx,
- void *token )
-{
- assert( !tgsi_parse_end_of_tokens( ctx ) );
-
- *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++];
-}
-
-void
-tgsi_parse_token(
- struct tgsi_parse_context *ctx )
-{
- struct tgsi_token token;
- unsigned i;
-
- tgsi_full_token_free( &ctx->FullToken );
- tgsi_full_token_init( &ctx->FullToken );
-
- next_token( ctx, &token );
-
- switch( token.Type ) {
- case TGSI_TOKEN_TYPE_DECLARATION:
- {
- struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration;
-
- *decl = tgsi_default_full_declaration();
- decl->Declaration = *(struct tgsi_declaration *) &token;
-
- switch( decl->Declaration.Type ) {
- case TGSI_DECLARE_RANGE:
- next_token( ctx, &decl->u.DeclarationRange );
- break;
-
- case TGSI_DECLARE_MASK:
- next_token( ctx, &decl->u.DeclarationMask );
- break;
-
- default:
- assert (0);
- }
-
- if( decl->Declaration.Interpolate ) {
- next_token( ctx, &decl->Interpolation );
- }
-
- if( decl->Declaration.Semantic ) {
- next_token( ctx, &decl->Semantic );
- }
-
- break;
- }
-
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- {
- struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate;
-
- *imm = tgsi_default_full_immediate();
- imm->Immediate = *(struct tgsi_immediate *) &token;
-
- assert( !imm->Immediate.Extended );
-
- switch (imm->Immediate.DataType) {
- case TGSI_IMM_FLOAT32:
- imm->u.Pointer = MALLOC(
- sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) );
- for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
- next_token( ctx, &imm->u.ImmediateFloat32[i] );
- }
- break;
-
- default:
- assert( 0 );
- }
-
- break;
- }
-
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- {
- struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction;
- unsigned extended;
-
- *inst = tgsi_default_full_instruction();
- inst->Instruction = *(struct tgsi_instruction *) &token;
-
- extended = inst->Instruction.Extended;
-
- while( extended ) {
- struct tgsi_src_register_ext token;
-
- next_token( ctx, &token );
-
- switch( token.Type ) {
- case TGSI_INSTRUCTION_EXT_TYPE_NV:
- inst->InstructionExtNv =
- *(struct tgsi_instruction_ext_nv *) &token;
- break;
-
- case TGSI_INSTRUCTION_EXT_TYPE_LABEL:
- inst->InstructionExtLabel =
- *(struct tgsi_instruction_ext_label *) &token;
- break;
-
- case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE:
- inst->InstructionExtTexture =
- *(struct tgsi_instruction_ext_texture *) &token;
- break;
-
- default:
- assert( 0 );
- }
-
- extended = token.Extended;
- }
-
- assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS );
-
- for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
- unsigned extended;
-
- next_token( ctx, &inst->FullDstRegisters[i].DstRegister );
-
- /*
- * No support for indirect or multi-dimensional addressing.
- */
- assert( !inst->FullDstRegisters[i].DstRegister.Indirect );
- assert( !inst->FullDstRegisters[i].DstRegister.Dimension );
-
- extended = inst->FullDstRegisters[i].DstRegister.Extended;
-
- while( extended ) {
- struct tgsi_src_register_ext token;
-
- next_token( ctx, &token );
-
- switch( token.Type ) {
- case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE:
- inst->FullDstRegisters[i].DstRegisterExtConcode =
- *(struct tgsi_dst_register_ext_concode *) &token;
- break;
-
- case TGSI_DST_REGISTER_EXT_TYPE_MODULATE:
- inst->FullDstRegisters[i].DstRegisterExtModulate =
- *(struct tgsi_dst_register_ext_modulate *) &token;
- break;
-
- default:
- assert( 0 );
- }
-
- extended = token.Extended;
- }
- }
-
- assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS );
-
- for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
- unsigned extended;
-
- next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister );
-
- extended = inst->FullSrcRegisters[i].SrcRegister.Extended;
-
- while( extended ) {
- struct tgsi_src_register_ext token;
-
- next_token( ctx, &token );
-
- switch( token.Type ) {
- case TGSI_SRC_REGISTER_EXT_TYPE_SWZ:
- inst->FullSrcRegisters[i].SrcRegisterExtSwz =
- *(struct tgsi_src_register_ext_swz *) &token;
- break;
-
- case TGSI_SRC_REGISTER_EXT_TYPE_MOD:
- inst->FullSrcRegisters[i].SrcRegisterExtMod =
- *(struct tgsi_src_register_ext_mod *) &token;
- break;
-
- default:
- assert( 0 );
- }
-
- extended = token.Extended;
- }
-
- if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) {
- next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd );
-
- /*
- * No support for indirect or multi-dimensional addressing.
- */
- assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect );
- assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension );
- assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended );
- }
-
- if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) {
- next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim );
-
- /*
- * No support for multi-dimensional addressing.
- */
- assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension );
- assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended );
-
- if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) {
- next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd );
-
- /*
- * No support for indirect or multi-dimensional addressing.
- */
- assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect );
- assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension );
- assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended );
- }
- }
- }
-
- break;
- }
-
- default:
- assert( 0 );
- }
-}
-
+++ /dev/null
-#if !defined TGSI_PARSE_H
-#define TGSI_PARSE_H
-
-#if defined __cplusplus
-extern "C" {
-#endif // defined __cplusplus
-
-struct tgsi_full_version
-{
- struct tgsi_version Version;
-};
-
-struct tgsi_full_header
-{
- struct tgsi_header Header;
- struct tgsi_processor Processor;
-};
-
-struct tgsi_full_dst_register
-{
- struct tgsi_dst_register DstRegister;
- struct tgsi_dst_register_ext_concode DstRegisterExtConcode;
- struct tgsi_dst_register_ext_modulate DstRegisterExtModulate;
-};
-
-struct tgsi_full_src_register
-{
- struct tgsi_src_register SrcRegister;
- struct tgsi_src_register_ext_swz SrcRegisterExtSwz;
- struct tgsi_src_register_ext_mod SrcRegisterExtMod;
- struct tgsi_src_register SrcRegisterInd;
- struct tgsi_dimension SrcRegisterDim;
- struct tgsi_src_register SrcRegisterDimInd;
-};
-
-struct tgsi_full_declaration
-{
- struct tgsi_declaration Declaration;
- union
- {
- struct tgsi_declaration_range DeclarationRange;
- struct tgsi_declaration_mask DeclarationMask;
- } u;
- struct tgsi_declaration_interpolation Interpolation;
- struct tgsi_declaration_semantic Semantic;
-};
-
-struct tgsi_full_immediate
-{
- struct tgsi_immediate Immediate;
- union
- {
- void *Pointer;
- struct tgsi_immediate_float32 *ImmediateFloat32;
- } u;
-};
-
-#define TGSI_FULL_MAX_DST_REGISTERS 2
-#define TGSI_FULL_MAX_SRC_REGISTERS 3
-
-struct tgsi_full_instruction
-{
- struct tgsi_instruction Instruction;
- struct tgsi_instruction_ext_nv InstructionExtNv;
- struct tgsi_instruction_ext_label InstructionExtLabel;
- struct tgsi_instruction_ext_texture InstructionExtTexture;
- struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS];
- struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS];
-};
-
-union tgsi_full_token
-{
- struct tgsi_token Token;
- struct tgsi_full_declaration FullDeclaration;
- struct tgsi_full_immediate FullImmediate;
- struct tgsi_full_instruction FullInstruction;
-};
-
-void
-tgsi_full_token_init(
- union tgsi_full_token *full_token );
-
-void
-tgsi_full_token_free(
- union tgsi_full_token *full_token );
-
-struct tgsi_parse_context
-{
- const struct tgsi_token *Tokens;
- unsigned Position;
- struct tgsi_full_version FullVersion;
- struct tgsi_full_header FullHeader;
- union tgsi_full_token FullToken;
-};
-
-#define TGSI_PARSE_OK 0
-#define TGSI_PARSE_ERROR 1
-
-unsigned
-tgsi_parse_init(
- struct tgsi_parse_context *ctx,
- const struct tgsi_token *tokens );
-
-void
-tgsi_parse_free(
- struct tgsi_parse_context *ctx );
-
-boolean
-tgsi_parse_end_of_tokens(
- struct tgsi_parse_context *ctx );
-
-void
-tgsi_parse_token(
- struct tgsi_parse_context *ctx );
-
-#if defined __cplusplus
-} // extern "C"
-#endif // defined __cplusplus
-
-#endif // !defined TGSI_PARSE_H
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * TGSI program transformation utility.
- *
- * Authors: Brian Paul
- */
-
-
-#include "tgsi_transform.h"
-
-
-
-static void
-emit_instruction(struct tgsi_transform_context *ctx,
- const struct tgsi_full_instruction *inst)
-{
- uint ti = ctx->ti;
-
- ti += tgsi_build_full_instruction(inst,
- ctx->tokens_out + ti,
- ctx->header,
- ctx->max_tokens_out - ti);
- ctx->ti = ti;
-}
-
-
-static void
-emit_declaration(struct tgsi_transform_context *ctx,
- const struct tgsi_full_declaration *decl)
-{
- uint ti = ctx->ti;
-
- ti += tgsi_build_full_declaration(decl,
- ctx->tokens_out + ti,
- ctx->header,
- ctx->max_tokens_out - ti);
- ctx->ti = ti;
-}
-
-
-static void
-emit_immediate(struct tgsi_transform_context *ctx,
- const struct tgsi_full_immediate *imm)
-{
- uint ti = ctx->ti;
-
- ti += tgsi_build_full_immediate(imm,
- ctx->tokens_out + ti,
- ctx->header,
- ctx->max_tokens_out - ti);
- ctx->ti = ti;
-}
-
-
-
-/**
- * Apply user-defined transformations to the input shader to produce
- * the output shader.
- * For example, a register search-and-replace operation could be applied
- * by defining a transform_instruction() callback that examined and changed
- * the instruction src/dest regs.
- *
- * \return number of tokens emitted
- */
-int
-tgsi_transform_shader(const struct tgsi_token *tokens_in,
- struct tgsi_token *tokens_out,
- uint max_tokens_out,
- struct tgsi_transform_context *ctx)
-{
- uint procType;
-
- /* input shader */
- struct tgsi_parse_context parse;
-
- /* output shader */
- struct tgsi_processor *processor;
-
-
- /**
- ** callback context init
- **/
- ctx->emit_instruction = emit_instruction;
- ctx->emit_declaration = emit_declaration;
- ctx->emit_immediate = emit_immediate;
- ctx->tokens_out = tokens_out;
- ctx->max_tokens_out = max_tokens_out;
-
-
- /**
- ** Setup to begin parsing input shader
- **/
- if (tgsi_parse_init( &parse, tokens_in ) != TGSI_PARSE_OK) {
- debug_printf("tgsi_parse_init() failed in tgsi_transform_shader()!\n");
- return -1;
- }
- procType = parse.FullHeader.Processor.Processor;
- assert(procType == TGSI_PROCESSOR_FRAGMENT ||
- procType == TGSI_PROCESSOR_VERTEX ||
- procType == TGSI_PROCESSOR_GEOMETRY);
-
-
- /**
- ** Setup output shader
- **/
- *(struct tgsi_version *) &tokens_out[0] = tgsi_build_version();
-
- ctx->header = (struct tgsi_header *) (tokens_out + 1);
- *ctx->header = tgsi_build_header();
-
- processor = (struct tgsi_processor *) (tokens_out + 2);
- *processor = tgsi_build_processor( procType, ctx->header );
-
- ctx->ti = 3;
-
-
- /**
- ** Loop over incoming program tokens/instructions
- */
- while( !tgsi_parse_end_of_tokens( &parse ) ) {
-
- tgsi_parse_token( &parse );
-
- switch( parse.FullToken.Token.Type ) {
- case TGSI_TOKEN_TYPE_INSTRUCTION:
- {
- struct tgsi_full_instruction *fullinst
- = &parse.FullToken.FullInstruction;
-
- if (ctx->transform_instruction)
- ctx->transform_instruction(ctx, fullinst);
- else
- ctx->emit_instruction(ctx, fullinst);
- }
- break;
-
- case TGSI_TOKEN_TYPE_DECLARATION:
- {
- struct tgsi_full_declaration *fulldecl
- = &parse.FullToken.FullDeclaration;
-
- if (ctx->transform_declaration)
- ctx->transform_declaration(ctx, fulldecl);
- else
- ctx->emit_declaration(ctx, fulldecl);
- }
- break;
-
- case TGSI_TOKEN_TYPE_IMMEDIATE:
- {
- struct tgsi_full_immediate *fullimm
- = &parse.FullToken.FullImmediate;
-
- if (ctx->transform_immediate)
- ctx->transform_immediate(ctx, fullimm);
- else
- ctx->emit_immediate(ctx, fullimm);
- }
- break;
-
- default:
- assert( 0 );
- }
- }
-
- if (ctx->epilog) {
- ctx->epilog(ctx);
- }
-
- tgsi_parse_free (&parse);
-
- return ctx->ti;
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef TGSI_TRANSFORM_H
-#define TGSI_TRANSFORM_H
-
-
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi/util/tgsi_parse.h"
-#include "tgsi/util/tgsi_build.h"
-
-
-
-/**
- * Subclass this to add caller-specific data
- */
-struct tgsi_transform_context
-{
-/**** PUBLIC ***/
-
- /**
- * User-defined callbacks invoked per instruction.
- */
- void (*transform_instruction)(struct tgsi_transform_context *ctx,
- struct tgsi_full_instruction *inst);
-
- void (*transform_declaration)(struct tgsi_transform_context *ctx,
- struct tgsi_full_declaration *decl);
-
- void (*transform_immediate)(struct tgsi_transform_context *ctx,
- struct tgsi_full_immediate *imm);
-
- /**
- * Called at end of input program to allow caller to append extra
- * instructions. Return number of tokens emitted.
- */
- void (*epilog)(struct tgsi_transform_context *ctx);
-
-
-/*** PRIVATE ***/
-
- /**
- * These are setup by tgsi_transform_shader() and cannot be overridden.
- * Meant to be called from in the above user callback functions.
- */
- void (*emit_instruction)(struct tgsi_transform_context *ctx,
- const struct tgsi_full_instruction *inst);
- void (*emit_declaration)(struct tgsi_transform_context *ctx,
- const struct tgsi_full_declaration *decl);
- void (*emit_immediate)(struct tgsi_transform_context *ctx,
- const struct tgsi_full_immediate *imm);
-
- struct tgsi_header *header;
- uint max_tokens_out;
- struct tgsi_token *tokens_out;
- uint ti;
-};
-
-
-
-extern int
-tgsi_transform_shader(const struct tgsi_token *tokens_in,
- struct tgsi_token *tokens_out,
- uint max_tokens_out,
- struct tgsi_transform_context *ctx);
-
-
-#endif /* TGSI_TRANSFORM_H */
+++ /dev/null
-#include "pipe/p_debug.h"
-#include "pipe/p_util.h"
-#include "pipe/p_shader_tokens.h"
-#include "tgsi_parse.h"
-#include "tgsi_build.h"
-#include "tgsi_util.h"
-
-union pointer_hack
-{
- void *pointer;
- unsigned long long uint64;
-};
-
-void *
-tgsi_align_128bit(
- void *unaligned )
-{
- union pointer_hack ph;
-
- ph.uint64 = 0;
- ph.pointer = unaligned;
- ph.uint64 = (ph.uint64 + 15) & ~15;
- return ph.pointer;
-}
-
-unsigned
-tgsi_util_get_src_register_swizzle(
- const struct tgsi_src_register *reg,
- unsigned component )
-{
- switch( component ) {
- case 0:
- return reg->SwizzleX;
- case 1:
- return reg->SwizzleY;
- case 2:
- return reg->SwizzleZ;
- case 3:
- return reg->SwizzleW;
- default:
- assert( 0 );
- }
- return 0;
-}
-
-unsigned
-tgsi_util_get_src_register_extswizzle(
- const struct tgsi_src_register_ext_swz *reg,
- unsigned component )
-{
- switch( component ) {
- case 0:
- return reg->ExtSwizzleX;
- case 1:
- return reg->ExtSwizzleY;
- case 2:
- return reg->ExtSwizzleZ;
- case 3:
- return reg->ExtSwizzleW;
- default:
- assert( 0 );
- }
- return 0;
-}
-
-unsigned
-tgsi_util_get_full_src_register_extswizzle(
- const struct tgsi_full_src_register *reg,
- unsigned component )
-{
- unsigned swizzle;
-
- /*
- * First, calculate the extended swizzle for a given channel. This will give
- * us either a channel index into the simple swizzle or a constant 1 or 0.
- */
- swizzle = tgsi_util_get_src_register_extswizzle(
- ®->SrcRegisterExtSwz,
- component );
-
- assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
- assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
- assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
- assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
- assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
- assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
-
- /*
- * Second, calculate the simple swizzle for the unswizzled channel index.
- * Leave the constants intact, they are not affected by the simple swizzle.
- */
- if( swizzle <= TGSI_SWIZZLE_W ) {
- swizzle = tgsi_util_get_src_register_swizzle(
- ®->SrcRegister,
- component );
- }
-
- return swizzle;
-}
-
-void
-tgsi_util_set_src_register_swizzle(
- struct tgsi_src_register *reg,
- unsigned swizzle,
- unsigned component )
-{
- switch( component ) {
- case 0:
- reg->SwizzleX = swizzle;
- break;
- case 1:
- reg->SwizzleY = swizzle;
- break;
- case 2:
- reg->SwizzleZ = swizzle;
- break;
- case 3:
- reg->SwizzleW = swizzle;
- break;
- default:
- assert( 0 );
- }
-}
-
-void
-tgsi_util_set_src_register_extswizzle(
- struct tgsi_src_register_ext_swz *reg,
- unsigned swizzle,
- unsigned component )
-{
- switch( component ) {
- case 0:
- reg->ExtSwizzleX = swizzle;
- break;
- case 1:
- reg->ExtSwizzleY = swizzle;
- break;
- case 2:
- reg->ExtSwizzleZ = swizzle;
- break;
- case 3:
- reg->ExtSwizzleW = swizzle;
- break;
- default:
- assert( 0 );
- }
-}
-
-unsigned
-tgsi_util_get_src_register_extnegate(
- const struct tgsi_src_register_ext_swz *reg,
- unsigned component )
-{
- switch( component ) {
- case 0:
- return reg->NegateX;
- case 1:
- return reg->NegateY;
- case 2:
- return reg->NegateZ;
- case 3:
- return reg->NegateW;
- default:
- assert( 0 );
- }
- return 0;
-}
-
-void
-tgsi_util_set_src_register_extnegate(
- struct tgsi_src_register_ext_swz *reg,
- unsigned negate,
- unsigned component )
-{
- switch( component ) {
- case 0:
- reg->NegateX = negate;
- break;
- case 1:
- reg->NegateY = negate;
- break;
- case 2:
- reg->NegateZ = negate;
- break;
- case 3:
- reg->NegateW = negate;
- break;
- default:
- assert( 0 );
- }
-}
-
-unsigned
-tgsi_util_get_full_src_register_sign_mode(
- const struct tgsi_full_src_register *reg,
- unsigned component )
-{
- unsigned sign_mode;
-
- if( reg->SrcRegisterExtMod.Absolute ) {
- /* Consider only the post-abs negation. */
-
- if( reg->SrcRegisterExtMod.Negate ) {
- sign_mode = TGSI_UTIL_SIGN_SET;
- }
- else {
- sign_mode = TGSI_UTIL_SIGN_CLEAR;
- }
- }
- else {
- /* Accumulate the three negations. */
-
- unsigned negate;
-
- negate = reg->SrcRegister.Negate;
- if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) {
- negate = !negate;
- }
- if( reg->SrcRegisterExtMod.Negate ) {
- negate = !negate;
- }
-
- if( negate ) {
- sign_mode = TGSI_UTIL_SIGN_TOGGLE;
- }
- else {
- sign_mode = TGSI_UTIL_SIGN_KEEP;
- }
- }
-
- return sign_mode;
-}
-
-void
-tgsi_util_set_full_src_register_sign_mode(
- struct tgsi_full_src_register *reg,
- unsigned sign_mode )
-{
- reg->SrcRegisterExtSwz.NegateX = 0;
- reg->SrcRegisterExtSwz.NegateY = 0;
- reg->SrcRegisterExtSwz.NegateZ = 0;
- reg->SrcRegisterExtSwz.NegateW = 0;
-
- switch (sign_mode)
- {
- case TGSI_UTIL_SIGN_CLEAR:
- reg->SrcRegister.Negate = 0;
- reg->SrcRegisterExtMod.Absolute = 1;
- reg->SrcRegisterExtMod.Negate = 0;
- break;
-
- case TGSI_UTIL_SIGN_SET:
- reg->SrcRegister.Negate = 0;
- reg->SrcRegisterExtMod.Absolute = 1;
- reg->SrcRegisterExtMod.Negate = 1;
- break;
-
- case TGSI_UTIL_SIGN_TOGGLE:
- reg->SrcRegister.Negate = 1;
- reg->SrcRegisterExtMod.Absolute = 0;
- reg->SrcRegisterExtMod.Negate = 0;
- break;
-
- case TGSI_UTIL_SIGN_KEEP:
- reg->SrcRegister.Negate = 0;
- reg->SrcRegisterExtMod.Absolute = 0;
- reg->SrcRegisterExtMod.Negate = 0;
- break;
-
- default:
- assert( 0 );
- }
-}
-
+++ /dev/null
-#if !defined TGSI_UTIL_H
-#define TGSI_UTIL_H
-
-#if defined __cplusplus
-extern "C" {
-#endif // defined __cplusplus
-
-void *
-tgsi_align_128bit(
- void *unaligned );
-
-unsigned
-tgsi_util_get_src_register_swizzle(
- const struct tgsi_src_register *reg,
- unsigned component );
-
-unsigned
-tgsi_util_get_src_register_extswizzle(
- const struct tgsi_src_register_ext_swz *reg,
- unsigned component);
-
-unsigned
-tgsi_util_get_full_src_register_extswizzle(
- const struct tgsi_full_src_register *reg,
- unsigned component );
-
-void
-tgsi_util_set_src_register_swizzle(
- struct tgsi_src_register *reg,
- unsigned swizzle,
- unsigned component );
-
-void
-tgsi_util_set_src_register_extswizzle(
- struct tgsi_src_register_ext_swz *reg,
- unsigned swizzle,
- unsigned component );
-
-unsigned
-tgsi_util_get_src_register_extnegate(
- const struct tgsi_src_register_ext_swz *reg,
- unsigned component );
-
-void
-tgsi_util_set_src_register_extnegate(
- struct tgsi_src_register_ext_swz *reg,
- unsigned negate,
- unsigned component );
-
-#define TGSI_UTIL_SIGN_CLEAR 0 /* Force positive */
-#define TGSI_UTIL_SIGN_SET 1 /* Force negative */
-#define TGSI_UTIL_SIGN_TOGGLE 2 /* Negate */
-#define TGSI_UTIL_SIGN_KEEP 3 /* No change */
-
-unsigned
-tgsi_util_get_full_src_register_sign_mode(
- const struct tgsi_full_src_register *reg,
- unsigned component );
-
-void
-tgsi_util_set_full_src_register_sign_mode(
- struct tgsi_full_src_register *reg,
- unsigned sign_mode );
-
-#if defined __cplusplus
-} // extern "C"
-#endif // defined __cplusplus
-
-#endif // !defined TGSI_UTIL_H
-
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include <stdarg.h>
-
-#ifdef WIN32
-#include <windows.h>
-#include <winddi.h>
-#else
-#include <stdio.h>
-#include <stdlib.h>
-#endif
-
-#include "pipe/p_debug.h"
-#include "pipe/p_compiler.h"
-
-
-void debug_vprintf(const char *format, va_list ap)
-{
-#ifdef WIN32
- EngDebugPrint("Gallium3D: ", (PCHAR)format, ap);
-#else
- vfprintf(stderr, format, ap);
-#endif
-}
-
-
-void debug_printf(const char *format, ...)
-{
- va_list ap;
- va_start(ap, format);
- debug_vprintf(format, ap);
- va_end(ap);
-}
-
-
-static INLINE void debug_abort(void)
-{
-#ifdef WIN32
- EngDebugBreak();
-#else
- abort();
-#endif
-}
-
-
-void debug_assert_fail(const char *expr, const char *file, unsigned line)
-{
- debug_printf("%s:%i: Assertion `%s' failed.\n", file, line, expr);
- debug_abort();
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * RGBA/float tile get/put functions.
- * Usable both by drivers and state trackers.
- * Surfaces should already be in a mapped state.
- */
-
-
-#include "pipe/p_defines.h"
-#include "pipe/p_util.h"
-#include "pipe/p_inlines.h"
-
-#include "p_tile.h"
-
-
-
-/**
- * Move raw block of pixels from surface to user memory.
- * This should be usable by any hw driver that has mappable surfaces.
- */
-void
-pipe_get_tile_raw(struct pipe_context *pipe,
- struct pipe_surface *ps,
- uint x, uint y, uint w, uint h,
- void *p, int dst_stride)
-{
- const uint cpp = ps->cpp;
- const ubyte *pSrc;
- const uint src_stride = ps->pitch * cpp;
- ubyte *pDest;
- uint i;
-
- if (dst_stride == 0) {
- dst_stride = w * cpp;
- }
-
- if (pipe_clip_tile(x, y, &w, &h, ps))
- return;
-
- pSrc = (const ubyte *) pipe_surface_map(ps) + (y * ps->pitch + x) * cpp;
- pDest = (ubyte *) p;
-
- for (i = 0; i < h; i++) {
- memcpy(pDest, pSrc, w * cpp);
- pDest += dst_stride;
- pSrc += src_stride;
- }
-
- pipe_surface_unmap(ps);
-}
-
-
-/**
- * Move raw block of pixels from user memory to surface.
- * This should be usable by any hw driver that has mappable surfaces.
- */
-void
-pipe_put_tile_raw(struct pipe_context *pipe,
- struct pipe_surface *ps,
- uint x, uint y, uint w, uint h,
- const void *p, int src_stride)
-{
- const uint cpp = ps->cpp;
- const ubyte *pSrc;
- const uint dst_stride = ps->pitch * cpp;
- ubyte *pDest;
- uint i;
-
- if (src_stride == 0) {
- src_stride = w * cpp;
- }
-
- if (pipe_clip_tile(x, y, &w, &h, ps))
- return;
-
- pSrc = (const ubyte *) p;
- pDest = (ubyte *) pipe_surface_map(ps) + (y * ps->pitch + x) * cpp;
-
- for (i = 0; i < h; i++) {
- memcpy(pDest, pSrc, w * cpp);
- pDest += dst_stride;
- pSrc += src_stride;
- }
-
- pipe_surface_unmap(ps);
-}
-
-
-
-
-/** Convert short in [-32768,32767] to GLfloat in [-1.0,1.0] */
-#define SHORT_TO_FLOAT(S) ((2.0F * (S) + 1.0F) * (1.0F/65535.0F))
-
-#define UNCLAMPED_FLOAT_TO_SHORT(us, f) \
- us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) )
-
-
-
-/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
-
-static void
-a8r8g8b8_get_tile_rgba(unsigned *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- const unsigned pixel = *src++;
- pRow[0] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff);
- pRow[1] = UBYTE_TO_FLOAT((pixel >> 8) & 0xff);
- pRow[2] = UBYTE_TO_FLOAT((pixel >> 0) & 0xff);
- pRow[3] = UBYTE_TO_FLOAT((pixel >> 24) & 0xff);
- }
- p += dst_stride;
- }
-}
-
-
-static void
-a8r8g8b8_put_tile_rgba(unsigned *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned r, g, b, a;
- UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]);
- *dst++ = (a << 24) | (r << 16) | (g << 8) | b;
- }
- p += src_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/
-
-static void
-b8g8r8a8_get_tile_rgba(unsigned *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- const unsigned pixel = *src++;
- pRow[0] = UBYTE_TO_FLOAT((pixel >> 8) & 0xff);
- pRow[1] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff);
- pRow[2] = UBYTE_TO_FLOAT((pixel >> 24) & 0xff);
- pRow[3] = UBYTE_TO_FLOAT((pixel >> 0) & 0xff);
- }
- p += dst_stride;
- }
-}
-
-
-static void
-b8g8r8a8_put_tile_rgba(unsigned *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- unsigned r, g, b, a;
- UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]);
- UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]);
- UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]);
- UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]);
- *dst++ = (b << 24) | (g << 16) | (r << 8) | a;
- }
- p += src_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/
-
-static void
-a1r5g5b5_get_tile_rgba(ushort *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- const ushort pixel = *src++;
- pRow[0] = ((pixel >> 10) & 0x1f) * (1.0f / 31.0f);
- pRow[1] = ((pixel >> 5) & 0x1f) * (1.0f / 31.0f);
- pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f);
- pRow[3] = ((pixel >> 15) ) * 1.0f;
- }
- p += dst_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/
-
-static void
-a4r4g4b4_get_tile_rgba(ushort *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- const ushort pixel = *src++;
- pRow[0] = ((pixel >> 8) & 0xf) * (1.0f / 15.0f);
- pRow[1] = ((pixel >> 4) & 0xf) * (1.0f / 15.0f);
- pRow[2] = ((pixel ) & 0xf) * (1.0f / 15.0f);
- pRow[3] = ((pixel >> 12) ) * (1.0f / 15.0f);
- }
- p += dst_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_R5G6B5_UNORM ***/
-
-static void
-r5g6b5_get_tile_rgba(ushort *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- const ushort pixel = *src++;
- pRow[0] = ((pixel >> 11) & 0x1f) * (1.0f / 31.0f);
- pRow[1] = ((pixel >> 5) & 0x3f) * (1.0f / 63.0f);
- pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f);
- pRow[3] = 1.0f;
- }
- p += dst_stride;
- }
-}
-
-
-static void
-r5g5b5_put_tile_rgba(ushort *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- uint r = (uint) (CLAMP(pRow[0], 0.0, 1.0) * 31.0);
- uint g = (uint) (CLAMP(pRow[1], 0.0, 1.0) * 63.0);
- uint b = (uint) (CLAMP(pRow[2], 0.0, 1.0) * 31.0);
- *dst++ = (r << 11) | (g << 5) | (b);
- }
- p += src_stride;
- }
-}
-
-
-
-/*** PIPE_FORMAT_Z16_UNORM ***/
-
-/**
- * Return each Z value as four floats in [0,1].
- */
-static void
-z16_get_tile_rgba(ushort *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- const float scale = 1.0f / 65535.0f;
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- pRow[0] =
- pRow[1] =
- pRow[2] =
- pRow[3] = *src++ * scale;
- }
- p += dst_stride;
- }
-}
-
-
-
-
-/*** PIPE_FORMAT_U_L8 ***/
-
-static void
-l8_get_tile_rgba(ubyte *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, src++, pRow += 4) {
- pRow[0] =
- pRow[1] =
- pRow[2] = UBYTE_TO_FLOAT(*src);
- pRow[3] = 1.0;
- }
- p += dst_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_U_A8 ***/
-
-static void
-a8_get_tile_rgba(ubyte *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, src++, pRow += 4) {
- pRow[0] =
- pRow[1] =
- pRow[2] = 0.0;
- pRow[3] = UBYTE_TO_FLOAT(*src);
- }
- p += dst_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/
-
-static void
-r16g16b16a16_get_tile_rgba(short *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, src += 4, pRow += 4) {
- pRow[0] = SHORT_TO_FLOAT(src[0]);
- pRow[1] = SHORT_TO_FLOAT(src[1]);
- pRow[2] = SHORT_TO_FLOAT(src[2]);
- pRow[3] = SHORT_TO_FLOAT(src[3]);
- }
- p += dst_stride;
- }
-}
-
-
-static void
-r16g16b16a16_put_tile_rgba(short *dst,
- unsigned w, unsigned h,
- const float *p,
- unsigned src_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- const float *pRow = p;
- for (j = 0; j < w; j++, dst += 4, pRow += 4) {
- UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]);
- UNCLAMPED_FLOAT_TO_SHORT(dst[1], pRow[1]);
- UNCLAMPED_FLOAT_TO_SHORT(dst[2], pRow[2]);
- UNCLAMPED_FLOAT_TO_SHORT(dst[3], pRow[3]);
- }
- p += src_stride;
- }
-}
-
-
-
-/*** PIPE_FORMAT_U_I8 ***/
-
-static void
-i8_get_tile_rgba(ubyte *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, src++, pRow += 4) {
- pRow[0] =
- pRow[1] =
- pRow[2] =
- pRow[3] = UBYTE_TO_FLOAT(*src);
- }
- p += dst_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_U_A8_L8 ***/
-
-static void
-a8_l8_get_tile_rgba(ushort *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- ushort p = *src++;
- pRow[0] =
- pRow[1] =
- pRow[2] = UBYTE_TO_FLOAT(p & 0xff);
- pRow[3] = UBYTE_TO_FLOAT(p >> 8);
- }
- p += dst_stride;
- }
-}
-
-
-
-
-/*** PIPE_FORMAT_Z32_UNORM ***/
-
-/**
- * Return each Z value as four floats in [0,1].
- */
-static void
-z32_get_tile_rgba(unsigned *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- const double scale = 1.0 / (double) 0xffffffff;
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- pRow[0] =
- pRow[1] =
- pRow[2] =
- pRow[3] = (float) (*src++ * scale);
- }
- p += dst_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_S8Z24_UNORM ***/
-
-/**
- * Return Z component as four float in [0,1]. Stencil part ignored.
- */
-static void
-s8z24_get_tile_rgba(unsigned *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- const double scale = 1.0 / ((1 << 24) - 1);
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- pRow[0] =
- pRow[1] =
- pRow[2] =
- pRow[3] = (float) (scale * (*src++ & 0xffffff));
- }
- p += dst_stride;
- }
-}
-
-
-/*** PIPE_FORMAT_Z24S8_UNORM ***/
-
-/**
- * Return Z component as four float in [0,1]. Stencil part ignored.
- */
-static void
-z24s8_get_tile_rgba(unsigned *src,
- unsigned w, unsigned h,
- float *p,
- unsigned dst_stride)
-{
- const double scale = 1.0 / ((1 << 24) - 1);
- unsigned i, j;
-
- for (i = 0; i < h; i++) {
- float *pRow = p;
- for (j = 0; j < w; j++, pRow += 4) {
- pRow[0] =
- pRow[1] =
- pRow[2] =
- pRow[3] = (float) (scale * (*src++ >> 8));
- }
- p += dst_stride;
- }
-}
-
-
-void
-pipe_get_tile_rgba(struct pipe_context *pipe,
- struct pipe_surface *ps,
- uint x, uint y, uint w, uint h,
- float *p)
-{
- unsigned dst_stride = w * 4;
- void *packed;
-
- if (pipe_clip_tile(x, y, &w, &h, ps))
- return;
-
- packed = MALLOC(h * w * ps->cpp);
-
- if (!packed)
- return;
-
- pipe_get_tile_raw(pipe, ps, x, y, w, h, packed, w * ps->cpp);
-
- switch (ps->format) {
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- a8r8g8b8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- b8g8r8a8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
- break;
- case PIPE_FORMAT_A1R5G5B5_UNORM:
- a1r5g5b5_get_tile_rgba((ushort *) packed, w, h, p, dst_stride);
- break;
- case PIPE_FORMAT_A4R4G4B4_UNORM:
- a4r4g4b4_get_tile_rgba((ushort *) packed, w, h, p, dst_stride);
- break;
- case PIPE_FORMAT_R5G6B5_UNORM:
- r5g6b5_get_tile_rgba((ushort *) packed, w, h, p, dst_stride);
- break;
- case PIPE_FORMAT_U_L8:
- l8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride);
- break;
- case PIPE_FORMAT_U_A8:
- a8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride);
- break;
- case PIPE_FORMAT_U_I8:
- i8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride);
- break;
- case PIPE_FORMAT_U_A8_L8:
- a8_l8_get_tile_rgba((ushort *) packed, w, h, p, dst_stride);
- break;
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- r16g16b16a16_get_tile_rgba((short *) packed, w, h, p, dst_stride);
- break;
- case PIPE_FORMAT_Z16_UNORM:
- z16_get_tile_rgba((ushort *) packed, w, h, p, dst_stride);
- break;
- case PIPE_FORMAT_Z32_UNORM:
- z32_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
- break;
- case PIPE_FORMAT_S8Z24_UNORM:
- s8z24_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- z24s8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
- break;
- default:
- assert(0);
- }
-
- FREE(packed);
-}
-
-
-void
-pipe_put_tile_rgba(struct pipe_context *pipe,
- struct pipe_surface *ps,
- uint x, uint y, uint w, uint h,
- const float *p)
-{
- unsigned src_stride = w * 4;
- void *packed;
-
- if (pipe_clip_tile(x, y, &w, &h, ps))
- return;
-
- packed = MALLOC(h * w * ps->cpp);
-
- if (!packed)
- return;
-
- switch (ps->format) {
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_A1R5G5B5_UNORM:
- /*a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/
- break;
- case PIPE_FORMAT_R5G6B5_UNORM:
- r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_R8G8B8A8_UNORM:
- break;
- case PIPE_FORMAT_U_L8:
- /*l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/
- break;
- case PIPE_FORMAT_U_A8:
- /*a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/
- break;
- case PIPE_FORMAT_U_I8:
- /*i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/
- break;
- case PIPE_FORMAT_U_A8_L8:
- /*a8_l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/
- break;
- case PIPE_FORMAT_R16G16B16A16_SNORM:
- r16g16b16a16_put_tile_rgba((short *) packed, w, h, p, src_stride);
- break;
- case PIPE_FORMAT_Z16_UNORM:
- /*z16_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/
- break;
- case PIPE_FORMAT_Z32_UNORM:
- /*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
- break;
- case PIPE_FORMAT_S8Z24_UNORM:
- /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
- break;
- case PIPE_FORMAT_Z24S8_UNORM:
- /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
- break;
- default:
- assert(0);
- }
-
- pipe_put_tile_raw(pipe, ps, x, y, w, h, packed, w * ps->cpp);
-
- FREE(packed);
-}
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef P_TILE_H
-#define P_TILE_H
-
-#include "pipe/p_compiler.h"
-
-struct pipe_context;
-struct pipe_surface;
-
-
-/**
- * Clip tile against surface dims.
- * \return TRUE if tile is totally clipped, FALSE otherwise
- */
-static INLINE boolean
-pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps)
-{
- if (x >= ps->width)
- return TRUE;
- if (y >= ps->height)
- return TRUE;
- if (x + *w > ps->width)
- *w = ps->width - x;
- if (y + *h > ps->height)
- *h = ps->height - y;
- return FALSE;
-}
-
-
-extern void
-pipe_get_tile_raw(struct pipe_context *pipe,
- struct pipe_surface *ps,
- uint x, uint y, uint w, uint h,
- void *p, int dst_stride);
-
-extern void
-pipe_put_tile_raw(struct pipe_context *pipe,
- struct pipe_surface *ps,
- uint x, uint y, uint w, uint h,
- const void *p, int src_stride);
-
-
-extern void
-pipe_get_tile_rgba(struct pipe_context *pipe,
- struct pipe_surface *ps,
- uint x, uint y, uint w, uint h,
- float *p);
-
-extern void
-pipe_put_tile_rgba(struct pipe_context *pipe,
- struct pipe_surface *ps,
- uint x, uint y, uint w, uint h,
- const float *p);
-
-#endif
+++ /dev/null
-/**************************************************************************
- *
- * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-/**
- * Miscellaneous utility functions.
- */
-
-
-#include "pipe/p_defines.h"
-#include "pipe/p_util.h"
-
-
-/**
- * Copy 2D rect from one place to another.
- * Position and sizes are in pixels.
- */
-void
-pipe_copy_rect(ubyte * dst,
- unsigned cpp,
- unsigned dst_pitch,
- unsigned dst_x,
- unsigned dst_y,
- unsigned width,
- unsigned height,
- const ubyte * src,
- int src_pitch,
- unsigned src_x,
- int src_y)
-{
- unsigned i;
-
- dst_pitch *= cpp;
- src_pitch *= cpp;
- dst += dst_x * cpp;
- src += src_x * cpp;
- dst += dst_y * dst_pitch;
- src += src_y * src_pitch;
- width *= cpp;
-
- if (width == dst_pitch && width == src_pitch)
- memcpy(dst, src, height * width);
- else {
- for (i = 0; i < height; i++) {
- memcpy(dst, src, width);
- dst += dst_pitch;
- src += src_pitch;
- }
- }
-}
--- /dev/null
+TOP = ../../..
+include $(TOP)/configs/current
+
+
+ifeq ($(CONFIG_NAME), linux-llvm)
+LLVM_DIR = llvm
+endif
+
+SUBDIRS = pipebuffer $(LLVM_DIR)
+
+
+default: subdirs
+
+
+subdirs:
+ @for dir in $(SUBDIRS) ; do \
+ if [ -d $$dir ] ; then \
+ (cd $$dir && $(MAKE)) || exit 1 ; \
+ fi \
+ done
+
+
+clean:
+ rm -f `find . -name \*.[oa]`
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Zack Rusin <zack@tungstengraphics.com>
+ */
+
+#include "cso_cache.h"
+#include "cso_hash.h"
+
+#if 1
+static unsigned hash_key(const void *key, unsigned key_size)
+{
+ unsigned *ikey = (unsigned *)key;
+ unsigned hash = 0, i;
+
+ assert(key_size % 4 == 0);
+
+ /* I'm sure this can be improved on:
+ */
+ for (i = 0; i < key_size/4; i++)
+ hash ^= ikey[i];
+
+ return hash;
+}
+#else
+static unsigned hash_key(const unsigned char *p, int n)
+{
+ unsigned h = 0;
+ unsigned g;
+
+ while (n--) {
+ h = (h << 4) + *p++;
+ if ((g = (h & 0xf0000000)) != 0)
+ h ^= g >> 23;
+ h &= ~g;
+ }
+ return h;
+}
+#endif
+
+unsigned cso_construct_key(void *item, int item_size)
+{
+ return hash_key((item), item_size);
+}
+
+static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type)
+{
+ struct cso_hash *hash = 0;
+
+ switch(type) {
+ case CSO_BLEND:
+ hash = sc->blend_hash;
+ break;
+ case CSO_SAMPLER:
+ hash = sc->sampler_hash;
+ break;
+ case CSO_DEPTH_STENCIL_ALPHA:
+ hash = sc->depth_stencil_hash;
+ break;
+ case CSO_RASTERIZER:
+ hash = sc->rasterizer_hash;
+ break;
+ case CSO_FRAGMENT_SHADER:
+ hash = sc->fs_hash;
+ break;
+ case CSO_VERTEX_SHADER:
+ hash = sc->vs_hash;
+ break;
+ }
+
+ return hash;
+}
+
+static int _cso_size_for_type(enum cso_cache_type type)
+{
+ switch(type) {
+ case CSO_BLEND:
+ return sizeof(struct pipe_blend_state);
+ case CSO_SAMPLER:
+ return sizeof(struct pipe_sampler_state);
+ case CSO_DEPTH_STENCIL_ALPHA:
+ return sizeof(struct pipe_depth_stencil_alpha_state);
+ case CSO_RASTERIZER:
+ return sizeof(struct pipe_rasterizer_state);
+ case CSO_FRAGMENT_SHADER:
+ return sizeof(struct pipe_shader_state);
+ case CSO_VERTEX_SHADER:
+ return sizeof(struct pipe_shader_state);
+ }
+ return 0;
+}
+
+struct cso_hash_iter
+cso_insert_state(struct cso_cache *sc,
+ unsigned hash_key, enum cso_cache_type type,
+ void *state)
+{
+ struct cso_hash *hash = _cso_hash_for_type(sc, type);
+ return cso_hash_insert(hash, hash_key, state);
+}
+
+struct cso_hash_iter
+cso_find_state(struct cso_cache *sc,
+ unsigned hash_key, enum cso_cache_type type)
+{
+ struct cso_hash *hash = _cso_hash_for_type(sc, type);
+
+ return cso_hash_find(hash, hash_key);
+}
+
+struct cso_hash_iter cso_find_state_template(struct cso_cache *sc,
+ unsigned hash_key, enum cso_cache_type type,
+ void *templ)
+{
+ struct cso_hash_iter iter = cso_find_state(sc, hash_key, type);
+ int size = _cso_size_for_type(type);
+ while (!cso_hash_iter_is_null(iter)) {
+ void *iter_data = cso_hash_iter_data(iter);
+ if (!memcmp(iter_data, templ, size))
+ return iter;
+ iter = cso_hash_iter_next(iter);
+ }
+ return iter;
+}
+
+void * cso_take_state(struct cso_cache *sc,
+ unsigned hash_key, enum cso_cache_type type)
+{
+ struct cso_hash *hash = _cso_hash_for_type(sc, type);
+ return cso_hash_take(hash, hash_key);
+}
+
+struct cso_cache *cso_cache_create(void)
+{
+ struct cso_cache *sc = malloc(sizeof(struct cso_cache));
+
+ sc->blend_hash = cso_hash_create();
+ sc->sampler_hash = cso_hash_create();
+ sc->depth_stencil_hash = cso_hash_create();
+ sc->rasterizer_hash = cso_hash_create();
+ sc->fs_hash = cso_hash_create();
+ sc->vs_hash = cso_hash_create();
+
+ return sc;
+}
+
+void cso_cache_delete(struct cso_cache *sc)
+{
+ assert(sc);
+ cso_hash_delete(sc->blend_hash);
+ cso_hash_delete(sc->sampler_hash);
+ cso_hash_delete(sc->depth_stencil_hash);
+ cso_hash_delete(sc->rasterizer_hash);
+ cso_hash_delete(sc->fs_hash);
+ cso_hash_delete(sc->vs_hash);
+ free(sc);
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin <zack@tungstengraphics.com>
+ */
+
+#ifndef CSO_CACHE_H
+#define CSO_CACHE_H
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+
+struct cso_hash;
+
+struct cso_cache {
+ struct cso_hash *blend_hash;
+ struct cso_hash *depth_stencil_hash;
+ struct cso_hash *fs_hash;
+ struct cso_hash *vs_hash;
+ struct cso_hash *rasterizer_hash;
+ struct cso_hash *sampler_hash;
+};
+
+struct cso_blend {
+ struct pipe_blend_state state;
+ void *data;
+};
+
+struct cso_depth_stencil_alpha {
+ struct pipe_depth_stencil_alpha_state state;
+ void *data;
+};
+
+struct cso_rasterizer {
+ struct pipe_rasterizer_state state;
+ void *data;
+};
+
+struct cso_fragment_shader {
+ struct pipe_shader_state state;
+ void *data;
+};
+
+struct cso_vertex_shader {
+ struct pipe_shader_state state;
+ void *data;
+};
+
+struct cso_sampler {
+ struct pipe_sampler_state state;
+ void *data;
+};
+
+
+enum cso_cache_type {
+ CSO_BLEND,
+ CSO_SAMPLER,
+ CSO_DEPTH_STENCIL_ALPHA,
+ CSO_RASTERIZER,
+ CSO_FRAGMENT_SHADER,
+ CSO_VERTEX_SHADER
+};
+
+unsigned cso_construct_key(void *item, int item_size);
+
+struct cso_cache *cso_cache_create(void);
+void cso_cache_delete(struct cso_cache *sc);
+
+struct cso_hash_iter cso_insert_state(struct cso_cache *sc,
+ unsigned hash_key, enum cso_cache_type type,
+ void *state);
+struct cso_hash_iter cso_find_state(struct cso_cache *sc,
+ unsigned hash_key, enum cso_cache_type type);
+struct cso_hash_iter cso_find_state_template(struct cso_cache *sc,
+ unsigned hash_key, enum cso_cache_type type,
+ void *templ);
+void * cso_take_state(struct cso_cache *sc, unsigned hash_key,
+ enum cso_cache_type type);
+
+#endif
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin <zack@tungstengraphics.com>
+ */
+
+#include "cso_hash.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#define MAX(a, b) ((a > b) ? (a) : (b))
+
+static const int MinNumBits = 4;
+
+static const unsigned char prime_deltas[] = {
+ 0, 0, 1, 3, 1, 5, 3, 3, 1, 9, 7, 5, 3, 9, 25, 3,
+ 1, 21, 3, 21, 7, 15, 9, 5, 3, 29, 15, 0, 0, 0, 0, 0
+};
+
+static int primeForNumBits(int numBits)
+{
+ return (1 << numBits) + prime_deltas[numBits];
+}
+
+/*
+ Returns the smallest integer n such that
+ primeForNumBits(n) >= hint.
+*/
+static int countBits(int hint)
+{
+ int numBits = 0;
+ int bits = hint;
+
+ while (bits > 1) {
+ bits >>= 1;
+ numBits++;
+ }
+
+ if (numBits >= (int)sizeof(prime_deltas)) {
+ numBits = sizeof(prime_deltas) - 1;
+ } else if (primeForNumBits(numBits) < hint) {
+ ++numBits;
+ }
+ return numBits;
+}
+
+struct cso_node {
+ struct cso_node *next;
+ unsigned key;
+ void *value;
+};
+
+struct cso_hash_data {
+ struct cso_node *fakeNext;
+ struct cso_node **buckets;
+ int size;
+ int nodeSize;
+ short userNumBits;
+ short numBits;
+ int numBuckets;
+};
+
+struct cso_hash {
+ union {
+ struct cso_hash_data *d;
+ struct cso_node *e;
+ } data;
+};
+
+static void *cso_data_allocate_node(struct cso_hash_data *hash)
+{
+ return malloc(hash->nodeSize);
+}
+
+static void cso_data_free_node(struct cso_node *node)
+{
+ /* XXX still a leak here.
+ * Need to cast value ptr to original cso type, then free the
+ * driver-specific data hanging off of it. For example:
+ struct cso_sampler *csamp = (struct cso_sampler *) node->value;
+ free(csamp->data);
+ */
+ free(node->value);
+ free(node);
+}
+
+static struct cso_node *
+cso_hash_create_node(struct cso_hash *hash,
+ unsigned akey, void *avalue,
+ struct cso_node **anextNode)
+{
+ struct cso_node *node = cso_data_allocate_node(hash->data.d);
+ node->key = akey;
+ node->value = avalue;
+
+ node->next = (struct cso_node*)(*anextNode);
+ *anextNode = node;
+ ++hash->data.d->size;
+ return node;
+}
+
+static void cso_data_rehash(struct cso_hash_data *hash, int hint)
+{
+ if (hint < 0) {
+ hint = countBits(-hint);
+ if (hint < MinNumBits)
+ hint = MinNumBits;
+ hash->userNumBits = hint;
+ while (primeForNumBits(hint) < (hash->size >> 1))
+ ++hint;
+ } else if (hint < MinNumBits) {
+ hint = MinNumBits;
+ }
+
+ if (hash->numBits != hint) {
+ struct cso_node *e = (struct cso_node *)(hash);
+ struct cso_node **oldBuckets = hash->buckets;
+ int oldNumBuckets = hash->numBuckets;
+ int i = 0;
+
+ hash->numBits = hint;
+ hash->numBuckets = primeForNumBits(hint);
+ hash->buckets = malloc(sizeof(struct cso_node*) * hash->numBuckets);
+ for (i = 0; i < hash->numBuckets; ++i)
+ hash->buckets[i] = e;
+
+ for (i = 0; i < oldNumBuckets; ++i) {
+ struct cso_node *firstNode = oldBuckets[i];
+ while (firstNode != e) {
+ unsigned h = firstNode->key;
+ struct cso_node *lastNode = firstNode;
+ while (lastNode->next != e && lastNode->next->key == h)
+ lastNode = lastNode->next;
+
+ struct cso_node *afterLastNode = lastNode->next;
+ struct cso_node **beforeFirstNode = &hash->buckets[h % hash->numBuckets];
+ while (*beforeFirstNode != e)
+ beforeFirstNode = &(*beforeFirstNode)->next;
+ lastNode->next = *beforeFirstNode;
+ *beforeFirstNode = firstNode;
+ firstNode = afterLastNode;
+ }
+ }
+ free(oldBuckets);
+ }
+}
+
+static void cso_data_might_grow(struct cso_hash_data *hash)
+{
+ if (hash->size >= hash->numBuckets)
+ cso_data_rehash(hash, hash->numBits + 1);
+}
+
+static void cso_data_has_shrunk(struct cso_hash_data *hash)
+{
+ if (hash->size <= (hash->numBuckets >> 3) &&
+ hash->numBits > hash->userNumBits) {
+ int max = MAX(hash->numBits-2, hash->userNumBits);
+ cso_data_rehash(hash, max);
+ }
+}
+
+static struct cso_node *cso_data_first_node(struct cso_hash_data *hash)
+{
+ struct cso_node *e = (struct cso_node *)(hash);
+ struct cso_node **bucket = hash->buckets;
+ int n = hash->numBuckets;
+ while (n--) {
+ if (*bucket != e)
+ return *bucket;
+ ++bucket;
+ }
+ return e;
+}
+
+static struct cso_node **cso_hash_find_node(struct cso_hash *hash, unsigned akey)
+{
+ struct cso_node **node;
+
+ if (hash->data.d->numBuckets) {
+ node = (struct cso_node **)(&hash->data.d->buckets[akey % hash->data.d->numBuckets]);
+ assert(*node == hash->data.e || (*node)->next);
+ while (*node != hash->data.e && (*node)->key != akey)
+ node = &(*node)->next;
+ } else {
+ node = (struct cso_node **)((const struct cso_node * const *)(&hash->data.e));
+ }
+ return node;
+}
+
+struct cso_hash_iter cso_hash_insert(struct cso_hash *hash,
+ unsigned key, void *data)
+{
+ cso_data_might_grow(hash->data.d);
+
+ struct cso_node **nextNode = cso_hash_find_node(hash, key);
+ struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode);
+ struct cso_hash_iter iter = {hash, node};
+ return iter;
+}
+
+struct cso_hash * cso_hash_create(void)
+{
+ struct cso_hash *hash = malloc(sizeof(struct cso_hash));
+ hash->data.d = malloc(sizeof(struct cso_hash_data));
+ hash->data.d->fakeNext = 0;
+ hash->data.d->buckets = 0;
+ hash->data.d->size = 0;
+ hash->data.d->nodeSize = sizeof(struct cso_node);
+ hash->data.d->userNumBits = MinNumBits;
+ hash->data.d->numBits = 0;
+ hash->data.d->numBuckets = 0;
+
+ return hash;
+}
+
+void cso_hash_delete(struct cso_hash *hash)
+{
+ struct cso_node *e_for_x = (struct cso_node *)(hash->data.d);
+ struct cso_node **bucket = (struct cso_node **)(hash->data.d->buckets);
+ int n = hash->data.d->numBuckets;
+ while (n--) {
+ struct cso_node *cur = *bucket++;
+ while (cur != e_for_x) {
+ struct cso_node *next = cur->next;
+ cso_data_free_node(cur);
+ cur = next;
+ }
+ }
+ free(hash->data.d->buckets);
+ free(hash->data.d);
+ free(hash);
+}
+
+struct cso_hash_iter cso_hash_find(struct cso_hash *hash,
+ unsigned key)
+{
+ struct cso_node **nextNode = cso_hash_find_node(hash, key);
+ struct cso_hash_iter iter = {hash, *nextNode};
+ return iter;
+}
+
+unsigned cso_hash_iter_key(struct cso_hash_iter iter)
+{
+ if (!iter.node || iter.hash->data.e == iter.node)
+ return 0;
+ return iter.node->key;
+}
+
+void * cso_hash_iter_data(struct cso_hash_iter iter)
+{
+ if (!iter.node || iter.hash->data.e == iter.node)
+ return 0;
+ return iter.node->value;
+}
+
+static struct cso_node *cso_hash_data_next(struct cso_node *node)
+{
+ union {
+ struct cso_node *next;
+ struct cso_node *e;
+ struct cso_hash_data *d;
+ } a;
+ a.next = node->next;
+ if (!a.next) {
+ fprintf(stderr, "iterating beyond the last element\n");
+ return 0;
+ }
+ if (a.next->next)
+ return a.next;
+
+ int start = (node->key % a.d->numBuckets) + 1;
+ struct cso_node **bucket = a.d->buckets + start;
+ int n = a.d->numBuckets - start;
+ while (n--) {
+ if (*bucket != a.e)
+ return *bucket;
+ ++bucket;
+ }
+ return a.e;
+}
+
+
+static struct cso_node *cso_hash_data_prev(struct cso_node *node)
+{
+ union {
+ struct cso_node *e;
+ struct cso_hash_data *d;
+ } a;
+
+ a.e = node;
+ while (a.e->next)
+ a.e = a.e->next;
+
+ int start;
+ if (node == a.e)
+ start = a.d->numBuckets - 1;
+ else
+ start = node->key % a.d->numBuckets;
+
+ struct cso_node *sentinel = node;
+ struct cso_node **bucket = a.d->buckets + start;
+ while (start >= 0) {
+ if (*bucket != sentinel) {
+ struct cso_node *prev = *bucket;
+ while (prev->next != sentinel)
+ prev = prev->next;
+ return prev;
+ }
+
+ sentinel = a.e;
+ --bucket;
+ --start;
+ }
+ fprintf(stderr, "iterating backward beyond first element\n");
+ return a.e;
+}
+
+struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter)
+{
+ struct cso_hash_iter next = {iter.hash, cso_hash_data_next(iter.node)};
+ return next;
+}
+
+int cso_hash_iter_is_null(struct cso_hash_iter iter)
+{
+ if (!iter.node || iter.node == iter.hash->data.e)
+ return 1;
+ return 0;
+}
+
+void * cso_hash_take(struct cso_hash *hash,
+ unsigned akey)
+{
+ struct cso_node **node = cso_hash_find_node(hash, akey);
+ if (*node != hash->data.e) {
+ void *t = (*node)->value;
+ struct cso_node *next = (*node)->next;
+ cso_data_free_node(*node);
+ *node = next;
+ --hash->data.d->size;
+ cso_data_has_shrunk(hash->data.d);
+ return t;
+ }
+ return 0;
+}
+
+struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter)
+{
+ struct cso_hash_iter prev = {iter.hash,
+ cso_hash_data_prev(iter.node)};
+ return prev;
+}
+
+struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash)
+{
+ struct cso_hash_iter iter = {hash, cso_data_first_node(hash->data.d)};
+ return iter;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin <zack@tungstengraphics.com>
+ */
+
+#ifndef CSO_HASH_H
+#define CSO_HASH_H
+
+struct cso_hash;
+struct cso_node;
+
+struct cso_hash_iter {
+ struct cso_hash *hash;
+ struct cso_node *node;
+};
+
+struct cso_hash *cso_hash_create(void);
+void cso_hash_delete(struct cso_hash *hash);
+
+struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, unsigned key,
+ void *data);
+void *cso_hash_take(struct cso_hash *hash, unsigned key);
+
+struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash);
+struct cso_hash_iter cso_hash_find(struct cso_hash *hash, unsigned key);
+
+
+int cso_hash_iter_is_null(struct cso_hash_iter iter);
+unsigned cso_hash_iter_key(struct cso_hash_iter iter);
+void *cso_hash_iter_data(struct cso_hash_iter iter);
+
+struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter);
+struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter);
+
+#endif
--- /dev/null
+default:
+ cd .. ; make
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief Clipping stage
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "draw_context.h"
+#include "draw_private.h"
+
+
+#ifndef IS_NEGATIVE
+#define IS_NEGATIVE(X) ((X) < 0.0)
+#endif
+
+#ifndef DIFFERENT_SIGNS
+#define DIFFERENT_SIGNS(x, y) ((x) * (y) <= 0.0F && (x) - (y) != 0.0F)
+#endif
+
+#ifndef MAX_CLIPPED_VERTICES
+#define MAX_CLIPPED_VERTICES ((2 * (6 + PIPE_MAX_CLIP_PLANES))+1)
+#endif
+
+
+
+struct clipper {
+ struct draw_stage stage; /**< base class */
+
+ /* Basically duplicate some of the flatshading logic here:
+ */
+ boolean flat;
+ uint num_color_attribs;
+ uint color_attribs[4]; /* front/back primary/secondary colors */
+
+ float (*plane)[4];
+};
+
+
+/* This is a bit confusing:
+ */
+static INLINE struct clipper *clipper_stage( struct draw_stage *stage )
+{
+ return (struct clipper *)stage;
+}
+
+
+#define LINTERP(T, OUT, IN) ((OUT) + (T) * ((IN) - (OUT)))
+
+
+/* All attributes are float[4], so this is easy:
+ */
+static void interp_attr( float *fdst,
+ float t,
+ const float *fin,
+ const float *fout )
+{
+ fdst[0] = LINTERP( t, fout[0], fin[0] );
+ fdst[1] = LINTERP( t, fout[1], fin[1] );
+ fdst[2] = LINTERP( t, fout[2], fin[2] );
+ fdst[3] = LINTERP( t, fout[3], fin[3] );
+}
+
+static void copy_colors( struct draw_stage *stage,
+ struct vertex_header *dst,
+ const struct vertex_header *src )
+{
+ const struct clipper *clipper = clipper_stage(stage);
+ uint i;
+ for (i = 0; i < clipper->num_color_attribs; i++) {
+ const uint attr = clipper->color_attribs[i];
+ COPY_4FV(dst->data[attr], src->data[attr]);
+ }
+}
+
+
+
+/* Interpolate between two vertices to produce a third.
+ */
+static void interp( const struct clipper *clip,
+ struct vertex_header *dst,
+ float t,
+ const struct vertex_header *out,
+ const struct vertex_header *in )
+{
+ const unsigned nr_attrs = clip->stage.draw->num_vs_outputs;
+ unsigned j;
+
+ /* Vertex header.
+ */
+ {
+ dst->clipmask = 0;
+ dst->edgeflag = 0;
+ dst->pad = 0;
+ dst->vertex_id = UNDEFINED_VERTEX_ID;
+ }
+
+ /* Clip coordinates: interpolate normally
+ */
+ {
+ interp_attr(dst->clip, t, in->clip, out->clip);
+ }
+
+ /* Do the projective divide and insert window coordinates:
+ */
+ {
+ const float *pos = dst->clip;
+ const float *scale = clip->stage.draw->viewport.scale;
+ const float *trans = clip->stage.draw->viewport.translate;
+ const float oow = 1.0f / pos[3];
+
+ dst->data[0][0] = pos[0] * oow * scale[0] + trans[0];
+ dst->data[0][1] = pos[1] * oow * scale[1] + trans[1];
+ dst->data[0][2] = pos[2] * oow * scale[2] + trans[2];
+ dst->data[0][3] = oow;
+ }
+
+ /* Other attributes
+ * Note: start at 1 to skip winpos (data[0]) since we just computed
+ * it above.
+ */
+ for (j = 1; j < nr_attrs; j++) {
+ interp_attr(dst->data[j], t, in->data[j], out->data[j]);
+ }
+}
+
+
+static void emit_poly( struct draw_stage *stage,
+ struct vertex_header **inlist,
+ unsigned n,
+ const struct prim_header *origPrim)
+{
+ struct prim_header header;
+ unsigned i;
+
+ /* later stages may need the determinant, but only the sign matters */
+ header.det = origPrim->det;
+
+ for (i = 2; i < n; i++) {
+ header.v[0] = inlist[i-1];
+ header.v[1] = inlist[i];
+ header.v[2] = inlist[0]; /* keep in v[2] for flatshading */
+
+ {
+ unsigned tmp1 = header.v[1]->edgeflag;
+ unsigned tmp2 = header.v[2]->edgeflag;
+
+ if (i != n-1) header.v[1]->edgeflag = 0;
+ if (i != 2) header.v[2]->edgeflag = 0;
+
+ header.edgeflags = ((header.v[0]->edgeflag << 0) |
+ (header.v[1]->edgeflag << 1) |
+ (header.v[2]->edgeflag << 2));
+
+ stage->next->tri( stage->next, &header );
+
+ header.v[1]->edgeflag = tmp1;
+ header.v[2]->edgeflag = tmp2;
+ }
+ }
+}
+
+
+
+
+/* Clip a triangle against the viewport and user clip planes.
+ */
+static void
+do_clip_tri( struct draw_stage *stage,
+ struct prim_header *header,
+ unsigned clipmask )
+{
+ struct clipper *clipper = clipper_stage( stage );
+ struct vertex_header *a[MAX_CLIPPED_VERTICES];
+ struct vertex_header *b[MAX_CLIPPED_VERTICES];
+ struct vertex_header **inlist = a;
+ struct vertex_header **outlist = b;
+ unsigned tmpnr = 0;
+ unsigned n = 3;
+ unsigned i;
+
+ inlist[0] = header->v[0];
+ inlist[1] = header->v[1];
+ inlist[2] = header->v[2];
+
+ while (clipmask && n >= 3) {
+ const unsigned plane_idx = ffs(clipmask)-1;
+ const float *plane = clipper->plane[plane_idx];
+ struct vertex_header *vert_prev = inlist[0];
+ float dp_prev = dot4( vert_prev->clip, plane );
+ unsigned outcount = 0;
+
+ clipmask &= ~(1<<plane_idx);
+
+ inlist[n] = inlist[0]; /* prevent rotation of vertices */
+
+ for (i = 1; i <= n; i++) {
+ struct vertex_header *vert = inlist[i];
+
+ float dp = dot4( vert->clip, plane );
+
+ if (!IS_NEGATIVE(dp_prev)) {
+ outlist[outcount++] = vert_prev;
+ }
+
+ if (DIFFERENT_SIGNS(dp, dp_prev)) {
+ struct vertex_header *new_vert = clipper->stage.tmp[tmpnr++];
+ outlist[outcount++] = new_vert;
+
+ if (IS_NEGATIVE(dp)) {
+ /* Going out of bounds. Avoid division by zero as we
+ * know dp != dp_prev from DIFFERENT_SIGNS, above.
+ */
+ float t = dp / (dp - dp_prev);
+ interp( clipper, new_vert, t, vert, vert_prev );
+
+ /* Force edgeflag true in this case:
+ */
+ new_vert->edgeflag = 1;
+ } else {
+ /* Coming back in.
+ */
+ float t = dp_prev / (dp_prev - dp);
+ interp( clipper, new_vert, t, vert_prev, vert );
+
+ /* Copy starting vert's edgeflag:
+ */
+ new_vert->edgeflag = vert_prev->edgeflag;
+ }
+ }
+
+ vert_prev = vert;
+ dp_prev = dp;
+ }
+
+ {
+ struct vertex_header **tmp = inlist;
+ inlist = outlist;
+ outlist = tmp;
+ n = outcount;
+ }
+ }
+
+ /* If flat-shading, copy color to new provoking vertex.
+ */
+ if (clipper->flat && inlist[0] != header->v[2]) {
+ if (1) {
+ inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
+ }
+
+ copy_colors(stage, inlist[0], header->v[2]);
+ }
+
+
+
+ /* Emit the polygon as triangles to the setup stage:
+ */
+ if (n >= 3)
+ emit_poly( stage, inlist, n, header );
+}
+
+
+/* Clip a line against the viewport and user clip planes.
+ */
+static void
+do_clip_line( struct draw_stage *stage,
+ struct prim_header *header,
+ unsigned clipmask )
+{
+ const struct clipper *clipper = clipper_stage( stage );
+ struct vertex_header *v0 = header->v[0];
+ struct vertex_header *v1 = header->v[1];
+ const float *pos0 = v0->clip;
+ const float *pos1 = v1->clip;
+ float t0 = 0.0F;
+ float t1 = 0.0F;
+ struct prim_header newprim;
+
+ while (clipmask) {
+ const unsigned plane_idx = ffs(clipmask)-1;
+ const float *plane = clipper->plane[plane_idx];
+ const float dp0 = dot4( pos0, plane );
+ const float dp1 = dot4( pos1, plane );
+
+ if (dp1 < 0.0F) {
+ float t = dp1 / (dp1 - dp0);
+ t1 = MAX2(t1, t);
+ }
+
+ if (dp0 < 0.0F) {
+ float t = dp0 / (dp0 - dp1);
+ t0 = MAX2(t0, t);
+ }
+
+ if (t0 + t1 >= 1.0F)
+ return; /* discard */
+
+ clipmask &= ~(1 << plane_idx); /* turn off this plane's bit */
+ }
+
+ if (v0->clipmask) {
+ interp( clipper, stage->tmp[0], t0, v0, v1 );
+
+ if (clipper->flat)
+ copy_colors(stage, stage->tmp[0], v0);
+
+ newprim.v[0] = stage->tmp[0];
+ }
+ else {
+ newprim.v[0] = v0;
+ }
+
+ if (v1->clipmask) {
+ interp( clipper, stage->tmp[1], t1, v1, v0 );
+ newprim.v[1] = stage->tmp[1];
+ }
+ else {
+ newprim.v[1] = v1;
+ }
+
+ stage->next->line( stage->next, &newprim );
+}
+
+
+static void
+clip_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ if (header->v[0]->clipmask == 0)
+ stage->next->point( stage->next, header );
+}
+
+
+static void
+clip_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ unsigned clipmask = (header->v[0]->clipmask |
+ header->v[1]->clipmask);
+
+ if (clipmask == 0) {
+ /* no clipping needed */
+ stage->next->line( stage->next, header );
+ }
+ else if ((header->v[0]->clipmask &
+ header->v[1]->clipmask) == 0) {
+ do_clip_line(stage, header, clipmask);
+ }
+ /* else, totally clipped */
+}
+
+
+static void
+clip_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ unsigned clipmask = (header->v[0]->clipmask |
+ header->v[1]->clipmask |
+ header->v[2]->clipmask);
+
+ if (clipmask == 0) {
+ /* no clipping needed */
+ stage->next->tri( stage->next, header );
+ }
+ else if ((header->v[0]->clipmask &
+ header->v[1]->clipmask &
+ header->v[2]->clipmask) == 0) {
+ do_clip_tri(stage, header, clipmask);
+ }
+}
+
+/* Update state. Could further delay this until we hit the first
+ * primitive that really requires clipping.
+ */
+static void
+clip_init_state( struct draw_stage *stage )
+{
+ struct clipper *clipper = clipper_stage( stage );
+
+ clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE;
+
+ if (clipper->flat) {
+ const struct pipe_shader_state *vs = stage->draw->vertex_shader->state;
+ uint i;
+
+ clipper->num_color_attribs = 0;
+ for (i = 0; i < vs->num_outputs; i++) {
+ if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR ||
+ vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) {
+ clipper->color_attribs[clipper->num_color_attribs++] = i;
+ }
+ }
+ }
+
+ stage->tri = clip_tri;
+ stage->line = clip_line;
+}
+
+
+
+static void clip_first_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ clip_init_state( stage );
+ stage->tri( stage, header );
+}
+
+static void clip_first_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ clip_init_state( stage );
+ stage->line( stage, header );
+}
+
+
+static void clip_flush( struct draw_stage *stage,
+ unsigned flags )
+{
+ stage->tri = clip_first_tri;
+ stage->line = clip_first_line;
+ stage->next->flush( stage->next, flags );
+}
+
+
+static void clip_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void clip_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+/**
+ * Allocate a new clipper stage.
+ * \return pointer to new stage object
+ */
+struct draw_stage *draw_clip_stage( struct draw_context *draw )
+{
+ struct clipper *clipper = CALLOC_STRUCT(clipper);
+
+ draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 );
+
+ clipper->stage.draw = draw;
+ clipper->stage.point = clip_point;
+ clipper->stage.line = clip_first_line;
+ clipper->stage.tri = clip_first_tri;
+ clipper->stage.flush = clip_flush;
+ clipper->stage.reset_stipple_counter = clip_reset_stipple_counter;
+ clipper->stage.destroy = clip_destroy;
+
+ clipper->plane = draw->plane;
+
+ return &clipper->stage;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_util.h"
+#include "draw_context.h"
+#include "draw_private.h"
+
+
+
+struct draw_context *draw_create( void )
+{
+ struct draw_context *draw = CALLOC_STRUCT( draw_context );
+
+#if defined(__i386__) || defined(__386__)
+ draw->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL;
+#else
+ draw->use_sse = FALSE;
+#endif
+
+ /* create pipeline stages */
+ draw->pipeline.wide = draw_wide_stage( draw );
+ draw->pipeline.stipple = draw_stipple_stage( draw );
+ draw->pipeline.unfilled = draw_unfilled_stage( draw );
+ draw->pipeline.twoside = draw_twoside_stage( draw );
+ draw->pipeline.offset = draw_offset_stage( draw );
+ draw->pipeline.clip = draw_clip_stage( draw );
+ draw->pipeline.flatshade = draw_flatshade_stage( draw );
+ draw->pipeline.cull = draw_cull_stage( draw );
+ draw->pipeline.validate = draw_validate_stage( draw );
+ draw->pipeline.first = draw->pipeline.validate;
+
+ ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 );
+ ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 );
+ ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 );
+ ASSIGN_4V( draw->plane[3], 0, 1, 0, 1 );
+ ASSIGN_4V( draw->plane[4], 0, 0, 1, 1 ); /* yes these are correct */
+ ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */
+ draw->nr_planes = 6;
+
+ /* Statically allocate maximum sized vertices for the cache - could be cleverer...
+ */
+ {
+ uint i;
+ const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f;
+ char *tmp = align_malloc(Elements(draw->vcache.vertex) * size, 16);
+
+ for (i = 0; i < Elements(draw->vcache.vertex); i++)
+ draw->vcache.vertex[i] = (struct vertex_header *)(tmp + i * size);
+ }
+
+ draw->shader_queue_flush = draw_vertex_shader_queue_flush;
+
+ draw->convert_wide_points = TRUE;
+ draw->convert_wide_lines = TRUE;
+
+ draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */
+
+ draw_vertex_cache_invalidate( draw );
+ draw_set_mapped_element_buffer( draw, 0, NULL );
+
+ return draw;
+}
+
+
+void draw_destroy( struct draw_context *draw )
+{
+ draw->pipeline.wide->destroy( draw->pipeline.wide );
+ draw->pipeline.stipple->destroy( draw->pipeline.stipple );
+ draw->pipeline.unfilled->destroy( draw->pipeline.unfilled );
+ draw->pipeline.twoside->destroy( draw->pipeline.twoside );
+ draw->pipeline.offset->destroy( draw->pipeline.offset );
+ draw->pipeline.clip->destroy( draw->pipeline.clip );
+ draw->pipeline.flatshade->destroy( draw->pipeline.flatshade );
+ draw->pipeline.cull->destroy( draw->pipeline.cull );
+ draw->pipeline.validate->destroy( draw->pipeline.validate );
+ if (draw->pipeline.rasterize)
+ draw->pipeline.rasterize->destroy( draw->pipeline.rasterize );
+ tgsi_exec_machine_free_data(&draw->machine);
+ align_free( draw->vcache.vertex[0] ); /* Frees all the vertices. */
+ FREE( draw );
+}
+
+
+
+void draw_flush( struct draw_context *draw )
+{
+ draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+}
+
+
+
+/**
+ * Register new primitive rasterization/rendering state.
+ * This causes the drawing pipeline to be rebuilt.
+ */
+void draw_set_rasterizer_state( struct draw_context *draw,
+ const struct pipe_rasterizer_state *raster )
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
+ draw->rasterizer = raster;
+}
+
+
+/**
+ * Plug in the primitive rendering/rasterization stage (which is the last
+ * stage in the drawing pipeline).
+ * This is provided by the device driver.
+ */
+void draw_set_rasterize_stage( struct draw_context *draw,
+ struct draw_stage *stage )
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
+ draw->pipeline.rasterize = stage;
+}
+
+
+/**
+ * Set the draw module's clipping state.
+ */
+void draw_set_clip_state( struct draw_context *draw,
+ const struct pipe_clip_state *clip )
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
+ assert(clip->nr <= PIPE_MAX_CLIP_PLANES);
+ memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0]));
+ draw->nr_planes = 6 + clip->nr;
+}
+
+
+/**
+ * Set the draw module's viewport state.
+ */
+void draw_set_viewport_state( struct draw_context *draw,
+ const struct pipe_viewport_state *viewport )
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->viewport = *viewport; /* struct copy */
+}
+
+
+
+void
+draw_set_vertex_buffer(struct draw_context *draw,
+ unsigned attr,
+ const struct pipe_vertex_buffer *buffer)
+{
+ draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
+ assert(attr < PIPE_ATTRIB_MAX);
+ draw->vertex_buffer[attr] = *buffer;
+}
+
+
+void
+draw_set_vertex_element(struct draw_context *draw,
+ unsigned attr,
+ const struct pipe_vertex_element *element)
+{
+ draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
+ assert(attr < PIPE_ATTRIB_MAX);
+ draw->vertex_element[attr] = *element;
+}
+
+
+/**
+ * Tell drawing context where to find mapped vertex buffers.
+ */
+void
+draw_set_mapped_vertex_buffer(struct draw_context *draw,
+ unsigned attr, const void *buffer)
+{
+ draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
+ draw->user.vbuffer[attr] = buffer;
+}
+
+
+void
+draw_set_mapped_constant_buffer(struct draw_context *draw,
+ const void *buffer)
+{
+ draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ );
+ draw->user.constants = buffer;
+}
+
+
+/**
+ * Tells the draw module whether to convert wide points (size != 1)
+ * into triangles.
+ */
+void
+draw_convert_wide_points(struct draw_context *draw, boolean enable)
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->convert_wide_points = enable;
+}
+
+
+/**
+ * Tells the draw module whether to convert wide lines (width != 1)
+ * into triangles.
+ */
+void
+draw_convert_wide_lines(struct draw_context *draw, boolean enable)
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->convert_wide_lines = enable;
+}
+
+
+/**
+ * Allocate space for temporary post-transform vertices, such as for clipping.
+ */
+void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr )
+{
+ assert(!stage->tmp);
+
+ stage->nr_tmps = nr;
+
+ if (nr) {
+ ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr );
+ unsigned i;
+
+ stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr );
+
+ for (i = 0; i < nr; i++)
+ stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE);
+ }
+}
+
+
+void draw_free_temp_verts( struct draw_stage *stage )
+{
+ if (stage->tmp) {
+ FREE( stage->tmp[0] );
+ FREE( stage->tmp );
+ stage->tmp = NULL;
+ }
+}
+
+
+boolean draw_use_sse(struct draw_context *draw)
+{
+ return (boolean) draw->use_sse;
+}
+
+
+void draw_reset_vertex_ids(struct draw_context *draw)
+{
+ struct draw_stage *stage = draw->pipeline.first;
+
+ while (stage) {
+ unsigned i;
+
+ for (i = 0; i < stage->nr_tmps; i++)
+ stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID;
+
+ stage = stage->next;
+ }
+
+ draw_vertex_cache_reset_vertex_ids(draw);
+}
--- /dev/null
+
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief Public interface into the drawing module.
+ */
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef DRAW_CONTEXT_H
+#define DRAW_CONTEXT_H
+
+
+#include "pipe/p_state.h"
+
+
+struct vertex_buffer;
+struct vertex_info;
+struct draw_context;
+struct draw_stage;
+struct draw_vertex_shader;
+
+
+/**
+ * Clipmask flags
+ */
+/*@{*/
+#define CLIP_RIGHT_BIT 0x01
+#define CLIP_LEFT_BIT 0x02
+#define CLIP_TOP_BIT 0x04
+#define CLIP_BOTTOM_BIT 0x08
+#define CLIP_NEAR_BIT 0x10
+#define CLIP_FAR_BIT 0x20
+/*@}*/
+
+/**
+ * Bitshift for each clip flag
+ */
+/*@{*/
+#define CLIP_RIGHT_SHIFT 0
+#define CLIP_LEFT_SHIFT 1
+#define CLIP_TOP_SHIFT 2
+#define CLIP_BOTTOM_SHIFT 3
+#define CLIP_NEAR_SHIFT 4
+#define CLIP_FAR_SHIFT 5
+/*@}*/
+
+
+struct draw_context *draw_create( void );
+
+void draw_destroy( struct draw_context *draw );
+
+void draw_set_viewport_state( struct draw_context *draw,
+ const struct pipe_viewport_state *viewport );
+
+void draw_set_clip_state( struct draw_context *pipe,
+ const struct pipe_clip_state *clip );
+
+void draw_set_rasterizer_state( struct draw_context *draw,
+ const struct pipe_rasterizer_state *raster );
+
+void draw_set_rasterize_stage( struct draw_context *draw,
+ struct draw_stage *stage );
+
+void draw_convert_wide_points(struct draw_context *draw, boolean enable);
+
+void draw_convert_wide_lines(struct draw_context *draw, boolean enable);
+
+
+struct draw_vertex_shader *
+draw_create_vertex_shader(struct draw_context *draw,
+ const struct pipe_shader_state *shader);
+void draw_bind_vertex_shader(struct draw_context *draw,
+ struct draw_vertex_shader *dvs);
+void draw_delete_vertex_shader(struct draw_context *draw,
+ struct draw_vertex_shader *dvs);
+
+boolean draw_use_sse(struct draw_context *draw);
+
+void draw_set_vertex_buffer(struct draw_context *draw,
+ unsigned attr,
+ const struct pipe_vertex_buffer *buffer);
+
+void draw_set_vertex_element(struct draw_context *draw,
+ unsigned attr,
+ const struct pipe_vertex_element *element);
+
+void draw_set_mapped_element_buffer( struct draw_context *draw,
+ unsigned eltSize, void *elements );
+
+void draw_set_mapped_vertex_buffer(struct draw_context *draw,
+ unsigned attr, const void *buffer);
+
+void draw_set_mapped_constant_buffer(struct draw_context *draw,
+ const void *buffer);
+
+
+/***********************************************************************
+ * draw_prim.c
+ */
+
+void draw_arrays(struct draw_context *draw, unsigned prim,
+ unsigned start, unsigned count);
+
+void draw_flush(struct draw_context *draw);
+
+/***********************************************************************
+ * draw_debug.c
+ */
+boolean draw_validate_prim( unsigned prim, unsigned length );
+unsigned draw_trim_prim( unsigned mode, unsigned count );
+
+
+
+#endif /* DRAW_CONTEXT_H */
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief Drawing stage for polygon culling
+ */
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_defines.h"
+#include "draw_private.h"
+
+
+struct cull_stage {
+ struct draw_stage stage;
+ unsigned winding; /**< which winding(s) to cull (one of PIPE_WINDING_x) */
+};
+
+
+static INLINE struct cull_stage *cull_stage( struct draw_stage *stage )
+{
+ return (struct cull_stage *)stage;
+}
+
+
+
+
+static void cull_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ /* Window coords: */
+ const float *v0 = header->v[0]->data[0];
+ const float *v1 = header->v[1]->data[0];
+ const float *v2 = header->v[2]->data[0];
+
+ /* edge vectors e = v0 - v2, f = v1 - v2 */
+ const float ex = v0[0] - v2[0];
+ const float ey = v0[1] - v2[1];
+ const float fx = v1[0] - v2[0];
+ const float fy = v1[1] - v2[1];
+
+ /* det = cross(e,f).z */
+ header->det = ex * fy - ey * fx;
+
+ if (header->det != 0) {
+ /* if (det < 0 then Z points toward camera and triangle is
+ * counter-clockwise winding.
+ */
+ unsigned winding = (header->det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW;
+
+ if ((winding & cull_stage(stage)->winding) == 0) {
+ /* triangle is not culled, pass to next stage */
+ stage->next->tri( stage->next, header );
+ }
+ }
+}
+
+static void cull_first_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct cull_stage *cull = cull_stage(stage);
+
+ cull->winding = stage->draw->rasterizer->cull_mode;
+
+ stage->tri = cull_tri;
+ stage->tri( stage, header );
+}
+
+
+
+static void cull_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ stage->next->line( stage->next, header );
+}
+
+
+static void cull_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ stage->next->point( stage->next, header );
+}
+
+
+static void cull_flush( struct draw_stage *stage, unsigned flags )
+{
+ stage->tri = cull_first_tri;
+ stage->next->flush( stage->next, flags );
+}
+
+static void cull_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void cull_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+/**
+ * Create a new polygon culling stage.
+ */
+struct draw_stage *draw_cull_stage( struct draw_context *draw )
+{
+ struct cull_stage *cull = CALLOC_STRUCT(cull_stage);
+
+ draw_alloc_temp_verts( &cull->stage, 0 );
+
+ cull->stage.draw = draw;
+ cull->stage.next = NULL;
+ cull->stage.point = cull_point;
+ cull->stage.line = cull_line;
+ cull->stage.tri = cull_first_tri;
+ cull->stage.flush = cull_flush;
+ cull->stage.reset_stipple_counter = cull_reset_stipple_counter;
+ cull->stage.destroy = cull_destroy;
+
+ return &cull->stage;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "draw_private.h"
+#include "draw_context.h"
+
+
+
+static void
+draw_prim_info(unsigned prim, unsigned *first, unsigned *incr)
+{
+ assert(prim >= PIPE_PRIM_POINTS);
+ assert(prim <= PIPE_PRIM_POLYGON);
+
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ *first = 1;
+ *incr = 1;
+ break;
+ case PIPE_PRIM_LINES:
+ *first = 2;
+ *incr = 2;
+ break;
+ case PIPE_PRIM_LINE_STRIP:
+ *first = 2;
+ *incr = 1;
+ break;
+ case PIPE_PRIM_LINE_LOOP:
+ *first = 2;
+ *incr = 1;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ *first = 3;
+ *incr = 3;
+ break;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ *first = 3;
+ *incr = 1;
+ break;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ case PIPE_PRIM_POLYGON:
+ *first = 3;
+ *incr = 1;
+ break;
+ case PIPE_PRIM_QUADS:
+ *first = 4;
+ *incr = 4;
+ break;
+ case PIPE_PRIM_QUAD_STRIP:
+ *first = 4;
+ *incr = 2;
+ break;
+ default:
+ assert(0);
+ *first = 1;
+ *incr = 1;
+ break;
+ }
+}
+
+
+unsigned
+draw_trim_prim( unsigned mode, unsigned count )
+{
+ unsigned length, first, incr;
+
+ draw_prim_info( mode, &first, &incr );
+
+ if (count < first)
+ length = 0;
+ else
+ length = count - (count - first) % incr;
+
+ return length;
+}
+
+
+boolean
+draw_validate_prim( unsigned mode, unsigned count )
+{
+ return (count > 0 &&
+ count == draw_trim_prim( mode, count ));
+}
+
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw_private.h"
+
+
+/** subclass of draw_stage */
+struct flat_stage
+{
+ struct draw_stage stage;
+
+ uint num_color_attribs;
+ uint color_attribs[4]; /* front/back primary/secondary colors */
+};
+
+
+static INLINE struct flat_stage *
+flat_stage(struct draw_stage *stage)
+{
+ return (struct flat_stage *) stage;
+}
+
+
+/** Copy all the color attributes from 'src' vertex to 'dst' vertex */
+static INLINE void copy_colors( struct draw_stage *stage,
+ struct vertex_header *dst,
+ const struct vertex_header *src )
+{
+ const struct flat_stage *flat = flat_stage(stage);
+ uint i;
+ for (i = 0; i < flat->num_color_attribs; i++) {
+ const uint attr = flat->color_attribs[i];
+ COPY_4FV(dst->data[attr], src->data[attr]);
+ }
+}
+
+
+/** Copy all the color attributes from src vertex to dst0 & dst1 vertices */
+static INLINE void copy_colors2( struct draw_stage *stage,
+ struct vertex_header *dst0,
+ struct vertex_header *dst1,
+ const struct vertex_header *src )
+{
+ const struct flat_stage *flat = flat_stage(stage);
+ uint i;
+ for (i = 0; i < flat->num_color_attribs; i++) {
+ const uint attr = flat->color_attribs[i];
+ COPY_4FV(dst0->data[attr], src->data[attr]);
+ COPY_4FV(dst1->data[attr], src->data[attr]);
+ }
+}
+
+
+/**
+ * Flatshade tri. Required for clipping and when unfilled tris are
+ * active, otherwise handled by hardware.
+ */
+static void flatshade_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct prim_header tmp;
+
+ tmp.det = header->det;
+ tmp.edgeflags = header->edgeflags;
+ tmp.v[0] = dup_vert(stage, header->v[0], 0);
+ tmp.v[1] = dup_vert(stage, header->v[1], 1);
+ tmp.v[2] = header->v[2];
+
+ copy_colors2(stage, tmp.v[0], tmp.v[1], tmp.v[2]);
+
+ stage->next->tri( stage->next, &tmp );
+}
+
+
+/**
+ * Flatshade line. Required for clipping.
+ */
+static void flatshade_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct prim_header tmp;
+
+ tmp.v[0] = dup_vert(stage, header->v[0], 0);
+ tmp.v[1] = header->v[1];
+
+ copy_colors(stage, tmp.v[0], tmp.v[1]);
+
+ stage->next->line( stage->next, &tmp );
+}
+
+
+static void flatshade_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ stage->next->point( stage->next, header );
+}
+
+
+static void flatshade_init_state( struct draw_stage *stage )
+{
+ struct flat_stage *flat = flat_stage(stage);
+ const struct pipe_shader_state *vs = stage->draw->vertex_shader->state;
+ uint i;
+
+ /* Find which vertex shader outputs are colors, make a list */
+ flat->num_color_attribs = 0;
+ for (i = 0; i < vs->num_outputs; i++) {
+ if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR ||
+ vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) {
+ flat->color_attribs[flat->num_color_attribs++] = i;
+ }
+ }
+
+ stage->line = flatshade_line;
+ stage->tri = flatshade_tri;
+}
+
+static void flatshade_first_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ flatshade_init_state( stage );
+ stage->tri( stage, header );
+}
+
+static void flatshade_first_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ flatshade_init_state( stage );
+ stage->line( stage, header );
+}
+
+
+static void flatshade_flush( struct draw_stage *stage,
+ unsigned flags )
+{
+ stage->tri = flatshade_first_tri;
+ stage->line = flatshade_first_line;
+ stage->next->flush( stage->next, flags );
+}
+
+
+static void flatshade_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void flatshade_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+/**
+ * Create flatshading drawing stage.
+ */
+struct draw_stage *draw_flatshade_stage( struct draw_context *draw )
+{
+ struct flat_stage *flatshade = CALLOC_STRUCT(flat_stage);
+
+ draw_alloc_temp_verts( &flatshade->stage, 2 );
+
+ flatshade->stage.draw = draw;
+ flatshade->stage.next = NULL;
+ flatshade->stage.point = flatshade_point;
+ flatshade->stage.line = flatshade_first_line;
+ flatshade->stage.tri = flatshade_first_tri;
+ flatshade->stage.flush = flatshade_flush;
+ flatshade->stage.reset_stipple_counter = flatshade_reset_stipple_counter;
+ flatshade->stage.destroy = flatshade_destroy;
+
+ return &flatshade->stage;
+}
+
+
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief polygon offset state
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Brian Paul
+ */
+
+#include "pipe/p_util.h"
+#include "draw_private.h"
+
+
+
+struct offset_stage {
+ struct draw_stage stage;
+
+ float scale;
+ float units;
+};
+
+
+
+static INLINE struct offset_stage *offset_stage( struct draw_stage *stage )
+{
+ return (struct offset_stage *) stage;
+}
+
+
+
+
+
+/**
+ * Offset tri Z. Some hardware can handle this, but not usually when
+ * doing unfilled rendering.
+ */
+static void do_offset_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct offset_stage *offset = offset_stage(stage);
+ float inv_det = 1.0f / header->det;
+
+ /* Window coords:
+ */
+ float *v0 = header->v[0]->data[0];
+ float *v1 = header->v[1]->data[0];
+ float *v2 = header->v[2]->data[0];
+
+ /* edge vectors e = v0 - v2, f = v1 - v2 */
+ float ex = v0[0] - v2[0];
+ float ey = v0[1] - v2[1];
+ float ez = v0[2] - v2[2];
+ float fx = v1[0] - v2[0];
+ float fy = v1[1] - v2[1];
+ float fz = v1[2] - v2[2];
+
+ /* (a,b) = cross(e,f).xy */
+ float a = ey*fz - ez*fy;
+ float b = ez*fx - ex*fz;
+
+ float dzdx = FABSF(a * inv_det);
+ float dzdy = FABSF(b * inv_det);
+
+ float zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale;
+
+ /*
+ * Note: we're applying the offset and clamping per-vertex.
+ * Ideally, the offset is applied per-fragment prior to fragment shading.
+ */
+ v0[2] = CLAMP(v0[2] + zoffset, 0.0f, 1.0f);
+ v1[2] = CLAMP(v1[2] + zoffset, 0.0f, 1.0f);
+ v2[2] = CLAMP(v2[2] + zoffset, 0.0f, 1.0f);
+
+ stage->next->tri( stage->next, header );
+}
+
+
+static void offset_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct prim_header tmp;
+
+ tmp.det = header->det;
+ tmp.edgeflags = header->edgeflags;
+ tmp.v[0] = dup_vert(stage, header->v[0], 0);
+ tmp.v[1] = dup_vert(stage, header->v[1], 1);
+ tmp.v[2] = dup_vert(stage, header->v[2], 2);
+
+ do_offset_tri( stage, &tmp );
+}
+
+
+static void offset_first_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct offset_stage *offset = offset_stage(stage);
+ float mrd = 1.0f / 65535.0f; /* XXX this depends on depthbuffer bits! */
+
+ offset->units = stage->draw->rasterizer->offset_units * mrd;
+ offset->scale = stage->draw->rasterizer->offset_scale;
+
+ stage->tri = offset_tri;
+ stage->tri( stage, header );
+}
+
+
+static void offset_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ stage->next->line( stage->next, header );
+}
+
+
+static void offset_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ stage->next->point( stage->next, header );
+}
+
+
+static void offset_flush( struct draw_stage *stage,
+ unsigned flags )
+{
+ stage->tri = offset_first_tri;
+ stage->next->flush( stage->next, flags );
+}
+
+
+static void offset_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void offset_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+/**
+ * Create polygon offset drawing stage.
+ */
+struct draw_stage *draw_offset_stage( struct draw_context *draw )
+{
+ struct offset_stage *offset = CALLOC_STRUCT(offset_stage);
+
+ draw_alloc_temp_verts( &offset->stage, 3 );
+
+ offset->stage.draw = draw;
+ offset->stage.next = NULL;
+ offset->stage.point = offset_point;
+ offset->stage.line = offset_line;
+ offset->stage.tri = offset_first_tri;
+ offset->stage.flush = offset_flush;
+ offset->stage.reset_stipple_counter = offset_reset_stipple_counter;
+ offset->stage.destroy = offset_destroy;
+
+ return &offset->stage;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_debug.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+
+
+
+#define RP_NONE 0
+#define RP_POINT 1
+#define RP_LINE 2
+#define RP_TRI 3
+
+
+static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = {
+ RP_POINT,
+ RP_LINE,
+ RP_LINE,
+ RP_LINE,
+ RP_TRI,
+ RP_TRI,
+ RP_TRI,
+ RP_TRI,
+ RP_TRI,
+ RP_TRI
+};
+
+
+static void draw_prim_queue_flush( struct draw_context *draw )
+{
+ unsigned i;
+
+ if (0)
+ debug_printf("Flushing with %d prims, %d verts\n",
+ draw->pq.queue_nr, draw->vs.queue_nr);
+
+ assert (draw->pq.queue_nr != 0);
+
+ /* NOTE: we cannot save draw->pipeline->first in a local var because
+ * draw->pipeline->first is often changed by the first call to tri(),
+ * line(), etc.
+ */
+ if (draw->rasterizer->line_stipple_enable) {
+ switch (draw->reduced_prim) {
+ case RP_TRI:
+ for (i = 0; i < draw->pq.queue_nr; i++) {
+ if (draw->pq.queue[i].reset_line_stipple)
+ draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
+
+ draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] );
+ }
+ break;
+ case RP_LINE:
+ for (i = 0; i < draw->pq.queue_nr; i++) {
+ if (draw->pq.queue[i].reset_line_stipple)
+ draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
+
+ draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] );
+ }
+ break;
+ case RP_POINT:
+ draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
+ for (i = 0; i < draw->pq.queue_nr; i++)
+ draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] );
+ break;
+ }
+ }
+ else {
+ switch (draw->reduced_prim) {
+ case RP_TRI:
+ for (i = 0; i < draw->pq.queue_nr; i++)
+ draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] );
+ break;
+ case RP_LINE:
+ for (i = 0; i < draw->pq.queue_nr; i++)
+ draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] );
+ break;
+ case RP_POINT:
+ for (i = 0; i < draw->pq.queue_nr; i++)
+ draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] );
+ break;
+ }
+ }
+
+ draw->pq.queue_nr = 0;
+ draw_vertex_cache_unreference( draw );
+}
+
+
+
+void draw_do_flush( struct draw_context *draw, unsigned flags )
+{
+ if (0)
+ debug_printf("Flushing with %d verts, %d prims\n",
+ draw->vs.queue_nr,
+ draw->pq.queue_nr );
+
+
+ if (flags >= DRAW_FLUSH_SHADER_QUEUE) {
+ if (draw->vs.queue_nr)
+ (*draw->shader_queue_flush)(draw);
+
+ if (flags >= DRAW_FLUSH_PRIM_QUEUE) {
+ if (draw->pq.queue_nr)
+ draw_prim_queue_flush(draw);
+
+ if (flags >= DRAW_FLUSH_VERTEX_CACHE) {
+ draw_vertex_cache_invalidate(draw);
+
+ if (flags >= DRAW_FLUSH_STATE_CHANGE) {
+ draw->pipeline.first->flush( draw->pipeline.first, flags );
+ draw->pipeline.first = draw->pipeline.validate;
+ draw->reduced_prim = ~0;
+ }
+ }
+ }
+ }
+}
+
+
+
+/* Return a pointer to a freshly queued primitive header. Ensure that
+ * there is room in the vertex cache for a maximum of "nr_verts" new
+ * vertices. Flush primitive and/or vertex queues if necessary to
+ * make space.
+ */
+static struct prim_header *get_queued_prim( struct draw_context *draw,
+ unsigned nr_verts )
+{
+ if (!draw_vertex_cache_check_space( draw, nr_verts )) {
+// debug_printf("v");
+ draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE );
+ }
+ else if (draw->pq.queue_nr == PRIM_QUEUE_LENGTH) {
+// debug_printf("p");
+ draw_do_flush( draw, DRAW_FLUSH_PRIM_QUEUE );
+ }
+
+ assert(draw->pq.queue_nr < PRIM_QUEUE_LENGTH);
+
+ return &draw->pq.queue[draw->pq.queue_nr++];
+}
+
+
+
+/**
+ * Add a point to the primitive queue.
+ * \param i0 index into user's vertex arrays
+ */
+static void do_point( struct draw_context *draw,
+ unsigned i0 )
+{
+ struct prim_header *prim = get_queued_prim( draw, 1 );
+
+ prim->reset_line_stipple = 0;
+ prim->edgeflags = 1;
+ prim->pad = 0;
+ prim->v[0] = draw->vcache.get_vertex( draw, i0 );
+}
+
+
+/**
+ * Add a line to the primitive queue.
+ * \param i0 index into user's vertex arrays
+ * \param i1 index into user's vertex arrays
+ */
+static void do_line( struct draw_context *draw,
+ boolean reset_stipple,
+ unsigned i0,
+ unsigned i1 )
+{
+ struct prim_header *prim = get_queued_prim( draw, 2 );
+
+ prim->reset_line_stipple = reset_stipple;
+ prim->edgeflags = 1;
+ prim->pad = 0;
+ prim->v[0] = draw->vcache.get_vertex( draw, i0 );
+ prim->v[1] = draw->vcache.get_vertex( draw, i1 );
+}
+
+/**
+ * Add a triangle to the primitive queue.
+ */
+static void do_triangle( struct draw_context *draw,
+ unsigned i0,
+ unsigned i1,
+ unsigned i2 )
+{
+ struct prim_header *prim = get_queued_prim( draw, 3 );
+
+ prim->reset_line_stipple = 1;
+ prim->edgeflags = ~0;
+ prim->pad = 0;
+ prim->v[0] = draw->vcache.get_vertex( draw, i0 );
+ prim->v[1] = draw->vcache.get_vertex( draw, i1 );
+ prim->v[2] = draw->vcache.get_vertex( draw, i2 );
+}
+
+static void do_ef_triangle( struct draw_context *draw,
+ boolean reset_stipple,
+ unsigned ef_mask,
+ unsigned i0,
+ unsigned i1,
+ unsigned i2 )
+{
+ struct prim_header *prim = get_queued_prim( draw, 3 );
+ struct vertex_header *v0 = draw->vcache.get_vertex( draw, i0 );
+ struct vertex_header *v1 = draw->vcache.get_vertex( draw, i1 );
+ struct vertex_header *v2 = draw->vcache.get_vertex( draw, i2 );
+
+ prim->reset_line_stipple = reset_stipple;
+
+ prim->edgeflags = ef_mask & ((v0->edgeflag << 0) |
+ (v1->edgeflag << 1) |
+ (v2->edgeflag << 2));
+ prim->pad = 0;
+ prim->v[0] = v0;
+ prim->v[1] = v1;
+ prim->v[2] = v2;
+}
+
+
+static void do_ef_quad( struct draw_context *draw,
+ unsigned v0,
+ unsigned v1,
+ unsigned v2,
+ unsigned v3 )
+{
+ const unsigned omitEdge2 = ~(1 << 1);
+ const unsigned omitEdge3 = ~(1 << 2);
+ do_ef_triangle( draw, 1, omitEdge2, v0, v1, v3 );
+ do_ef_triangle( draw, 0, omitEdge3, v1, v2, v3 );
+}
+
+static void do_quad( struct draw_context *draw,
+ unsigned v0,
+ unsigned v1,
+ unsigned v2,
+ unsigned v3 )
+{
+ do_triangle( draw, v0, v1, v3 );
+ do_triangle( draw, v1, v2, v3 );
+}
+
+
+/**
+ * Main entrypoint to draw some number of points/lines/triangles
+ */
+static void
+draw_prim( struct draw_context *draw,
+ unsigned prim, unsigned start, unsigned count )
+{
+ unsigned i;
+ boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL);
+
+// debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count );
+
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ for (i = 0; i < count; i ++) {
+ do_point( draw,
+ start + i );
+ }
+ break;
+
+ case PIPE_PRIM_LINES:
+ for (i = 0; i+1 < count; i += 2) {
+ do_line( draw,
+ TRUE,
+ start + i + 0,
+ start + i + 1);
+ }
+ break;
+
+ case PIPE_PRIM_LINE_LOOP:
+ if (count >= 2) {
+ for (i = 1; i < count; i++) {
+ do_line( draw,
+ i == 1, /* XXX: only if vb not split */
+ start + i - 1,
+ start + i );
+ }
+
+ do_line( draw,
+ 0,
+ start + count - 1,
+ start + 0 );
+ }
+ break;
+
+ case PIPE_PRIM_LINE_STRIP:
+ for (i = 1; i < count; i++) {
+ do_line( draw,
+ i == 1,
+ start + i - 1,
+ start + i );
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLES:
+ if (unfilled) {
+ for (i = 0; i+2 < count; i += 3) {
+ do_ef_triangle( draw,
+ 1,
+ ~0,
+ start + i + 0,
+ start + i + 1,
+ start + i + 2 );
+ }
+ }
+ else {
+ for (i = 0; i+2 < count; i += 3) {
+ do_triangle( draw,
+ start + i + 0,
+ start + i + 1,
+ start + i + 2 );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ for (i = 0; i+2 < count; i++) {
+ if (i & 1) {
+ do_triangle( draw,
+ start + i + 1,
+ start + i + 0,
+ start + i + 2 );
+ }
+ else {
+ do_triangle( draw,
+ start + i + 0,
+ start + i + 1,
+ start + i + 2 );
+ }
+ }
+ break;
+
+ case PIPE_PRIM_TRIANGLE_FAN:
+ if (count >= 3) {
+ for (i = 0; i+2 < count; i++) {
+ do_triangle( draw,
+ start + 0,
+ start + i + 1,
+ start + i + 2 );
+ }
+ }
+ break;
+
+
+ case PIPE_PRIM_QUADS:
+ if (unfilled) {
+ for (i = 0; i+3 < count; i += 4) {
+ do_ef_quad( draw,
+ start + i + 0,
+ start + i + 1,
+ start + i + 2,
+ start + i + 3);
+ }
+ }
+ else {
+ for (i = 0; i+3 < count; i += 4) {
+ do_quad( draw,
+ start + i + 0,
+ start + i + 1,
+ start + i + 2,
+ start + i + 3);
+ }
+ }
+ break;
+
+ case PIPE_PRIM_QUAD_STRIP:
+ if (unfilled) {
+ for (i = 0; i+3 < count; i += 2) {
+ do_ef_quad( draw,
+ start + i + 2,
+ start + i + 0,
+ start + i + 1,
+ start + i + 3);
+ }
+ }
+ else {
+ for (i = 0; i+3 < count; i += 2) {
+ do_quad( draw,
+ start + i + 2,
+ start + i + 0,
+ start + i + 1,
+ start + i + 3);
+ }
+ }
+ break;
+
+ case PIPE_PRIM_POLYGON:
+ if (unfilled) {
+ unsigned ef_mask = (1<<2) | (1<<0);
+
+ for (i = 0; i+2 < count; i++) {
+
+ if (i + 3 >= count)
+ ef_mask |= (1<<1);
+
+ do_ef_triangle( draw,
+ i == 0,
+ ef_mask,
+ start + i + 1,
+ start + i + 2,
+ start + 0);
+
+ ef_mask &= ~(1<<2);
+ }
+ }
+ else {
+ for (i = 0; i+2 < count; i++) {
+ do_triangle( draw,
+ start + i + 1,
+ start + i + 2,
+ start + 0);
+ }
+ }
+ break;
+
+ default:
+ assert(0);
+ break;
+ }
+}
+
+
+
+
+/**
+ * Draw vertex arrays
+ * This is the main entrypoint into the drawing module.
+ * \param prim one of PIPE_PRIM_x
+ * \param start index of first vertex to draw
+ * \param count number of vertices to draw
+ */
+void
+draw_arrays(struct draw_context *draw, unsigned prim,
+ unsigned start, unsigned count)
+{
+ if (reduced_prim[prim] != draw->reduced_prim) {
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+ draw->reduced_prim = reduced_prim[prim];
+ }
+
+ /* drawing done here: */
+ draw_prim(draw, prim, start, count);
+}
+
+
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Private data structures, etc for the draw module.
+ */
+
+
+/**
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ */
+
+
+#ifndef DRAW_PRIVATE_H
+#define DRAW_PRIVATE_H
+
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+
+#include "x86/rtasm/x86sse.h"
+#include "tgsi/exec/tgsi_exec.h"
+
+
+struct gallivm_prog;
+struct gallivm_cpu_engine;
+
+/**
+ * Basic vertex info.
+ * Carry some useful information around with the vertices in the prim pipe.
+ */
+struct vertex_header {
+ unsigned clipmask:12;
+ unsigned edgeflag:1;
+ unsigned pad:3;
+ unsigned vertex_id:16;
+
+ float clip[4];
+
+ float data[][4]; /* Note variable size */
+};
+
+/* NOTE: It should match vertex_id size above */
+#define UNDEFINED_VERTEX_ID 0xffff
+
+/* XXX This is too large */
+#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float))
+
+
+
+/**
+ * Basic info for a point/line/triangle primitive.
+ */
+struct prim_header {
+ float det; /**< front/back face determinant */
+ unsigned reset_line_stipple:1;
+ unsigned edgeflags:3;
+ unsigned pad:28;
+ struct vertex_header *v[3]; /**< 1 to 3 vertex pointers */
+};
+
+
+
+struct draw_context;
+
+/**
+ * Base class for all primitive drawing stages.
+ */
+struct draw_stage
+{
+ struct draw_context *draw; /**< parent context */
+
+ struct draw_stage *next; /**< next stage in pipeline */
+
+ struct vertex_header **tmp; /**< temp vert storage, such as for clipping */
+ unsigned nr_tmps;
+
+ void (*point)( struct draw_stage *,
+ struct prim_header * );
+
+ void (*line)( struct draw_stage *,
+ struct prim_header * );
+
+ void (*tri)( struct draw_stage *,
+ struct prim_header * );
+
+ void (*flush)( struct draw_stage *,
+ unsigned flags );
+
+ void (*reset_stipple_counter)( struct draw_stage * );
+
+ void (*destroy)( struct draw_stage * );
+};
+
+
+#define PRIM_QUEUE_LENGTH 16
+#define VCACHE_SIZE 32
+#define VCACHE_OVERFLOW 4
+#define VS_QUEUE_LENGTH (VCACHE_SIZE + VCACHE_OVERFLOW + 1) /* can never fill up */
+
+/**
+ * Private version of the compiled vertex_shader
+ */
+struct draw_vertex_shader {
+ const struct pipe_shader_state *state;
+#if defined(__i386__) || defined(__386__)
+ struct x86_function sse2_program;
+#endif
+#ifdef MESA_LLVM
+ struct gallivm_prog *llvm_prog;
+#endif
+};
+
+
+/* Internal function for vertex fetch.
+ */
+typedef void (*fetch_func)(const void *ptr, float *attrib);
+typedef void (*full_fetch_func)( struct draw_context *draw,
+ struct tgsi_exec_machine *machine,
+ const unsigned *elts,
+ unsigned count );
+
+
+
+/**
+ * Private context for the drawing module.
+ */
+struct draw_context
+{
+ /** Drawing/primitive pipeline stages */
+ struct {
+ struct draw_stage *first; /**< one of the following */
+
+ struct draw_stage *validate;
+
+ /* stages (in logical order) */
+ struct draw_stage *flatshade;
+ struct draw_stage *clip;
+ struct draw_stage *cull;
+ struct draw_stage *twoside;
+ struct draw_stage *offset;
+ struct draw_stage *unfilled;
+ struct draw_stage *stipple;
+ struct draw_stage *wide;
+ struct draw_stage *rasterize;
+ } pipeline;
+
+ /* pipe state that we need: */
+ const struct pipe_rasterizer_state *rasterizer;
+ struct pipe_viewport_state viewport;
+ struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX];
+ struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX];
+ const struct draw_vertex_shader *vertex_shader;
+
+ uint num_vs_outputs; /**< convenience, from vertex_shader */
+
+ /* user-space vertex data, buffers */
+ struct {
+ /** vertex element/index buffer (ex: glDrawElements) */
+ const void *elts;
+ /** bytes per index (0, 1, 2 or 4) */
+ unsigned eltSize;
+
+ /** vertex arrays */
+ const void *vbuffer[PIPE_ATTRIB_MAX];
+
+ /** constant buffer (for vertex shader) */
+ const void *constants;
+ } user;
+
+ /* Clip derived state:
+ */
+ float plane[12][4];
+ unsigned nr_planes;
+
+ boolean convert_wide_points; /**< convert wide points to tris? */
+ boolean convert_wide_lines; /**< convert side lines to tris? */
+
+ unsigned reduced_prim;
+
+ /** TGSI program interpreter runtime state */
+ struct tgsi_exec_machine machine;
+
+ /* Vertex fetch internal state
+ */
+ struct {
+ const ubyte *src_ptr[PIPE_ATTRIB_MAX];
+ unsigned pitch[PIPE_ATTRIB_MAX];
+ fetch_func fetch[PIPE_ATTRIB_MAX];
+ unsigned nr_attrs;
+ full_fetch_func fetch_func;
+ } vertex_fetch;
+
+ /* Post-tnl vertex cache:
+ */
+ struct {
+ unsigned referenced; /**< bitfield */
+ unsigned idx[VCACHE_SIZE + VCACHE_OVERFLOW];
+ struct vertex_header *vertex[VCACHE_SIZE + VCACHE_OVERFLOW];
+ unsigned overflow;
+
+ /** To find space in the vertex cache: */
+ struct vertex_header *(*get_vertex)( struct draw_context *draw,
+ unsigned i );
+ } vcache;
+
+ /* Vertex shader queue:
+ */
+ struct {
+ struct {
+ unsigned elt; /**< index into the user's vertex arrays */
+ struct vertex_header *dest; /**< points into vcache.vertex[] array */
+ } queue[VS_QUEUE_LENGTH];
+ unsigned queue_nr;
+ } vs;
+
+ /**
+ * Run the vertex shader on all vertices in the vertex queue.
+ */
+ void (*shader_queue_flush)(struct draw_context *draw);
+
+ /* Prim pipeline queue:
+ */
+ struct {
+ /* Need to queue up primitives until their vertices have been
+ * transformed by a vs queue flush.
+ */
+ struct prim_header queue[PRIM_QUEUE_LENGTH];
+ unsigned queue_nr;
+ } pq;
+
+ int use_sse : 1;
+#ifdef MESA_LLVM
+ struct gallivm_cpu_engine *engine;
+#endif
+
+ void *driver_private;
+};
+
+
+
+extern struct draw_stage *draw_unfilled_stage( struct draw_context *context );
+extern struct draw_stage *draw_twoside_stage( struct draw_context *context );
+extern struct draw_stage *draw_offset_stage( struct draw_context *context );
+extern struct draw_stage *draw_clip_stage( struct draw_context *context );
+extern struct draw_stage *draw_flatshade_stage( struct draw_context *context );
+extern struct draw_stage *draw_cull_stage( struct draw_context *context );
+extern struct draw_stage *draw_stipple_stage( struct draw_context *context );
+extern struct draw_stage *draw_wide_stage( struct draw_context *context );
+extern struct draw_stage *draw_validate_stage( struct draw_context *context );
+
+
+extern void draw_free_temp_verts( struct draw_stage *stage );
+
+extern void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr );
+
+extern void draw_reset_vertex_ids( struct draw_context *draw );
+
+
+extern int draw_vertex_cache_check_space( struct draw_context *draw,
+ unsigned nr_verts );
+
+extern void draw_vertex_cache_invalidate( struct draw_context *draw );
+extern void draw_vertex_cache_unreference( struct draw_context *draw );
+extern void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw );
+
+
+extern void draw_vertex_shader_queue_flush( struct draw_context *draw );
+#ifdef MESA_LLVM
+extern void draw_vertex_shader_queue_flush_llvm( struct draw_context *draw );
+#endif
+
+struct tgsi_exec_machine;
+
+extern void draw_update_vertex_fetch( struct draw_context *draw );
+
+
+#define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */
+#define DRAW_FLUSH_PRIM_QUEUE 0x2
+#define DRAW_FLUSH_VERTEX_CACHE 0x4
+#define DRAW_FLUSH_STATE_CHANGE 0x8
+#define DRAW_FLUSH_BACKEND 0x10
+
+
+void draw_do_flush( struct draw_context *draw, unsigned flags );
+
+
+
+/**
+ * Get a writeable copy of a vertex.
+ * \param stage drawing stage info
+ * \param vert the vertex to copy (source)
+ * \param idx index into stage's tmp[] array to put the copy (dest)
+ * \return pointer to the copied vertex
+ */
+static INLINE struct vertex_header *
+dup_vert( struct draw_stage *stage,
+ const struct vertex_header *vert,
+ unsigned idx )
+{
+ struct vertex_header *tmp = stage->tmp[idx];
+ const uint vsize = sizeof(struct vertex_header)
+ + stage->draw->num_vs_outputs * 4 * sizeof(float);
+ memcpy(tmp, vert, vsize);
+ tmp->vertex_id = UNDEFINED_VERTEX_ID;
+ return tmp;
+}
+
+static INLINE float
+dot4(const float *a, const float *b)
+{
+ float result = (a[0]*b[0] +
+ a[1]*b[1] +
+ a[2]*b[2] +
+ a[3]*b[3]);
+
+ return result;
+}
+
+#endif /* DRAW_PRIVATE_H */
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/* Implement line stipple by cutting lines up into smaller lines.
+ * There are hundreds of ways to implement line stipple, this is one
+ * choice that should work in all situations, requires no state
+ * manipulations, but with a penalty in terms of large amounts of
+ * generated geometry.
+ */
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw_private.h"
+
+
+/** Subclass of draw_stage */
+struct stipple_stage {
+ struct draw_stage stage;
+ float counter;
+ uint pattern;
+ uint factor;
+};
+
+
+static INLINE struct stipple_stage *
+stipple_stage(struct draw_stage *stage)
+{
+ return (struct stipple_stage *) stage;
+}
+
+
+/**
+ * Compute interpolated vertex attributes for 'dst' at position 't'
+ * between 'v0' and 'v1'.
+ * XXX using linear interpolation for all attribs at this time.
+ */
+static void
+screen_interp( struct draw_context *draw,
+ struct vertex_header *dst,
+ float t,
+ const struct vertex_header *v0,
+ const struct vertex_header *v1 )
+{
+ uint attr;
+ for (attr = 0; attr < draw->num_vs_outputs; attr++) {
+ const float *val0 = v0->data[attr];
+ const float *val1 = v1->data[attr];
+ float *newv = dst->data[attr];
+ uint i;
+ for (i = 0; i < 4; i++) {
+ newv[i] = val0[i] + t * (val1[i] - val0[i]);
+ }
+ }
+}
+
+
+static void
+emit_segment(struct draw_stage *stage, struct prim_header *header,
+ float t0, float t1)
+{
+ struct vertex_header *v0new = dup_vert(stage, header->v[0], 0);
+ struct vertex_header *v1new = dup_vert(stage, header->v[1], 1);
+ struct prim_header newprim = *header;
+
+ if (t0 > 0.0) {
+ screen_interp( stage->draw, v0new, t0, header->v[0], header->v[1] );
+ newprim.v[0] = v0new;
+ }
+
+ if (t1 < 1.0) {
+ screen_interp( stage->draw, v1new, t1, header->v[0], header->v[1] );
+ newprim.v[1] = v1new;
+ }
+
+ stage->next->line( stage->next, &newprim );
+}
+
+
+static INLINE unsigned
+stipple_test(int counter, ushort pattern, int factor)
+{
+ int b = (counter / factor) & 0xf;
+ return (1 << b) & pattern;
+}
+
+
+static void
+stipple_line(struct draw_stage *stage, struct prim_header *header)
+{
+ struct stipple_stage *stipple = stipple_stage(stage);
+ struct vertex_header *v0 = header->v[0];
+ struct vertex_header *v1 = header->v[1];
+ const float *pos0 = v0->data[0];
+ const float *pos1 = v1->data[0];
+ float start = 0;
+ int state = 0;
+
+ float x0 = pos0[0];
+ float x1 = pos1[0];
+ float y0 = pos0[1];
+ float y1 = pos1[1];
+
+ float dx = x0 > x1 ? x0 - x1 : x1 - x0;
+ float dy = y0 > y1 ? y0 - y1 : y1 - y0;
+
+ float length = MAX2(dx, dy);
+ int i;
+
+ /* XXX ToDo: intead of iterating pixel-by-pixel, use a look-up table.
+ */
+ for (i = 0; i < length; i++) {
+ int result = stipple_test( (int) stipple->counter+i,
+ (ushort) stipple->pattern, stipple->factor );
+ if (result != state) {
+ /* changing from "off" to "on" or vice versa */
+ if (state) {
+ if (start != i) {
+ /* finishing an "on" segment */
+ emit_segment( stage, header, start / length, i / length );
+ }
+ }
+ else {
+ /* starting an "on" segment */
+ start = (float) i;
+ }
+ state = result;
+ }
+ }
+
+ if (state && start < length)
+ emit_segment( stage, header, start / length, 1.0 );
+
+ stipple->counter += length;
+}
+
+
+static void
+reset_stipple_counter(struct draw_stage *stage)
+{
+ struct stipple_stage *stipple = stipple_stage(stage);
+ stipple->counter = 0;
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void
+stipple_first_line(struct draw_stage *stage,
+ struct prim_header *header)
+{
+ struct stipple_stage *stipple = stipple_stage(stage);
+ struct draw_context *draw = stage->draw;
+
+ stipple->pattern = draw->rasterizer->line_stipple_pattern;
+ stipple->factor = draw->rasterizer->line_stipple_factor + 1;
+
+ stage->line = stipple_line;
+ stage->line( stage, header );
+}
+
+
+static void
+stipple_flush(struct draw_stage *stage, unsigned flags)
+{
+ stage->line = stipple_first_line;
+ stage->next->flush( stage->next, flags );
+}
+
+
+static void
+passthrough_point(struct draw_stage *stage, struct prim_header *header)
+{
+ stage->next->point( stage->next, header );
+}
+
+
+static void
+passthrough_tri(struct draw_stage *stage, struct prim_header *header)
+{
+ stage->next->tri(stage->next, header);
+}
+
+
+static void
+stipple_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+/**
+ * Create line stippler stage
+ */
+struct draw_stage *draw_stipple_stage( struct draw_context *draw )
+{
+ struct stipple_stage *stipple = CALLOC_STRUCT(stipple_stage);
+
+ draw_alloc_temp_verts( &stipple->stage, 2 );
+
+ stipple->stage.draw = draw;
+ stipple->stage.next = NULL;
+ stipple->stage.point = passthrough_point;
+ stipple->stage.line = stipple_first_line;
+ stipple->stage.tri = passthrough_tri;
+ stipple->stage.reset_stipple_counter = reset_stipple_counter;
+ stipple->stage.flush = stipple_flush;
+ stipple->stage.destroy = stipple_destroy;
+
+ return &stipple->stage;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_util.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw_private.h"
+
+
+struct twoside_stage {
+ struct draw_stage stage;
+ float sign; /**< +1 or -1 */
+ uint attrib_front0, attrib_back0;
+ uint attrib_front1, attrib_back1;
+};
+
+
+static INLINE struct twoside_stage *twoside_stage( struct draw_stage *stage )
+{
+ return (struct twoside_stage *)stage;
+}
+
+
+
+
+/**
+ * Copy back color(s) to front color(s).
+ */
+static INLINE struct vertex_header *
+copy_bfc( struct twoside_stage *twoside,
+ const struct vertex_header *v,
+ unsigned idx )
+{
+ struct vertex_header *tmp = dup_vert( &twoside->stage, v, idx );
+
+ if (twoside->attrib_back0) {
+ COPY_4FV(tmp->data[twoside->attrib_front0],
+ tmp->data[twoside->attrib_back0]);
+ }
+ if (twoside->attrib_back1) {
+ COPY_4FV(tmp->data[twoside->attrib_front1],
+ tmp->data[twoside->attrib_back1]);
+ }
+
+ return tmp;
+}
+
+
+/* Twoside tri:
+ */
+static void twoside_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct twoside_stage *twoside = twoside_stage(stage);
+
+ if (header->det * twoside->sign < 0.0) {
+ /* this is a back-facing triangle */
+ struct prim_header tmp;
+
+ tmp.det = header->det;
+ tmp.edgeflags = header->edgeflags;
+ /* copy back attribs to front attribs */
+ tmp.v[0] = copy_bfc(twoside, header->v[0], 0);
+ tmp.v[1] = copy_bfc(twoside, header->v[1], 1);
+ tmp.v[2] = copy_bfc(twoside, header->v[2], 2);
+
+ stage->next->tri( stage->next, &tmp );
+ }
+ else {
+ stage->next->tri( stage->next, header );
+ }
+}
+
+
+static void twoside_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ /* pass-through */
+ stage->next->line( stage->next, header );
+}
+
+
+static void twoside_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ /* pass-through */
+ stage->next->point( stage->next, header );
+}
+
+
+static void twoside_first_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct twoside_stage *twoside = twoside_stage(stage);
+ const struct pipe_shader_state *vs = stage->draw->vertex_shader->state;
+ uint i;
+
+ twoside->attrib_front0 = 0;
+ twoside->attrib_front1 = 0;
+ twoside->attrib_back0 = 0;
+ twoside->attrib_back1 = 0;
+
+ /* Find which vertex shader outputs are front/back colors */
+ for (i = 0; i < vs->num_outputs; i++) {
+ if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
+ if (vs->output_semantic_index[i] == 0)
+ twoside->attrib_front0 = i;
+ else
+ twoside->attrib_front1 = i;
+ }
+ if (vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) {
+ if (vs->output_semantic_index[i] == 0)
+ twoside->attrib_back0 = i;
+ else
+ twoside->attrib_back1 = i;
+ }
+ }
+
+ if (!twoside->attrib_back0)
+ twoside->attrib_front0 = 0;
+
+ if (!twoside->attrib_back1)
+ twoside->attrib_front1 = 0;
+
+ /*
+ * We'll multiply the primitive's determinant by this sign to determine
+ * if the triangle is back-facing (negative).
+ * sign = -1 for CCW, +1 for CW
+ */
+ twoside->sign = (stage->draw->rasterizer->front_winding == PIPE_WINDING_CCW) ? -1.0f : 1.0f;
+
+ stage->tri = twoside_tri;
+ stage->tri( stage, header );
+}
+
+
+static void twoside_flush( struct draw_stage *stage, unsigned flags )
+{
+ stage->tri = twoside_first_tri;
+ stage->next->flush( stage->next, flags );
+}
+
+
+static void twoside_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void twoside_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+/**
+ * Create twoside pipeline stage.
+ */
+struct draw_stage *draw_twoside_stage( struct draw_context *draw )
+{
+ struct twoside_stage *twoside = CALLOC_STRUCT(twoside_stage);
+
+ draw_alloc_temp_verts( &twoside->stage, 3 );
+
+ twoside->stage.draw = draw;
+ twoside->stage.next = NULL;
+ twoside->stage.point = twoside_point;
+ twoside->stage.line = twoside_line;
+ twoside->stage.tri = twoside_first_tri;
+ twoside->stage.flush = twoside_flush;
+ twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter;
+ twoside->stage.destroy = twoside_destroy;
+
+ return &twoside->stage;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief Drawing stage for handling glPolygonMode(line/point).
+ * Convert triangles to points or lines as needed.
+ */
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_util.h"
+#include "pipe/p_defines.h"
+#include "draw_private.h"
+
+
+struct unfilled_stage {
+ struct draw_stage stage;
+
+ /** [0] = front face, [1] = back face.
+ * legal values: PIPE_POLYGON_MODE_FILL, PIPE_POLYGON_MODE_LINE,
+ * and PIPE_POLYGON_MODE_POINT,
+ */
+ unsigned mode[2];
+};
+
+
+static INLINE struct unfilled_stage *unfilled_stage( struct draw_stage *stage )
+{
+ return (struct unfilled_stage *)stage;
+}
+
+
+
+static void point( struct draw_stage *stage,
+ struct vertex_header *v0 )
+{
+ struct prim_header tmp;
+ tmp.v[0] = v0;
+ stage->next->point( stage->next, &tmp );
+}
+
+static void line( struct draw_stage *stage,
+ struct vertex_header *v0,
+ struct vertex_header *v1 )
+{
+ struct prim_header tmp;
+ tmp.v[0] = v0;
+ tmp.v[1] = v1;
+ stage->next->line( stage->next, &tmp );
+}
+
+
+static void points( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct vertex_header *v0 = header->v[0];
+ struct vertex_header *v1 = header->v[1];
+ struct vertex_header *v2 = header->v[2];
+
+ if (header->edgeflags & 0x1) point( stage, v0 );
+ if (header->edgeflags & 0x2) point( stage, v1 );
+ if (header->edgeflags & 0x4) point( stage, v2 );
+}
+
+
+static void lines( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct vertex_header *v0 = header->v[0];
+ struct vertex_header *v1 = header->v[1];
+ struct vertex_header *v2 = header->v[2];
+
+#if 0
+ assert(((header->edgeflags & 0x1) >> 0) == header->v[0]->edgeflag);
+ assert(((header->edgeflags & 0x2) >> 1) == header->v[1]->edgeflag);
+ assert(((header->edgeflags & 0x4) >> 2) == header->v[2]->edgeflag);
+#endif
+
+ if (header->edgeflags & 0x1) line( stage, v0, v1 );
+ if (header->edgeflags & 0x2) line( stage, v1, v2 );
+ if (header->edgeflags & 0x4) line( stage, v2, v0 );
+}
+
+
+/* Unfilled tri:
+ *
+ * Note edgeflags in the vertex struct is not sufficient as we will
+ * need to manipulate them when decomposing primitives???
+ */
+static void unfilled_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct unfilled_stage *unfilled = unfilled_stage(stage);
+ unsigned mode = unfilled->mode[header->det >= 0.0];
+
+ switch (mode) {
+ case PIPE_POLYGON_MODE_FILL:
+ stage->next->tri( stage->next, header );
+ break;
+ case PIPE_POLYGON_MODE_LINE:
+ lines( stage, header );
+ break;
+ case PIPE_POLYGON_MODE_POINT:
+ points( stage, header );
+ break;
+ default:
+ abort();
+ }
+}
+
+
+static void unfilled_first_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct unfilled_stage *unfilled = unfilled_stage(stage);
+
+ unfilled->mode[0] = stage->draw->rasterizer->fill_ccw; /* front */
+ unfilled->mode[1] = stage->draw->rasterizer->fill_cw; /* back */
+
+ stage->tri = unfilled_tri;
+ stage->tri( stage, header );
+}
+
+
+static void unfilled_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ stage->next->line( stage->next, header );
+}
+
+
+static void unfilled_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ stage->next->point( stage->next, header );
+}
+
+
+static void unfilled_flush( struct draw_stage *stage,
+ unsigned flags )
+{
+ stage->next->flush( stage->next, flags );
+
+ stage->tri = unfilled_first_tri;
+}
+
+
+static void unfilled_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void unfilled_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+/**
+ * Create unfilled triangle stage.
+ */
+struct draw_stage *draw_unfilled_stage( struct draw_context *draw )
+{
+ struct unfilled_stage *unfilled = CALLOC_STRUCT(unfilled_stage);
+
+ draw_alloc_temp_verts( &unfilled->stage, 0 );
+
+ unfilled->stage.draw = draw;
+ unfilled->stage.next = NULL;
+ unfilled->stage.tmp = NULL;
+ unfilled->stage.point = unfilled_point;
+ unfilled->stage.line = unfilled_line;
+ unfilled->stage.tri = unfilled_first_tri;
+ unfilled->stage.flush = unfilled_flush;
+ unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter;
+ unfilled->stage.destroy = unfilled_destroy;
+
+ return &unfilled->stage;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_util.h"
+#include "pipe/p_defines.h"
+#include "draw_private.h"
+
+
+
+
+
+/**
+ * Rebuild the rendering pipeline.
+ */
+static struct draw_stage *validate_pipeline( struct draw_stage *stage )
+{
+ struct draw_context *draw = stage->draw;
+ struct draw_stage *next = draw->pipeline.rasterize;
+ int need_det = 0;
+ int precalc_flat = 0;
+
+ /* Set the validate's next stage to the rasterize stage, so that it
+ * can be found later if needed for flushing.
+ */
+ stage->next = next;
+
+ /*
+ * NOTE: we build up the pipeline in end-to-start order.
+ *
+ * TODO: make the current primitive part of the state and build
+ * shorter pipelines for lines & points.
+ */
+
+ if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines) ||
+ (draw->rasterizer->point_size != 1.0 && draw->convert_wide_points) ||
+ draw->rasterizer->point_sprite) {
+ draw->pipeline.wide->next = next;
+ next = draw->pipeline.wide;
+ }
+
+ if (draw->rasterizer->line_stipple_enable) {
+ draw->pipeline.stipple->next = next;
+ next = draw->pipeline.stipple;
+ precalc_flat = 1; /* only needed for lines really */
+ }
+
+ if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) {
+ draw->pipeline.unfilled->next = next;
+ next = draw->pipeline.unfilled;
+ precalc_flat = 1; /* only needed for triangles really */
+ need_det = 1;
+ }
+
+ if (draw->rasterizer->flatshade && precalc_flat) {
+ draw->pipeline.flatshade->next = next;
+ next = draw->pipeline.flatshade;
+ }
+
+ if (draw->rasterizer->offset_cw ||
+ draw->rasterizer->offset_ccw) {
+ draw->pipeline.offset->next = next;
+ next = draw->pipeline.offset;
+ need_det = 1;
+ }
+
+ if (draw->rasterizer->light_twoside) {
+ draw->pipeline.twoside->next = next;
+ next = draw->pipeline.twoside;
+ need_det = 1;
+ }
+
+ /* Always run the cull stage as we calculate determinant there
+ * also.
+ *
+ * This can actually be a win as culling out the triangles can lead
+ * to less work emitting vertices, smaller vertex buffers, etc.
+ * It's difficult to say whether this will be true in general.
+ */
+ if (need_det || draw->rasterizer->cull_mode) {
+ draw->pipeline.cull->next = next;
+ next = draw->pipeline.cull;
+ }
+
+ /* Clip stage
+ */
+ if (!draw->rasterizer->bypass_clipping)
+ {
+ draw->pipeline.clip->next = next;
+ next = draw->pipeline.clip;
+ }
+
+
+ draw->pipeline.first = next;
+ return next;
+}
+
+static void validate_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct draw_stage *pipeline = validate_pipeline( stage );
+ pipeline->tri( pipeline, header );
+}
+
+static void validate_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct draw_stage *pipeline = validate_pipeline( stage );
+ pipeline->line( pipeline, header );
+}
+
+static void validate_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct draw_stage *pipeline = validate_pipeline( stage );
+ pipeline->point( pipeline, header );
+}
+
+static void validate_reset_stipple_counter( struct draw_stage *stage )
+{
+ struct draw_stage *pipeline = validate_pipeline( stage );
+ pipeline->reset_stipple_counter( pipeline );
+}
+
+static void validate_flush( struct draw_stage *stage,
+ unsigned flags )
+{
+ /* May need to pass a backend flush on to the rasterize stage.
+ */
+ if (stage->next)
+ stage->next->flush( stage->next, flags );
+}
+
+
+static void validate_destroy( struct draw_stage *stage )
+{
+ FREE( stage );
+}
+
+
+/**
+ * Create validate pipeline stage.
+ */
+struct draw_stage *draw_validate_stage( struct draw_context *draw )
+{
+ struct draw_stage *stage = CALLOC_STRUCT(draw_stage);
+
+ stage->draw = draw;
+ stage->next = NULL;
+ stage->point = validate_point;
+ stage->line = validate_line;
+ stage->tri = validate_tri;
+ stage->flush = validate_flush;
+ stage->reset_stipple_counter = validate_reset_stipple_counter;
+ stage->destroy = validate_destroy;
+
+ return stage;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Vertex buffer drawing stage.
+ *
+ * \author José Fonseca <jrfonsec@tungstengraphics.com>
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+
+#include "draw_vbuf.h"
+#include "draw_private.h"
+#include "draw_vertex.h"
+#include "draw_vf.h"
+
+
+/**
+ * Vertex buffer emit stage.
+ */
+struct vbuf_stage {
+ struct draw_stage stage; /**< This must be first (base class) */
+
+ struct vbuf_render *render;
+
+ const struct vertex_info *vinfo;
+
+ /** Vertex size in bytes */
+ unsigned vertex_size;
+
+ struct draw_vertex_fetch *vf;
+
+ /* FIXME: we have no guarantee that 'unsigned' is 32bit */
+
+ /** Vertices in hardware format */
+ unsigned *vertices;
+ unsigned *vertex_ptr;
+ unsigned max_vertices;
+ unsigned nr_vertices;
+
+ /** Indices */
+ ushort *indices;
+ unsigned max_indices;
+ unsigned nr_indices;
+
+ /** Pipe primitive */
+ unsigned prim;
+};
+
+
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct vbuf_stage *
+vbuf_stage( struct draw_stage *stage )
+{
+ assert(stage);
+ return (struct vbuf_stage *)stage;
+}
+
+
+static void vbuf_flush_indices( struct vbuf_stage *vbuf );
+static void vbuf_flush_vertices( struct vbuf_stage *vbuf );
+static void vbuf_alloc_vertices( struct vbuf_stage *vbuf );
+
+
+static INLINE boolean
+overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz )
+{
+ unsigned long used = (unsigned long) ((char *)ptr - (char *)map);
+ return (used + bytes) > bufsz;
+}
+
+
+static INLINE void
+check_space( struct vbuf_stage *vbuf, unsigned nr )
+{
+ if (vbuf->nr_vertices + nr > vbuf->max_vertices ) {
+ vbuf_flush_vertices(vbuf);
+ vbuf_alloc_vertices(vbuf);
+ }
+
+ if (vbuf->nr_indices + nr > vbuf->max_indices )
+ vbuf_flush_indices(vbuf);
+}
+
+
+#if 0
+static INLINE void
+dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data)
+{
+ assert(vinfo == vbuf->render->get_vertex_info(vbuf->render));
+ unsigned i, j, k;
+
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ j = vinfo->src_index[i];
+ switch (vinfo->emit[i]) {
+ case EMIT_OMIT:
+ debug_printf("EMIT_OMIT:");
+ break;
+ case EMIT_ALL:
+ assert(i == 0);
+ assert(j == 0);
+ debug_printf("EMIT_ALL:\t");
+ for(k = 0; k < vinfo->size*4; ++k)
+ debug_printf("%02x ", *data++);
+ break;
+ case EMIT_1F:
+ debug_printf("EMIT_1F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_1F_PSIZE:
+ debug_printf("EMIT_1F_PSIZE:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_2F:
+ debug_printf("EMIT_2F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_3F:
+ debug_printf("EMIT_3F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ data += sizeof(float);
+ break;
+ case EMIT_4F:
+ debug_printf("EMIT_4F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ break;
+ case EMIT_4UB:
+ debug_printf("EMIT_4UB:\t");
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ break;
+ default:
+ assert(0);
+ }
+ debug_printf("\n");
+ }
+ debug_printf("\n");
+}
+#endif
+
+
+/**
+ * Extract the needed fields from post-transformed vertex and emit
+ * a hardware(driver) vertex.
+ * Recall that the vertices are constructed by the 'draw' module and
+ * have a couple of slots at the beginning (1-dword header, 4-dword
+ * clip pos) that we ignore here. We only use the vertex->data[] fields.
+ */
+static INLINE void
+emit_vertex( struct vbuf_stage *vbuf,
+ struct vertex_header *vertex )
+{
+#if 0
+ debug_printf("emit vertex %d to %p\n",
+ vbuf->nr_vertices, vbuf->vertex_ptr);
+#endif
+
+ if(vertex->vertex_id != UNDEFINED_VERTEX_ID) {
+ if(vertex->vertex_id < vbuf->nr_vertices)
+ return;
+ else
+ debug_printf("Bad vertex id 0x%04x (>= 0x%04x)\n",
+ vertex->vertex_id, vbuf->nr_vertices);
+ return;
+ }
+
+ vertex->vertex_id = vbuf->nr_vertices++;
+
+ if(!vbuf->vf) {
+ const struct vertex_info *vinfo = vbuf->vinfo;
+ uint i;
+ uint count = 0; /* for debug/sanity */
+
+ assert(vinfo == vbuf->render->get_vertex_info(vbuf->render));
+
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ uint j = vinfo->src_index[i];
+ switch (vinfo->emit[i]) {
+ case EMIT_OMIT:
+ /* no-op */
+ break;
+ case EMIT_ALL:
+ /* just copy the whole vertex as-is to the vbuf */
+ assert(i == 0);
+ assert(j == 0);
+ memcpy(vbuf->vertex_ptr, vertex, vinfo->size * 4);
+ vbuf->vertex_ptr += vinfo->size;
+ count += vinfo->size;
+ break;
+ case EMIT_1F:
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
+ count++;
+ break;
+ case EMIT_1F_PSIZE:
+ *vbuf->vertex_ptr++ = fui(vbuf->stage.draw->rasterizer->point_size);
+ count++;
+ break;
+ case EMIT_2F:
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
+ count += 2;
+ break;
+ case EMIT_3F:
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][2]);
+ count += 3;
+ break;
+ case EMIT_4F:
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][2]);
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][3]);
+ count += 4;
+ break;
+ case EMIT_4UB:
+ *vbuf->vertex_ptr++ = pack_ub4(float_to_ubyte( vertex->data[j][2] ),
+ float_to_ubyte( vertex->data[j][1] ),
+ float_to_ubyte( vertex->data[j][0] ),
+ float_to_ubyte( vertex->data[j][3] ));
+ count += 1;
+ break;
+ default:
+ assert(0);
+ }
+ }
+ assert(count == vinfo->size);
+#if 0
+ {
+ static float data[256];
+ draw_vf_emit_vertex(vbuf->vf, vertex, data);
+ if(memcmp((uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size, data, vbuf->vertex_size)) {
+ debug_printf("With VF:\n");
+ dump_emitted_vertex(vbuf->vinfo, (uint8_t *)data);
+ debug_printf("Without VF:\n");
+ dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size);
+ assert(0);
+ }
+ }
+#endif
+ }
+ else {
+ draw_vf_emit_vertex(vbuf->vf, vertex, vbuf->vertex_ptr);
+
+ vbuf->vertex_ptr += vbuf->vertex_size/4;
+ }
+}
+
+
+static void
+vbuf_tri( struct draw_stage *stage,
+ struct prim_header *prim )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+ unsigned i;
+
+ check_space( vbuf, 3 );
+
+ for (i = 0; i < 3; i++) {
+ emit_vertex( vbuf, prim->v[i] );
+
+ vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id;
+ }
+}
+
+
+static void
+vbuf_line( struct draw_stage *stage,
+ struct prim_header *prim )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+ unsigned i;
+
+ check_space( vbuf, 2 );
+
+ for (i = 0; i < 2; i++) {
+ emit_vertex( vbuf, prim->v[i] );
+
+ vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id;
+ }
+}
+
+
+static void
+vbuf_point( struct draw_stage *stage,
+ struct prim_header *prim )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+
+ check_space( vbuf, 1 );
+
+ emit_vertex( vbuf, prim->v[0] );
+
+ vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[0]->vertex_id;
+}
+
+
+/**
+ * Set the prim type for subsequent vertices.
+ * This may result in a new vertex size. The existing vbuffer (if any)
+ * will be flushed if needed and a new one allocated.
+ */
+static void
+vbuf_set_prim( struct vbuf_stage *vbuf, uint newprim )
+{
+ const struct vertex_info *vinfo;
+ unsigned vertex_size;
+
+ assert(newprim == PIPE_PRIM_POINTS ||
+ newprim == PIPE_PRIM_LINES ||
+ newprim == PIPE_PRIM_TRIANGLES);
+
+ vbuf->prim = newprim;
+ vbuf->render->set_primitive(vbuf->render, newprim);
+
+ vinfo = vbuf->render->get_vertex_info(vbuf->render);
+ vertex_size = vinfo->size * sizeof(float);
+
+ if (vertex_size != vbuf->vertex_size)
+ vbuf_flush_vertices(vbuf);
+
+ vbuf->vinfo = vinfo;
+ vbuf->vertex_size = vertex_size;
+ if(vbuf->vf)
+ draw_vf_set_vertex_info(vbuf->vf,
+ vbuf->vinfo,
+ vbuf->stage.draw->rasterizer->point_size);
+
+ if (!vbuf->vertices)
+ vbuf_alloc_vertices(vbuf);
+}
+
+
+static void
+vbuf_first_tri( struct draw_stage *stage,
+ struct prim_header *prim )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+
+ vbuf_flush_indices( vbuf );
+ stage->tri = vbuf_tri;
+ vbuf_set_prim(vbuf, PIPE_PRIM_TRIANGLES);
+ stage->tri( stage, prim );
+}
+
+
+static void
+vbuf_first_line( struct draw_stage *stage,
+ struct prim_header *prim )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+
+ vbuf_flush_indices( vbuf );
+ stage->line = vbuf_line;
+ vbuf_set_prim(vbuf, PIPE_PRIM_LINES);
+ stage->line( stage, prim );
+}
+
+
+static void
+vbuf_first_point( struct draw_stage *stage,
+ struct prim_header *prim )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+
+ vbuf_flush_indices( vbuf );
+ stage->point = vbuf_point;
+ vbuf_set_prim(vbuf, PIPE_PRIM_POINTS);
+ stage->point( stage, prim );
+}
+
+
+static void
+vbuf_flush_indices( struct vbuf_stage *vbuf )
+{
+ if(!vbuf->nr_indices)
+ return;
+
+ assert((uint) (vbuf->vertex_ptr - vbuf->vertices) ==
+ vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned));
+
+ switch(vbuf->prim) {
+ case PIPE_PRIM_POINTS:
+ break;
+ case PIPE_PRIM_LINES:
+ assert(vbuf->nr_indices % 2 == 0);
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ assert(vbuf->nr_indices % 3 == 0);
+ break;
+ default:
+ assert(0);
+ }
+
+ vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices);
+
+ vbuf->nr_indices = 0;
+
+ /* don't need to reset point/line/tri functions */
+#if 0
+ stage->point = vbuf_first_point;
+ stage->line = vbuf_first_line;
+ stage->tri = vbuf_first_tri;
+#endif
+}
+
+
+/**
+ * Flush existing vertex buffer and allocate a new one.
+ *
+ * XXX: We separate flush-on-index-full and flush-on-vb-full, but may
+ * raise issues uploading vertices if the hardware wants to flush when
+ * we flush.
+ */
+static void
+vbuf_flush_vertices( struct vbuf_stage *vbuf )
+{
+ if(vbuf->vertices) {
+ vbuf_flush_indices(vbuf);
+
+ /* Reset temporary vertices ids */
+ if(vbuf->nr_vertices)
+ draw_reset_vertex_ids( vbuf->stage.draw );
+
+ /* Free the vertex buffer */
+ vbuf->render->release_vertices(vbuf->render,
+ vbuf->vertices,
+ vbuf->vertex_size,
+ vbuf->nr_vertices);
+ vbuf->max_vertices = vbuf->nr_vertices = 0;
+ vbuf->vertex_ptr = vbuf->vertices = NULL;
+
+ }
+}
+
+
+static void
+vbuf_alloc_vertices( struct vbuf_stage *vbuf )
+{
+ assert(!vbuf->nr_indices);
+ assert(!vbuf->vertices);
+
+ /* Allocate a new vertex buffer */
+ vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size;
+ vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render,
+ (ushort) vbuf->vertex_size,
+ (ushort) vbuf->max_vertices);
+ vbuf->vertex_ptr = vbuf->vertices;
+}
+
+
+
+static void
+vbuf_flush( struct draw_stage *stage, unsigned flags )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+
+ vbuf_flush_indices( vbuf );
+
+ stage->point = vbuf_first_point;
+ stage->line = vbuf_first_line;
+ stage->tri = vbuf_first_tri;
+
+ if (flags & DRAW_FLUSH_BACKEND)
+ vbuf_flush_vertices( vbuf );
+}
+
+
+static void
+vbuf_reset_stipple_counter( struct draw_stage *stage )
+{
+ /* XXX: Need to do something here for hardware with linestipple.
+ */
+ (void) stage;
+}
+
+
+static void vbuf_destroy( struct draw_stage *stage )
+{
+ struct vbuf_stage *vbuf = vbuf_stage( stage );
+
+ if(vbuf->indices)
+ align_free( vbuf->indices );
+
+ if(vbuf->vf)
+ draw_vf_destroy( vbuf->vf );
+
+ if (vbuf->render)
+ vbuf->render->destroy( vbuf->render );
+
+ FREE( stage );
+}
+
+
+/**
+ * Create a new primitive vbuf/render stage.
+ */
+struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
+ struct vbuf_render *render )
+{
+ struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage);
+
+ if(!vbuf)
+ return NULL;
+
+ vbuf->stage.draw = draw;
+ vbuf->stage.point = vbuf_first_point;
+ vbuf->stage.line = vbuf_first_line;
+ vbuf->stage.tri = vbuf_first_tri;
+ vbuf->stage.flush = vbuf_flush;
+ vbuf->stage.reset_stipple_counter = vbuf_reset_stipple_counter;
+ vbuf->stage.destroy = vbuf_destroy;
+
+ vbuf->render = render;
+
+ assert(render->max_indices < UNDEFINED_VERTEX_ID);
+ vbuf->max_indices = render->max_indices;
+ vbuf->indices = (ushort *)
+ align_malloc( vbuf->max_indices * sizeof(vbuf->indices[0]), 16 );
+ if(!vbuf->indices)
+ vbuf_destroy(&vbuf->stage);
+
+ vbuf->vertices = NULL;
+ vbuf->vertex_ptr = vbuf->vertices;
+
+ vbuf->prim = ~0;
+
+ if(!GETENV("GALLIUM_NOVF"))
+ vbuf->vf = draw_vf_create();
+
+ return &vbuf->stage;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Vertex buffer drawing stage.
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author José Fonseca <jrfonsec@tungstengraphics.com>
+ */
+
+#ifndef DRAW_VBUF_H_
+#define DRAW_VBUF_H_
+
+
+#include "pipe/p_util.h"
+
+
+struct draw_context;
+struct vertex_info;
+
+
+/**
+ * Interface for hardware vertex buffer rendering.
+ */
+struct vbuf_render {
+
+ /**
+ * Driver limits. May be tuned lower to improve cache hits on
+ * index list.
+ */
+ unsigned max_indices;
+ unsigned max_vertex_buffer_bytes;
+
+ /**
+ * Get the hardware vertex format.
+ *
+ * XXX: have this in draw_context instead?
+ */
+ const struct vertex_info *(*get_vertex_info)( struct vbuf_render * );
+
+ /**
+ * Request a destination for vertices.
+ * Hardware renderers will use ttm memory, others will just malloc
+ * something.
+ */
+ void *(*allocate_vertices)( struct vbuf_render *,
+ ushort vertex_size,
+ ushort nr_vertices );
+
+ /**
+ * Notify the renderer of the current primitive when it changes.
+ * Prim is restricted to TRIANGLES, LINES and POINTS.
+ */
+ void (*set_primitive)( struct vbuf_render *, unsigned prim );
+
+ /**
+ * DrawElements, note indices are ushort:
+ */
+ void (*draw)( struct vbuf_render *,
+ const ushort *indices,
+ uint nr_indices );
+
+ /**
+ * Called when vbuf is done with this set of vertices:
+ */
+ void (*release_vertices)( struct vbuf_render *,
+ void *vertices,
+ unsigned vertex_size,
+ unsigned vertices_used );
+
+ void (*destroy)( struct vbuf_render * );
+};
+
+
+
+struct draw_stage *
+draw_vbuf_stage( struct draw_context *draw,
+ struct vbuf_render *render );
+
+
+#endif /*DRAW_VBUF_H_*/
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Functions for specifying the post-transformation vertex layout.
+ *
+ * Author:
+ * Brian Paul
+ * Keith Whitwell
+ */
+
+
+#include "draw/draw_private.h"
+#include "draw/draw_vertex.h"
+
+
+/**
+ * Compute the size of a vertex, in dwords/floats, to update the
+ * vinfo->size field.
+ */
+void
+draw_compute_vertex_size(struct vertex_info *vinfo)
+{
+ uint i;
+
+ vinfo->size = 0;
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ switch (vinfo->emit[i]) {
+ case EMIT_OMIT:
+ break;
+ case EMIT_4UB:
+ /* fall-through */
+ case EMIT_1F_PSIZE:
+ /* fall-through */
+ case EMIT_1F:
+ vinfo->size += 1;
+ break;
+ case EMIT_2F:
+ vinfo->size += 2;
+ break;
+ case EMIT_3F:
+ vinfo->size += 3;
+ break;
+ case EMIT_4F:
+ vinfo->size += 4;
+ break;
+ case EMIT_ALL:
+ /* fall-through */
+ default:
+ assert(0);
+ }
+ }
+
+ assert(vinfo->size * 4 <= MAX_VERTEX_SIZE);
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Post-transform vertex format info. The vertex_info struct is used by
+ * the draw_vbuf code to emit hardware-specific vertex layouts into hw
+ * vertex buffers.
+ *
+ * Author:
+ * Brian Paul
+ */
+
+
+#ifndef DRAW_VERTEX_H
+#define DRAW_VERTEX_H
+
+
+#include "pipe/p_state.h"
+
+
+/**
+ * Vertex attribute emit modes
+ */
+enum attrib_emit {
+ EMIT_OMIT, /**< don't emit the attribute */
+ EMIT_ALL, /**< emit whole post-xform vertex, w/ header */
+ EMIT_1F,
+ EMIT_1F_PSIZE, /**< insert constant point size */
+ EMIT_2F,
+ EMIT_3F,
+ EMIT_4F,
+ EMIT_4UB /**< XXX may need variations for RGBA vs BGRA, etc */
+};
+
+
+/**
+ * Attribute interpolation mode
+ */
+enum interp_mode {
+ INTERP_NONE, /**< never interpolate vertex header info */
+ INTERP_POS, /**< special case for frag position */
+ INTERP_CONSTANT,
+ INTERP_LINEAR,
+ INTERP_PERSPECTIVE
+};
+
+
+/**
+ * Information about hardware/rasterization vertex layout.
+ */
+struct vertex_info
+{
+ uint num_attribs;
+ uint hwfmt[4]; /**< hardware format info for this format */
+ enum interp_mode interp_mode[PIPE_MAX_SHADER_INPUTS];
+ enum attrib_emit emit[PIPE_MAX_SHADER_INPUTS]; /**< EMIT_x */
+ uint src_index[PIPE_MAX_SHADER_INPUTS]; /**< map to post-xform attribs */
+ uint size; /**< total vertex size in dwords */
+};
+
+
+
+/**
+ * Add another attribute to the given vertex_info object.
+ * \param src_index indicates which post-transformed vertex attrib slot
+ * corresponds to this attribute.
+ * \return slot in which the attribute was added
+ */
+static INLINE uint
+draw_emit_vertex_attr(struct vertex_info *vinfo,
+ enum attrib_emit emit, enum interp_mode interp,
+ uint src_index)
+{
+ const uint n = vinfo->num_attribs;
+ assert(n < PIPE_MAX_SHADER_INPUTS);
+ vinfo->emit[n] = emit;
+ vinfo->interp_mode[n] = interp;
+ vinfo->src_index[n] = src_index;
+ vinfo->num_attribs++;
+ return n;
+}
+
+
+extern void draw_compute_vertex_size(struct vertex_info *vinfo);
+
+
+#endif /* DRAW_VERTEX_H */
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_util.h"
+#include "draw_private.h"
+#include "draw_context.h"
+
+
+void draw_vertex_cache_invalidate( struct draw_context *draw )
+{
+ assert(draw->pq.queue_nr == 0);
+ assert(draw->vs.queue_nr == 0);
+ assert(draw->vcache.referenced == 0);
+
+ memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx));
+}
+
+
+/**
+ * Check if vertex is in cache, otherwise add it. It won't go through
+ * VS yet, not until there is a flush operation or the VS queue fills up.
+ *
+ * Note that cache entries are basically just two pointers: the first
+ * an index into the user's vertex arrays, the second a location in
+ * the vertex shader cache for the post-transformed vertex.
+ *
+ * \return pointer to location of (post-transformed) vertex header in the cache
+ */
+static struct vertex_header *get_vertex( struct draw_context *draw,
+ unsigned i )
+{
+ unsigned slot = (i + (i>>5)) % VCACHE_SIZE;
+
+ assert(slot < 32); /* so we don't exceed the bitfield size below */
+
+ /* Cache miss?
+ */
+ if (draw->vcache.idx[slot] != i) {
+
+ /* If slot is in use, use the overflow area:
+ */
+ if (draw->vcache.referenced & (1 << slot)) {
+ slot = VCACHE_SIZE + draw->vcache.overflow++;
+ }
+
+ assert(slot < Elements(draw->vcache.idx));
+
+ draw->vcache.idx[slot] = i;
+
+ /* Add to vertex shader queue:
+ */
+ assert(draw->vs.queue_nr < VS_QUEUE_LENGTH);
+ draw->vs.queue[draw->vs.queue_nr].dest = draw->vcache.vertex[slot];
+ draw->vs.queue[draw->vs.queue_nr].elt = i;
+ draw->vs.queue_nr++;
+
+ /* Need to set the vertex's edge flag here. If we're being called
+ * by do_ef_triangle(), that function needs edge flag info!
+ */
+ draw->vcache.vertex[slot]->clipmask = 0;
+ draw->vcache.vertex[slot]->edgeflag = 1; /*XXX use user's edge flag! */
+ draw->vcache.vertex[slot]->pad = 0;
+ draw->vcache.vertex[slot]->vertex_id = UNDEFINED_VERTEX_ID;
+ }
+
+
+ /* primitive flushing may have cleared the bitfield but did not
+ * clear the idx[] array values. Set the bit now. This fixes a
+ * bug found when drawing long triangle fans.
+ */
+ draw->vcache.referenced |= (1 << slot);
+ return draw->vcache.vertex[slot];
+}
+
+
+static struct vertex_header *get_uint_elt_vertex( struct draw_context *draw,
+ unsigned i )
+{
+ const unsigned *elts = (const unsigned *) draw->user.elts;
+ return get_vertex( draw, elts[i] );
+}
+
+
+static struct vertex_header *get_ushort_elt_vertex( struct draw_context *draw,
+ unsigned i )
+{
+ const ushort *elts = (const ushort *) draw->user.elts;
+ return get_vertex( draw, elts[i] );
+}
+
+
+static struct vertex_header *get_ubyte_elt_vertex( struct draw_context *draw,
+ unsigned i )
+{
+ const ubyte *elts = (const ubyte *) draw->user.elts;
+ return get_vertex( draw, elts[i] );
+}
+
+
+void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw )
+{
+ unsigned i;
+
+ for (i = 0; i < Elements(draw->vcache.vertex); i++)
+ draw->vcache.vertex[i]->vertex_id = UNDEFINED_VERTEX_ID;
+}
+
+
+void draw_vertex_cache_unreference( struct draw_context *draw )
+{
+ draw->vcache.referenced = 0;
+ draw->vcache.overflow = 0;
+}
+
+
+int draw_vertex_cache_check_space( struct draw_context *draw,
+ unsigned nr_verts )
+{
+ if (draw->vcache.overflow + nr_verts < VCACHE_OVERFLOW) {
+ /* The vs queue is sized so that this can never happen:
+ */
+ assert(draw->vs.queue_nr + nr_verts < VS_QUEUE_LENGTH);
+ return TRUE;
+ }
+ else
+ return FALSE;
+}
+
+
+
+/**
+ * Tell the drawing context about the index/element buffer to use
+ * (ala glDrawElements)
+ * If no element buffer is to be used (i.e. glDrawArrays) then this
+ * should be called with eltSize=0 and elements=NULL.
+ *
+ * \param draw the drawing context
+ * \param eltSize size of each element (1, 2 or 4 bytes)
+ * \param elements the element buffer ptr
+ */
+void
+draw_set_mapped_element_buffer( struct draw_context *draw,
+ unsigned eltSize, void *elements )
+{
+// draw_statechange( draw );
+
+ /* choose the get_vertex() function to use */
+ switch (eltSize) {
+ case 0:
+ draw->vcache.get_vertex = get_vertex;
+ break;
+ case 1:
+ draw->vcache.get_vertex = get_ubyte_elt_vertex;
+ break;
+ case 2:
+ draw->vcache.get_vertex = get_ushort_elt_vertex;
+ break;
+ case 4:
+ draw->vcache.get_vertex = get_uint_elt_vertex;
+ break;
+ default:
+ assert(0);
+ }
+ draw->user.elts = elements;
+ draw->user.eltSize = eltSize;
+}
+
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw_private.h"
+#include "draw_context.h"
+
+
+#define DRAW_DBG 0
+
+
+/**
+ * Fetch a float[4] vertex attribute from memory, doing format/type
+ * conversion as needed.
+ *
+ * This is probably needed/dupliocated elsewhere, eg format
+ * conversion, texture sampling etc.
+ */
+#define FETCH_ATTRIB( NAME, SZ, CVT ) \
+static void \
+fetch_##NAME(const void *ptr, float *attrib) \
+{ \
+ static const float defaults[4] = { 0,0,0,1 }; \
+ int i; \
+ \
+ for (i = 0; i < SZ; i++) { \
+ attrib[i] = CVT; \
+ } \
+ \
+ for (; i < 4; i++) { \
+ attrib[i] = defaults[i]; \
+ } \
+}
+
+#define CVT_64_FLOAT (float) ((double *) ptr)[i]
+#define CVT_32_FLOAT ((float *) ptr)[i]
+
+#define CVT_8_USCALED (float) ((unsigned char *) ptr)[i]
+#define CVT_16_USCALED (float) ((unsigned short *) ptr)[i]
+#define CVT_32_USCALED (float) ((unsigned int *) ptr)[i]
+
+#define CVT_8_SSCALED (float) ((char *) ptr)[i]
+#define CVT_16_SSCALED (float) ((short *) ptr)[i]
+#define CVT_32_SSCALED (float) ((int *) ptr)[i]
+
+#define CVT_8_UNORM (float) ((unsigned char *) ptr)[i] / 255.0f
+#define CVT_16_UNORM (float) ((unsigned short *) ptr)[i] / 65535.0f
+#define CVT_32_UNORM (float) ((unsigned int *) ptr)[i] / 4294967295.0f
+
+#define CVT_8_SNORM (float) ((char *) ptr)[i] / 127.0f
+#define CVT_16_SNORM (float) ((short *) ptr)[i] / 32767.0f
+#define CVT_32_SNORM (float) ((int *) ptr)[i] / 2147483647.0f
+
+FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT )
+FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT )
+FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT )
+FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT )
+
+FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT )
+FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT )
+FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT )
+FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT )
+
+FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED )
+FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED )
+FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED )
+FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED )
+
+FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED )
+FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED )
+FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED )
+FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED )
+
+FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM )
+FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM )
+FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM )
+FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM )
+
+FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM )
+FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM )
+FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM )
+FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM )
+
+FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED )
+FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED )
+FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED )
+FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED )
+
+FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED )
+FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED )
+FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED )
+FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED )
+
+FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM )
+FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM )
+FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM )
+FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM )
+
+FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM )
+FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM )
+FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM )
+FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM )
+
+FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED )
+FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED )
+FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED )
+FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED )
+
+FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED )
+FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED )
+FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED )
+FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED )
+
+FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
+FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM )
+FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM )
+FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM )
+
+FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM )
+FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM )
+FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM )
+FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM )
+
+FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM )
+//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
+
+
+
+static fetch_func get_fetch_func( enum pipe_format format )
+{
+#if 0
+ {
+ char tmp[80];
+ pf_sprint_name(tmp, format);
+ debug_printf("%s: %s\n", __FUNCTION__, tmp);
+ }
+#endif
+
+ switch (format) {
+ case PIPE_FORMAT_R64_FLOAT:
+ return fetch_R64_FLOAT;
+ case PIPE_FORMAT_R64G64_FLOAT:
+ return fetch_R64G64_FLOAT;
+ case PIPE_FORMAT_R64G64B64_FLOAT:
+ return fetch_R64G64B64_FLOAT;
+ case PIPE_FORMAT_R64G64B64A64_FLOAT:
+ return fetch_R64G64B64A64_FLOAT;
+
+ case PIPE_FORMAT_R32_FLOAT:
+ return fetch_R32_FLOAT;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return fetch_R32G32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return fetch_R32G32B32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return fetch_R32G32B32A32_FLOAT;
+
+ case PIPE_FORMAT_R32_UNORM:
+ return fetch_R32_UNORM;
+ case PIPE_FORMAT_R32G32_UNORM:
+ return fetch_R32G32_UNORM;
+ case PIPE_FORMAT_R32G32B32_UNORM:
+ return fetch_R32G32B32_UNORM;
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return fetch_R32G32B32A32_UNORM;
+
+ case PIPE_FORMAT_R32_USCALED:
+ return fetch_R32_USCALED;
+ case PIPE_FORMAT_R32G32_USCALED:
+ return fetch_R32G32_USCALED;
+ case PIPE_FORMAT_R32G32B32_USCALED:
+ return fetch_R32G32B32_USCALED;
+ case PIPE_FORMAT_R32G32B32A32_USCALED:
+ return fetch_R32G32B32A32_USCALED;
+
+ case PIPE_FORMAT_R32_SNORM:
+ return fetch_R32_SNORM;
+ case PIPE_FORMAT_R32G32_SNORM:
+ return fetch_R32G32_SNORM;
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ return fetch_R32G32B32_SNORM;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ return fetch_R32G32B32A32_SNORM;
+
+ case PIPE_FORMAT_R32_SSCALED:
+ return fetch_R32_SSCALED;
+ case PIPE_FORMAT_R32G32_SSCALED:
+ return fetch_R32G32_SSCALED;
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ return fetch_R32G32B32_SSCALED;
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ return fetch_R32G32B32A32_SSCALED;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return fetch_R16_UNORM;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return fetch_R16G16_UNORM;
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ return fetch_R16G16B16_UNORM;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return fetch_R16G16B16A16_UNORM;
+
+ case PIPE_FORMAT_R16_USCALED:
+ return fetch_R16_USCALED;
+ case PIPE_FORMAT_R16G16_USCALED:
+ return fetch_R16G16_USCALED;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ return fetch_R16G16B16_USCALED;
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ return fetch_R16G16B16A16_USCALED;
+
+ case PIPE_FORMAT_R16_SNORM:
+ return fetch_R16_SNORM;
+ case PIPE_FORMAT_R16G16_SNORM:
+ return fetch_R16G16_SNORM;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ return fetch_R16G16B16_SNORM;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return fetch_R16G16B16A16_SNORM;
+
+ case PIPE_FORMAT_R16_SSCALED:
+ return fetch_R16_SSCALED;
+ case PIPE_FORMAT_R16G16_SSCALED:
+ return fetch_R16G16_SSCALED;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ return fetch_R16G16B16_SSCALED;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ return fetch_R16G16B16A16_SSCALED;
+
+ case PIPE_FORMAT_R8_UNORM:
+ return fetch_R8_UNORM;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return fetch_R8G8_UNORM;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return fetch_R8G8B8_UNORM;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ return fetch_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R8_USCALED:
+ return fetch_R8_USCALED;
+ case PIPE_FORMAT_R8G8_USCALED:
+ return fetch_R8G8_USCALED;
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ return fetch_R8G8B8_USCALED;
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ return fetch_R8G8B8A8_USCALED;
+
+ case PIPE_FORMAT_R8_SNORM:
+ return fetch_R8_SNORM;
+ case PIPE_FORMAT_R8G8_SNORM:
+ return fetch_R8G8_SNORM;
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return fetch_R8G8B8_SNORM;
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return fetch_R8G8B8A8_SNORM;
+
+ case PIPE_FORMAT_R8_SSCALED:
+ return fetch_R8_SSCALED;
+ case PIPE_FORMAT_R8G8_SSCALED:
+ return fetch_R8G8_SSCALED;
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ return fetch_R8G8B8_SSCALED;
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ return fetch_R8G8B8A8_SSCALED;
+
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return fetch_A8R8G8B8_UNORM;
+
+ case 0:
+ return NULL; /* not sure why this is needed */
+
+ default:
+ assert(0);
+ return NULL;
+ }
+}
+
+
+static void
+transpose_4x4( float *out, const float *in )
+{
+ /* This can be achieved in 12 sse instructions, plus the final
+ * stores I guess. This is probably a bit more than that - maybe
+ * 32 or so?
+ */
+ out[0] = in[0]; out[1] = in[4]; out[2] = in[8]; out[3] = in[12];
+ out[4] = in[1]; out[5] = in[5]; out[6] = in[9]; out[7] = in[13];
+ out[8] = in[2]; out[9] = in[6]; out[10] = in[10]; out[11] = in[14];
+ out[12] = in[3]; out[13] = in[7]; out[14] = in[11]; out[15] = in[15];
+}
+
+
+
+static void fetch_xyz_rgb( struct draw_context *draw,
+ struct tgsi_exec_machine *machine,
+ const unsigned *elts,
+ unsigned count )
+{
+ const unsigned *pitch = draw->vertex_fetch.pitch;
+ const ubyte **src = draw->vertex_fetch.src_ptr;
+ int i;
+
+ assert(count <= 4);
+
+// debug_printf("%s\n", __FUNCTION__);
+
+ /* loop over vertex attributes (vertex shader inputs)
+ */
+
+ for (i = 0; i < 4; i++) {
+ {
+ const float *in = (const float *)(src[0] + elts[i] * pitch[0]);
+ float *out = &machine->Inputs[0].xyzw[0].f[i];
+ out[0] = in[0];
+ out[4] = in[1];
+ out[8] = in[2];
+ out[12] = 1.0f;
+ }
+
+ {
+ const float *in = (const float *)(src[1] + elts[i] * pitch[1]);
+ float *out = &machine->Inputs[1].xyzw[0].f[i];
+ out[0] = in[0];
+ out[4] = in[1];
+ out[8] = in[2];
+ out[12] = 1.0f;
+ }
+ }
+}
+
+
+
+
+static void fetch_xyz_rgb_st( struct draw_context *draw,
+ struct tgsi_exec_machine *machine,
+ const unsigned *elts,
+ unsigned count )
+{
+ const unsigned *pitch = draw->vertex_fetch.pitch;
+ const ubyte **src = draw->vertex_fetch.src_ptr;
+ int i;
+
+ assert(count <= 4);
+
+ /* loop over vertex attributes (vertex shader inputs)
+ */
+
+ for (i = 0; i < 4; i++) {
+ {
+ const float *in = (const float *)(src[0] + elts[i] * pitch[0]);
+ float *out = &machine->Inputs[0].xyzw[0].f[i];
+ out[0] = in[0];
+ out[4] = in[1];
+ out[8] = in[2];
+ out[12] = 1.0f;
+ }
+
+ {
+ const float *in = (const float *)(src[1] + elts[i] * pitch[1]);
+ float *out = &machine->Inputs[1].xyzw[0].f[i];
+ out[0] = in[0];
+ out[4] = in[1];
+ out[8] = in[2];
+ out[12] = 1.0f;
+ }
+
+ {
+ const float *in = (const float *)(src[2] + elts[i] * pitch[2]);
+ float *out = &machine->Inputs[2].xyzw[0].f[i];
+ out[0] = in[0];
+ out[4] = in[1];
+ out[8] = 0.0f;
+ out[12] = 1.0f;
+ }
+ }
+}
+
+
+
+
+/**
+ * Fetch vertex attributes for 'count' vertices.
+ */
+static void generic_vertex_fetch( struct draw_context *draw,
+ struct tgsi_exec_machine *machine,
+ const unsigned *elts,
+ unsigned count )
+{
+ unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
+ unsigned attr;
+
+ assert(count <= 4);
+
+// debug_printf("%s %d\n", __FUNCTION__, count);
+
+ /* loop over vertex attributes (vertex shader inputs)
+ */
+ for (attr = 0; attr < nr_attrs; attr++) {
+
+ const unsigned pitch = draw->vertex_fetch.pitch[attr];
+ const ubyte *src = draw->vertex_fetch.src_ptr[attr];
+ const fetch_func fetch = draw->vertex_fetch.fetch[attr];
+ unsigned i;
+ float p[4][4];
+
+
+ /* Fetch four attributes for four vertices.
+ *
+ * Could fetch directly into AOS format, but this is meant to be
+ * a prototype for an sse implementation, which would have
+ * difficulties doing that.
+ */
+ for (i = 0; i < count; i++)
+ fetch( src + elts[i] * pitch, p[i] );
+
+ /* Be nice and zero out any missing vertices:
+ */
+ for ( ; i < 4; i++)
+ p[i][0] = p[i][1] = p[i][2] = p[i][3] = 0;
+
+ /* Transpose/swizzle into sse-friendly format. Currently
+ * assuming that all vertex shader inputs are float[4], but this
+ * isn't true -- if the vertex shader only wants tex0.xy, we
+ * could optimize for that.
+ *
+ * To do so fully without codegen would probably require an
+ * excessive number of fetch functions, but we could at least
+ * minimize the transpose step:
+ */
+ transpose_4x4( (float *)&machine->Inputs[attr].xyzw[0].f[0], (float *)p );
+ }
+}
+
+
+
+void draw_update_vertex_fetch( struct draw_context *draw )
+{
+ unsigned nr_attrs, i;
+
+// debug_printf("%s\n", __FUNCTION__);
+
+ /* this may happend during context init */
+ if (!draw->vertex_shader)
+ return;
+
+ nr_attrs = draw->vertex_shader->state->num_inputs;
+
+ for (i = 0; i < nr_attrs; i++) {
+ unsigned buf = draw->vertex_element[i].vertex_buffer_index;
+ enum pipe_format format = draw->vertex_element[i].src_format;
+
+ draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] +
+ draw->vertex_buffer[buf].buffer_offset +
+ draw->vertex_element[i].src_offset;
+
+ draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch;
+ draw->vertex_fetch.fetch[i] = get_fetch_func( format );
+ }
+
+ draw->vertex_fetch.nr_attrs = nr_attrs;
+
+ draw->vertex_fetch.fetch_func = generic_vertex_fetch;
+
+ switch (nr_attrs) {
+ case 2:
+ if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
+ draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT)
+ draw->vertex_fetch.fetch_func = fetch_xyz_rgb;
+ break;
+ case 3:
+ if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
+ draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
+ draw->vertex_element[2].src_format == PIPE_FORMAT_R32G32_FLOAT)
+ draw->vertex_fetch.fetch_func = fetch_xyz_rgb_st;
+ break;
+ default:
+ break;
+ }
+
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ */
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#if defined(__i386__) || defined(__386__)
+#include "tgsi/exec/tgsi_sse2.h"
+#endif
+#include "draw_private.h"
+#include "draw_context.h"
+
+#include "x86/rtasm/x86sse.h"
+#include "llvm/gallivm.h"
+
+
+#define DBG_VS 0
+
+
+static INLINE unsigned
+compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
+{
+ unsigned mask = 0;
+ unsigned i;
+
+ /* Do the hardwired planes first:
+ */
+ if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
+ if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
+ if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
+ if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
+ if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
+ if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
+
+ /* Followed by any remaining ones:
+ */
+ for (i = 6; i < nr; i++) {
+ if (dot4(clip, plane[i]) < 0)
+ mask |= (1<<i);
+ }
+
+ return mask;
+}
+
+
+typedef void (XSTDCALL *codegen_function) (
+ const struct tgsi_exec_vector *input,
+ struct tgsi_exec_vector *output,
+ float (*constant)[4],
+ struct tgsi_exec_vector *temporary );
+
+
+/**
+ * Transform vertices with the current vertex program/shader
+ * Up to four vertices can be shaded at a time.
+ * \param vbuffer the input vertex data
+ * \param elts indexes of four input vertices
+ * \param count number of vertices to shade [1..4]
+ * \param vOut array of pointers to four output vertices
+ */
+static void
+run_vertex_program(struct draw_context *draw,
+ unsigned elts[4], unsigned count,
+ struct vertex_header *vOut[])
+{
+ struct tgsi_exec_machine *machine = &draw->machine;
+ unsigned int j;
+
+ ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
+ ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);
+ const float *scale = draw->viewport.scale;
+ const float *trans = draw->viewport.translate;
+
+ assert(count <= 4);
+ assert(draw->vertex_shader->state->output_semantic_name[0]
+ == TGSI_SEMANTIC_POSITION);
+
+ /* Consts does not require 16 byte alignment. */
+ machine->Consts = (float (*)[4]) draw->user.constants;
+
+ machine->Inputs = ALIGN16_ASSIGN(inputs);
+ machine->Outputs = ALIGN16_ASSIGN(outputs);
+
+ draw->vertex_fetch.fetch_func( draw, machine, elts, count );
+
+ /* run shader */
+#ifdef MESA_LLVM
+ if (1) {
+ struct gallivm_prog *prog = draw->vertex_shader->llvm_prog;
+ gallivm_cpu_vs_exec(prog,
+ machine->Inputs,
+ machine->Outputs,
+ machine->Consts,
+ machine->Temps);
+ } else
+#elif defined(__i386__) || defined(__386__)
+ if (draw->use_sse) {
+ /* SSE */
+ /* cast away const */
+ struct draw_vertex_shader *shader
+ = (struct draw_vertex_shader *)draw->vertex_shader;
+ codegen_function func
+ = (codegen_function) x86_get_func( &shader->sse2_program );
+
+ if (func)
+ func(
+ machine->Inputs,
+ machine->Outputs,
+ machine->Consts,
+ machine->Temps );
+ else
+ /* interpreter */
+ tgsi_exec_machine_run( machine );
+ }
+ else
+#endif
+ {
+ /* interpreter */
+ tgsi_exec_machine_run( machine );
+ }
+
+ /* store machine results */
+ for (j = 0; j < count; j++) {
+ unsigned slot;
+ float x, y, z, w;
+
+ /* Handle attr[0] (position) specially:
+ *
+ * XXX: Computing the clipmask should be done in the vertex
+ * program as a set of DP4 instructions appended to the
+ * user-provided code.
+ */
+ x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
+ y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
+ z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
+ w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
+
+ vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
+ vOut[j]->edgeflag = 1;
+
+ /* divide by w */
+ w = 1.0f / w;
+ x *= w;
+ y *= w;
+ z *= w;
+
+ /* Viewport mapping */
+ vOut[j]->data[0][0] = x * scale[0] + trans[0];
+ vOut[j]->data[0][1] = y * scale[1] + trans[1];
+ vOut[j]->data[0][2] = z * scale[2] + trans[2];
+ vOut[j]->data[0][3] = w;
+
+#if DBG_VS
+ debug_printf("output[%d]win: %f %f %f %f\n", j,
+ vOut[j]->data[0][0],
+ vOut[j]->data[0][1],
+ vOut[j]->data[0][2],
+ vOut[j]->data[0][3]);
+#endif
+ /* Remaining attributes are packed into sequential post-transform
+ * vertex attrib slots.
+ */
+ for (slot = 1; slot < draw->num_vs_outputs; slot++) {
+ vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+ vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
+ vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
+ vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+#if DBG_VS
+ debug_printf("output[%d][%d]: %f %f %f %f\n", j, slot,
+ vOut[j]->data[slot][0],
+ vOut[j]->data[slot][1],
+ vOut[j]->data[slot][2],
+ vOut[j]->data[slot][3]);
+#endif
+ }
+ } /* loop over vertices */
+}
+
+
+/**
+ * Run the vertex shader on all vertices in the vertex queue.
+ * Called by the draw module when the vertx cache needs to be flushed.
+ */
+void
+draw_vertex_shader_queue_flush(struct draw_context *draw)
+{
+ unsigned i;
+
+ assert(draw->vs.queue_nr != 0);
+
+ /* XXX: do this on statechange:
+ */
+ draw_update_vertex_fetch( draw );
+
+// fprintf(stderr, " q(%d) ", draw->vs.queue_nr );
+
+ /* run vertex shader on vertex cache entries, four per invokation */
+ for (i = 0; i < draw->vs.queue_nr; i += 4) {
+ struct vertex_header *dests[4];
+ unsigned elts[4];
+ int j, n = MIN2(4, draw->vs.queue_nr - i);
+
+ for (j = 0; j < n; j++) {
+ elts[j] = draw->vs.queue[i + j].elt;
+ dests[j] = draw->vs.queue[i + j].dest;
+ }
+
+ for ( ; j < 4; j++) {
+ elts[j] = elts[0];
+ dests[j] = dests[0];
+ }
+
+ assert(n > 0);
+ assert(n <= 4);
+
+ run_vertex_program(draw, elts, n, dests);
+ }
+
+ draw->vs.queue_nr = 0;
+}
+
+
+struct draw_vertex_shader *
+draw_create_vertex_shader(struct draw_context *draw,
+ const struct pipe_shader_state *shader)
+{
+ struct draw_vertex_shader *vs;
+
+ vs = CALLOC_STRUCT( draw_vertex_shader );
+ if (vs == NULL) {
+ return NULL;
+ }
+
+ vs->state = shader;
+
+#ifdef MESA_LLVM
+ struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS);
+ gallivm_ir_set_layout(ir, GALLIVM_SOA);
+ gallivm_ir_set_components(ir, 4);
+ gallivm_ir_fill_from_tgsi(ir, shader->tokens);
+ vs->llvm_prog = gallivm_ir_compile(ir);
+ gallivm_ir_delete(ir);
+
+ draw->engine = gallivm_global_cpu_engine();
+ if (!draw->engine) {
+ draw->engine = gallivm_cpu_engine_create(vs->llvm_prog);
+ }
+ else {
+ gallivm_cpu_jit_compile(draw->engine, vs->llvm_prog);
+ }
+#elif defined(__i386__) || defined(__386__)
+ if (draw->use_sse) {
+ /* cast-away const */
+ struct pipe_shader_state *sh = (struct pipe_shader_state *) shader;
+
+ x86_init_func( &vs->sse2_program );
+ if (!tgsi_emit_sse2( (struct tgsi_token *) sh->tokens,
+ &vs->sse2_program )) {
+ x86_release_func( (struct x86_function *) &vs->sse2_program );
+ fprintf(stdout /*err*/,
+ "tgsi_emit_sse2() failed, falling back to interpreter\n");
+ }
+ }
+#endif
+
+ return vs;
+}
+
+
+void
+draw_bind_vertex_shader(struct draw_context *draw,
+ struct draw_vertex_shader *dvs)
+{
+ draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
+
+ draw->vertex_shader = dvs;
+ draw->num_vs_outputs = dvs->state->num_outputs;
+
+ /* specify the vertex program to interpret/execute */
+ tgsi_exec_machine_init(&draw->machine,
+ draw->vertex_shader->state->tokens,
+ PIPE_MAX_SAMPLERS,
+ NULL /*samplers*/ );
+}
+
+
+void
+draw_delete_vertex_shader(struct draw_context *draw,
+ struct draw_vertex_shader *dvs)
+{
+#if defined(__i386__) || defined(__386__)
+ x86_release_func( (struct x86_function *) &dvs->sse2_program );
+#endif
+
+ FREE( dvs );
+}
--- /dev/null
+/*
+ * Copyright 2003 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+
+#include <stddef.h>
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_util.h"
+
+#include "draw_vf.h"
+
+
+#define DRAW_VF_DBG 0
+
+
+/* TODO: remove this */
+extern void
+_mesa_exec_free( void *addr );
+
+
+static boolean match_fastpath( struct draw_vertex_fetch *vf,
+ const struct draw_vf_fastpath *fp)
+{
+ unsigned j;
+
+ if (vf->attr_count != fp->attr_count)
+ return FALSE;
+
+ for (j = 0; j < vf->attr_count; j++)
+ if (vf->attr[j].format != fp->attr[j].format ||
+ vf->attr[j].inputsize != fp->attr[j].size ||
+ vf->attr[j].vertoffset != fp->attr[j].offset)
+ return FALSE;
+
+ if (fp->match_strides) {
+ if (vf->vertex_stride != fp->vertex_stride)
+ return FALSE;
+
+ for (j = 0; j < vf->attr_count; j++)
+ if (vf->attr[j].inputstride != fp->attr[j].stride)
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+static boolean search_fastpath_emit( struct draw_vertex_fetch *vf )
+{
+ struct draw_vf_fastpath *fp = vf->fastpath;
+
+ for ( ; fp ; fp = fp->next) {
+ if (match_fastpath(vf, fp)) {
+ vf->emit = fp->func;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+void draw_vf_register_fastpath( struct draw_vertex_fetch *vf,
+ boolean match_strides )
+{
+ struct draw_vf_fastpath *fastpath = CALLOC_STRUCT(draw_vf_fastpath);
+ unsigned i;
+
+ fastpath->vertex_stride = vf->vertex_stride;
+ fastpath->attr_count = vf->attr_count;
+ fastpath->match_strides = match_strides;
+ fastpath->func = vf->emit;
+ fastpath->attr = (struct draw_vf_attr_type *)
+ MALLOC(vf->attr_count * sizeof(fastpath->attr[0]));
+
+ for (i = 0; i < vf->attr_count; i++) {
+ fastpath->attr[i].format = vf->attr[i].format;
+ fastpath->attr[i].stride = vf->attr[i].inputstride;
+ fastpath->attr[i].size = vf->attr[i].inputsize;
+ fastpath->attr[i].offset = vf->attr[i].vertoffset;
+ }
+
+ fastpath->next = vf->fastpath;
+ vf->fastpath = fastpath;
+}
+
+
+
+
+/***********************************************************************
+ * Build codegen functions or return generic ones:
+ */
+static void choose_emit_func( struct draw_vertex_fetch *vf,
+ unsigned count,
+ uint8_t *dest)
+{
+ vf->emit = NULL;
+
+ /* Does this match an existing (hardwired, codegen or known-bad)
+ * fastpath?
+ */
+ if (search_fastpath_emit(vf)) {
+ /* Use this result. If it is null, then it is already known
+ * that the current state will fail for codegen and there is no
+ * point trying again.
+ */
+ }
+ else if (vf->codegen_emit) {
+ vf->codegen_emit( vf );
+ }
+
+ if (!vf->emit) {
+ draw_vf_generate_hardwired_emit(vf);
+ }
+
+ /* Otherwise use the generic version:
+ */
+ if (!vf->emit)
+ vf->emit = draw_vf_generic_emit;
+
+ vf->emit( vf, count, dest );
+}
+
+
+
+
+
+/***********************************************************************
+ * Public entrypoints, mostly dispatch to the above:
+ */
+
+
+
+static unsigned
+draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf,
+ const struct draw_vf_attr_map *map,
+ unsigned nr,
+ unsigned vertex_stride )
+{
+ unsigned offset = 0;
+ unsigned i, j;
+
+ assert(nr < PIPE_ATTRIB_MAX);
+
+ for (j = 0, i = 0; i < nr; i++) {
+ const unsigned format = map[i].format;
+ if (format == DRAW_EMIT_PAD) {
+#if (DRAW_VF_DBG)
+ debug_printf("%d: pad %d, offset %d\n", i,
+ map[i].offset, offset);
+#endif
+
+ offset += map[i].offset;
+
+ }
+ else {
+ vf->attr[j].attrib = map[i].attrib;
+ vf->attr[j].format = format;
+ vf->attr[j].insert = draw_vf_format_info[format].insert;
+ vf->attr[j].vertattrsize = draw_vf_format_info[format].attrsize;
+ vf->attr[j].vertoffset = offset;
+ vf->attr[j].isconst = draw_vf_format_info[format].isconst;
+ if(vf->attr[j].isconst)
+ memcpy(vf->attr[j].data, &map[i].data, vf->attr[j].vertattrsize);
+
+#if (DRAW_VF_DBG)
+ debug_printf("%d: %s, offset %d\n", i,
+ draw_vf_format_info[format].name,
+ vf->attr[j].vertoffset);
+#endif
+
+ offset += draw_vf_format_info[format].attrsize;
+ j++;
+ }
+ }
+
+ vf->attr_count = j;
+ vf->vertex_stride = vertex_stride ? vertex_stride : offset;
+ vf->emit = choose_emit_func;
+
+ assert(vf->vertex_stride >= offset);
+ return vf->vertex_stride;
+}
+
+
+void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf,
+ const struct vertex_info *vinfo,
+ float point_size )
+{
+ unsigned i, j, k;
+ struct draw_vf_attr *a = vf->attr;
+ struct draw_vf_attr_map attrs[PIPE_MAX_SHADER_INPUTS];
+ unsigned count = 0; /* for debug/sanity */
+ unsigned nr_attrs = 0;
+
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ j = vinfo->src_index[i];
+ switch (vinfo->emit[i]) {
+ case EMIT_OMIT:
+ /* no-op */
+ break;
+ case EMIT_ALL: {
+ /* just copy the whole vertex as-is to the vbuf */
+ unsigned s = vinfo->size;
+ assert(i == 0);
+ assert(j == 0);
+ /* copy the vertex header */
+ /* XXX: we actually don't copy the header, just pad it */
+ attrs[nr_attrs].attrib = 0;
+ attrs[nr_attrs].format = DRAW_EMIT_PAD;
+ attrs[nr_attrs].offset = offsetof(struct vertex_header, data);
+ s -= offsetof(struct vertex_header, data)/4;
+ count += offsetof(struct vertex_header, data)/4;
+ nr_attrs++;
+ /* copy the vertex data */
+ for(k = 0; k < (s & ~0x3); k += 4) {
+ attrs[nr_attrs].attrib = k/4;
+ attrs[nr_attrs].format = DRAW_EMIT_4F;
+ attrs[nr_attrs].offset = 0;
+ nr_attrs++;
+ count += 4;
+ }
+ /* tail */
+ /* XXX: actually, this shouldn't be needed */
+ attrs[nr_attrs].attrib = k/4;
+ attrs[nr_attrs].offset = 0;
+ switch(s & 0x3) {
+ case 0:
+ break;
+ case 1:
+ attrs[nr_attrs].format = DRAW_EMIT_1F;
+ nr_attrs++;
+ count += 1;
+ break;
+ case 2:
+ attrs[nr_attrs].format = DRAW_EMIT_2F;
+ nr_attrs++;
+ count += 2;
+ break;
+ case 3:
+ attrs[nr_attrs].format = DRAW_EMIT_3F;
+ nr_attrs++;
+ count += 3;
+ break;
+ }
+ break;
+ }
+ case EMIT_1F:
+ attrs[nr_attrs].attrib = j;
+ attrs[nr_attrs].format = DRAW_EMIT_1F;
+ attrs[nr_attrs].offset = 0;
+ nr_attrs++;
+ count++;
+ break;
+ case EMIT_1F_PSIZE:
+ attrs[nr_attrs].attrib = j;
+ attrs[nr_attrs].format = DRAW_EMIT_1F_CONST;
+ attrs[nr_attrs].offset = 0;
+ attrs[nr_attrs].data.f[0] = point_size;
+ nr_attrs++;
+ count++;
+ break;
+ case EMIT_2F:
+ attrs[nr_attrs].attrib = j;
+ attrs[nr_attrs].format = DRAW_EMIT_2F;
+ attrs[nr_attrs].offset = 0;
+ nr_attrs++;
+ count += 2;
+ break;
+ case EMIT_3F:
+ attrs[nr_attrs].attrib = j;
+ attrs[nr_attrs].format = DRAW_EMIT_3F;
+ attrs[nr_attrs].offset = 0;
+ nr_attrs++;
+ count += 3;
+ break;
+ case EMIT_4F:
+ attrs[nr_attrs].attrib = j;
+ attrs[nr_attrs].format = DRAW_EMIT_4F;
+ attrs[nr_attrs].offset = 0;
+ nr_attrs++;
+ count += 4;
+ break;
+ case EMIT_4UB:
+ attrs[nr_attrs].attrib = j;
+ attrs[nr_attrs].format = DRAW_EMIT_4UB_4F_BGRA;
+ attrs[nr_attrs].offset = 0;
+ nr_attrs++;
+ count += 1;
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ assert(count == vinfo->size);
+
+ draw_vf_set_vertex_attributes(vf,
+ attrs,
+ nr_attrs,
+ vinfo->size * sizeof(float) );
+
+ for (j = 0; j < vf->attr_count; j++) {
+ a[j].inputsize = 4;
+ a[j].do_insert = a[j].insert[4 - 1];
+ if(a[j].isconst) {
+ a[j].inputptr = a[j].data;
+ a[j].inputstride = 0;
+ }
+ }
+}
+
+
+#if 0
+/* Set attribute pointers, adjusted for start position:
+ */
+void draw_vf_set_sources( struct draw_vertex_fetch *vf,
+ GLvector4f * const sources[],
+ unsigned start )
+{
+ struct draw_vf_attr *a = vf->attr;
+ unsigned j;
+
+ for (j = 0; j < vf->attr_count; j++) {
+ const GLvector4f *vptr = sources[a[j].attrib];
+
+ if ((a[j].inputstride != vptr->stride) ||
+ (a[j].inputsize != vptr->size))
+ vf->emit = choose_emit_func;
+
+ a[j].inputstride = vptr->stride;
+ a[j].inputsize = vptr->size;
+ a[j].do_insert = a[j].insert[vptr->size - 1];
+ a[j].inputptr = ((uint8_t *)vptr->data) + start * vptr->stride;
+ }
+}
+#endif
+
+
+/**
+ * Emit a vertex to dest.
+ */
+void draw_vf_emit_vertex( struct draw_vertex_fetch *vf,
+ struct vertex_header *vertex,
+ void *dest )
+{
+ struct draw_vf_attr *a = vf->attr;
+ unsigned j;
+
+ for (j = 0; j < vf->attr_count; j++) {
+ if (!a[j].isconst) {
+ a[j].inputptr = (uint8_t *)&vertex->data[a[j].attrib][0];
+ a[j].inputstride = 0; /* XXX: one-vertex-max ATM */
+ }
+ }
+
+ vf->emit( vf, 1, (uint8_t*) dest );
+}
+
+
+
+struct draw_vertex_fetch *draw_vf_create( void )
+{
+ struct draw_vertex_fetch *vf = CALLOC_STRUCT(draw_vertex_fetch);
+ unsigned i;
+
+ for (i = 0; i < PIPE_ATTRIB_MAX; i++)
+ vf->attr[i].vf = vf;
+
+ vf->identity[0] = 0.0;
+ vf->identity[1] = 0.0;
+ vf->identity[2] = 0.0;
+ vf->identity[3] = 1.0;
+
+ vf->codegen_emit = NULL;
+
+#ifdef USE_SSE_ASM
+ if (!GETENV("GALLIUM_NO_CODEGEN"))
+ vf->codegen_emit = draw_vf_generate_sse_emit;
+#endif
+
+ return vf;
+}
+
+
+void draw_vf_destroy( struct draw_vertex_fetch *vf )
+{
+ struct draw_vf_fastpath *fp, *tmp;
+
+ for (fp = vf->fastpath ; fp ; fp = tmp) {
+ tmp = fp->next;
+ FREE(fp->attr);
+
+ /* KW: At the moment, fp->func is constrained to be allocated by
+ * _mesa_exec_alloc(), as the hardwired fastpaths in
+ * t_vertex_generic.c are handled specially. It would be nice
+ * to unify them, but this probably won't change until this
+ * module gets another overhaul.
+ */
+ //_mesa_exec_free((void *) fp->func);
+ FREE(fp);
+ }
+
+ vf->fastpath = NULL;
+ FREE(vf);
+}
--- /dev/null
+/*
+ * Copyright 2008 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * Vertex fetch/store/convert code. This functionality is used in two places:
+ * 1. Vertex fetch/convert - to grab vertex data from incoming vertex
+ * arrays and convert to format needed by vertex shaders.
+ * 2. Vertex store/emit - to convert simple float[][4] vertex attributes
+ * (which is the organization used throughout the draw/prim pipeline) to
+ * hardware-specific formats and emit into hardware vertex buffers.
+ *
+ *
+ * Authors:
+ * Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+#ifndef DRAW_VF_H
+#define DRAW_VF_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
+#include "draw_vertex.h"
+#include "draw_private.h" /* for vertex_header */
+
+
+enum draw_vf_attr_format {
+ DRAW_EMIT_1F,
+ DRAW_EMIT_2F,
+ DRAW_EMIT_3F,
+ DRAW_EMIT_4F,
+ DRAW_EMIT_3F_XYW, /**< for projective texture */
+ DRAW_EMIT_1UB_1F, /**< for fog coordinate */
+ DRAW_EMIT_3UB_3F_RGB, /**< for specular color */
+ DRAW_EMIT_3UB_3F_BGR, /**< for specular color */
+ DRAW_EMIT_4UB_4F_RGBA, /**< for color */
+ DRAW_EMIT_4UB_4F_BGRA, /**< for color */
+ DRAW_EMIT_4UB_4F_ARGB, /**< for color */
+ DRAW_EMIT_4UB_4F_ABGR, /**< for color */
+ DRAW_EMIT_1F_CONST,
+ DRAW_EMIT_2F_CONST,
+ DRAW_EMIT_3F_CONST,
+ DRAW_EMIT_4F_CONST,
+ DRAW_EMIT_PAD, /**< leave a hole of 'offset' bytes */
+ DRAW_EMIT_MAX
+};
+
+struct draw_vf_attr_map
+{
+ /** Input attribute number */
+ unsigned attrib;
+
+ enum draw_vf_attr_format format;
+
+ unsigned offset;
+
+ /**
+ * Constant data for DRAW_EMIT_*_CONST
+ */
+ union {
+ uint8_t ub[4];
+ float f[4];
+ } data;
+};
+
+struct draw_vertex_fetch;
+
+
+
+#if 0
+unsigned
+draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf,
+ const struct draw_vf_attr_map *map,
+ unsigned nr,
+ unsigned vertex_stride );
+#endif
+
+void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf,
+ const struct vertex_info *vinfo,
+ float point_size );
+
+#if 0
+void
+draw_vf_set_sources( struct draw_vertex_fetch *vf,
+ GLvector4f * const attrib[],
+ unsigned start );
+#endif
+
+void
+draw_vf_emit_vertex( struct draw_vertex_fetch *vf,
+ struct vertex_header *vertex,
+ void *dest );
+
+struct draw_vertex_fetch *
+draw_vf_create( void );
+
+void
+draw_vf_destroy( struct draw_vertex_fetch *vf );
+
+
+
+/***********************************************************************
+ * Internal functions and structs:
+ */
+
+struct draw_vf_attr;
+
+typedef void (*draw_vf_extract_func)( const struct draw_vf_attr *a,
+ float *out,
+ const uint8_t *v );
+
+typedef void (*draw_vf_insert_func)( const struct draw_vf_attr *a,
+ uint8_t *v,
+ const float *in );
+
+typedef void (*draw_vf_emit_func)( struct draw_vertex_fetch *vf,
+ unsigned count,
+ uint8_t *dest );
+
+
+
+/**
+ * Describes how to convert/move a vertex attribute from a vertex
+ * array to a vertex structure.
+ */
+struct draw_vf_attr
+{
+ struct draw_vertex_fetch *vf;
+
+ unsigned format;
+ unsigned inputsize;
+ unsigned inputstride;
+ unsigned vertoffset; /**< position of the attrib in the vertex struct */
+
+ boolean isconst; /**< read from const data below */
+ uint8_t data[16];
+
+ unsigned attrib; /**< which vertex attrib (0=position, etc) */
+ unsigned vertattrsize; /**< size of the attribute in bytes */
+
+ uint8_t *inputptr;
+ const draw_vf_insert_func *insert;
+ draw_vf_insert_func do_insert;
+ draw_vf_extract_func extract;
+};
+
+struct draw_vertex_fetch
+{
+ struct draw_vf_attr attr[PIPE_ATTRIB_MAX];
+ unsigned attr_count;
+ unsigned vertex_stride;
+
+ draw_vf_emit_func emit;
+
+ /* Parameters and constants for codegen:
+ */
+ float identity[4];
+
+ struct draw_vf_fastpath *fastpath;
+
+ void (*codegen_emit)( struct draw_vertex_fetch *vf );
+};
+
+
+struct draw_vf_attr_type {
+ unsigned format;
+ unsigned size;
+ unsigned stride;
+ unsigned offset;
+};
+
+/** XXX this could be moved into draw_vf.c */
+struct draw_vf_fastpath {
+ unsigned vertex_stride;
+ unsigned attr_count;
+ boolean match_strides;
+
+ struct draw_vf_attr_type *attr;
+
+ draw_vf_emit_func func;
+ struct draw_vf_fastpath *next;
+};
+
+
+void
+draw_vf_register_fastpath( struct draw_vertex_fetch *vtx,
+ boolean match_strides );
+
+void
+draw_vf_generic_emit( struct draw_vertex_fetch *vf,
+ unsigned count,
+ uint8_t *v );
+
+void
+draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf );
+
+void
+draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf );
+
+
+/** XXX this type and function could probably be moved into draw_vf.c */
+struct draw_vf_format_info {
+ const char *name;
+ draw_vf_insert_func insert[4];
+ const unsigned attrsize;
+ const boolean isconst;
+};
+
+extern const struct draw_vf_format_info
+draw_vf_format_info[DRAW_EMIT_MAX];
+
+
+#endif
--- /dev/null
+
+/*
+ * Copyright 2003 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+
+#include "draw_vf.h"
+
+
+
+static INLINE void insert_4f_4( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+static INLINE void insert_4f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = 1;
+}
+
+static INLINE void insert_4f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = 0;
+ out[3] = 1;
+}
+
+static INLINE void insert_4f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = 0;
+ out[2] = 0;
+ out[3] = 1;
+}
+
+static INLINE void insert_3f_xyw_4( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[3];
+}
+
+static INLINE void insert_3f_xyw_err( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ (void) a; (void) v; (void) in;
+ assert(0);
+}
+
+static INLINE void insert_3f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+}
+
+static INLINE void insert_3f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = 0;
+}
+
+static INLINE void insert_3f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = 0;
+ out[2] = 0;
+}
+
+
+static INLINE void insert_2f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = in[1];
+}
+
+static INLINE void insert_2f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = 0;
+}
+
+static INLINE void insert_1f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+}
+
+static INLINE void insert_null( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ (void) a; (void) v; (void) in;
+}
+
+static INLINE void insert_4ub_4f_rgba_4( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]);
+}
+
+static INLINE void insert_4ub_4f_rgba_3( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
+ v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_rgba_2( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ v[2] = 0;
+ v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_rgba_1( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+ v[1] = 0;
+ v[2] = 0;
+ v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_bgra_4( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]);
+}
+
+static INLINE void insert_4ub_4f_bgra_3( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
+ v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_bgra_2( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ v[0] = 0;
+ v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_bgra_1( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+ v[1] = 0;
+ v[0] = 0;
+ v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_argb_4( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]);
+}
+
+static INLINE void insert_4ub_4f_argb_3( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]);
+ v[0] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_argb_2( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+ v[3] = 0x00;
+ v[0] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_argb_1( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
+ v[2] = 0x00;
+ v[3] = 0x00;
+ v[0] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_abgr_4( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]);
+}
+
+static INLINE void insert_4ub_4f_abgr_3( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]);
+ v[0] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_abgr_2( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+ v[1] = 0x00;
+ v[0] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_abgr_1( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
+ v[2] = 0x00;
+ v[1] = 0x00;
+ v[0] = 0xff;
+}
+
+static INLINE void insert_3ub_3f_rgb_3( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
+}
+
+static INLINE void insert_3ub_3f_rgb_2( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ v[2] = 0;
+}
+
+static INLINE void insert_3ub_3f_rgb_1( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+ v[1] = 0;
+ v[2] = 0;
+}
+
+static INLINE void insert_3ub_3f_bgr_3( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
+}
+
+static INLINE void insert_3ub_3f_bgr_2( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ v[0] = 0;
+}
+
+static INLINE void insert_3ub_3f_bgr_1( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+ v[1] = 0;
+ v[0] = 0;
+}
+
+
+static INLINE void insert_1ub_1f_1( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+}
+
+
+const struct draw_vf_format_info draw_vf_format_info[DRAW_EMIT_MAX] =
+{
+ { "1f",
+ { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 },
+ sizeof(float), FALSE },
+
+ { "2f",
+ { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 },
+ 2 * sizeof(float), FALSE },
+
+ { "3f",
+ { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 },
+ 3 * sizeof(float), FALSE },
+
+ { "4f",
+ { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 },
+ 4 * sizeof(float), FALSE },
+
+ { "3f_xyw",
+ { insert_3f_xyw_err, insert_3f_xyw_err, insert_3f_xyw_err,
+ insert_3f_xyw_4 },
+ 3 * sizeof(float), FALSE },
+
+ { "1ub_1f",
+ { insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1 },
+ sizeof(uint8_t), FALSE },
+
+ { "3ub_3f_rgb",
+ { insert_3ub_3f_rgb_1, insert_3ub_3f_rgb_2, insert_3ub_3f_rgb_3,
+ insert_3ub_3f_rgb_3 },
+ 3 * sizeof(uint8_t), FALSE },
+
+ { "3ub_3f_bgr",
+ { insert_3ub_3f_bgr_1, insert_3ub_3f_bgr_2, insert_3ub_3f_bgr_3,
+ insert_3ub_3f_bgr_3 },
+ 3 * sizeof(uint8_t), FALSE },
+
+ { "4ub_4f_rgba",
+ { insert_4ub_4f_rgba_1, insert_4ub_4f_rgba_2, insert_4ub_4f_rgba_3,
+ insert_4ub_4f_rgba_4 },
+ 4 * sizeof(uint8_t), FALSE },
+
+ { "4ub_4f_bgra",
+ { insert_4ub_4f_bgra_1, insert_4ub_4f_bgra_2, insert_4ub_4f_bgra_3,
+ insert_4ub_4f_bgra_4 },
+ 4 * sizeof(uint8_t), FALSE },
+
+ { "4ub_4f_argb",
+ { insert_4ub_4f_argb_1, insert_4ub_4f_argb_2, insert_4ub_4f_argb_3,
+ insert_4ub_4f_argb_4 },
+ 4 * sizeof(uint8_t), FALSE },
+
+ { "4ub_4f_abgr",
+ { insert_4ub_4f_abgr_1, insert_4ub_4f_abgr_2, insert_4ub_4f_abgr_3,
+ insert_4ub_4f_abgr_4 },
+ 4 * sizeof(uint8_t), FALSE },
+
+ { "1f_const",
+ { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 },
+ sizeof(float), TRUE },
+
+ { "2f_const",
+ { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 },
+ 2 * sizeof(float), TRUE },
+
+ { "3f_const",
+ { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 },
+ 3 * sizeof(float), TRUE },
+
+ { "4f_const",
+ { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 },
+ 4 * sizeof(float), TRUE },
+
+ { "pad",
+ { NULL, NULL, NULL, NULL },
+ 0, FALSE },
+
+};
+
+
+
+
+/***********************************************************************
+ * Hardwired fastpaths for emitting whole vertices or groups of
+ * vertices
+ */
+#define EMIT5(NR, F0, F1, F2, F3, F4, NAME) \
+static void NAME( struct draw_vertex_fetch *vf, \
+ unsigned count, \
+ uint8_t *v ) \
+{ \
+ struct draw_vf_attr *a = vf->attr; \
+ unsigned i; \
+ \
+ for (i = 0 ; i < count ; i++, v += vf->vertex_stride) { \
+ if (NR > 0) { \
+ F0( &a[0], v + a[0].vertoffset, (float *)a[0].inputptr ); \
+ a[0].inputptr += a[0].inputstride; \
+ } \
+ \
+ if (NR > 1) { \
+ F1( &a[1], v + a[1].vertoffset, (float *)a[1].inputptr ); \
+ a[1].inputptr += a[1].inputstride; \
+ } \
+ \
+ if (NR > 2) { \
+ F2( &a[2], v + a[2].vertoffset, (float *)a[2].inputptr ); \
+ a[2].inputptr += a[2].inputstride; \
+ } \
+ \
+ if (NR > 3) { \
+ F3( &a[3], v + a[3].vertoffset, (float *)a[3].inputptr ); \
+ a[3].inputptr += a[3].inputstride; \
+ } \
+ \
+ if (NR > 4) { \
+ F4( &a[4], v + a[4].vertoffset, (float *)a[4].inputptr ); \
+ a[4].inputptr += a[4].inputstride; \
+ } \
+ } \
+}
+
+
+#define EMIT2(F0, F1, NAME) EMIT5(2, F0, F1, insert_null, \
+ insert_null, insert_null, NAME)
+
+#define EMIT3(F0, F1, F2, NAME) EMIT5(3, F0, F1, F2, insert_null, \
+ insert_null, NAME)
+
+#define EMIT4(F0, F1, F2, F3, NAME) EMIT5(4, F0, F1, F2, F3, \
+ insert_null, NAME)
+
+
+EMIT2(insert_3f_3, insert_4ub_4f_rgba_4, emit_xyz3_rgba4)
+
+EMIT3(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, emit_xyzw4_rgba4_st2)
+
+EMIT4(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, insert_2f_2, emit_xyzw4_rgba4_st2_st2)
+
+
+/* Use the codegen paths to select one of a number of hardwired
+ * fastpaths.
+ */
+void draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf )
+{
+ draw_vf_emit_func func = NULL;
+
+ /* Does it fit a hardwired fastpath? Help! this is growing out of
+ * control!
+ */
+ switch (vf->attr_count) {
+ case 2:
+ if (vf->attr[0].do_insert == insert_3f_3 &&
+ vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
+ func = emit_xyz3_rgba4;
+ }
+ break;
+ case 3:
+ if (vf->attr[2].do_insert == insert_2f_2) {
+ if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
+ if (vf->attr[0].do_insert == insert_4f_4)
+ func = emit_xyzw4_rgba4_st2;
+ }
+ }
+ break;
+ case 4:
+ if (vf->attr[2].do_insert == insert_2f_2 &&
+ vf->attr[3].do_insert == insert_2f_2) {
+ if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
+ if (vf->attr[0].do_insert == insert_4f_4)
+ func = emit_xyzw4_rgba4_st2_st2;
+ }
+ }
+ break;
+ }
+
+ vf->emit = func;
+}
+
+/***********************************************************************
+ * Generic (non-codegen) functions for whole vertices or groups of
+ * vertices
+ */
+
+void draw_vf_generic_emit( struct draw_vertex_fetch *vf,
+ unsigned count,
+ uint8_t *v )
+{
+ struct draw_vf_attr *a = vf->attr;
+ const unsigned attr_count = vf->attr_count;
+ const unsigned stride = vf->vertex_stride;
+ unsigned i, j;
+
+ for (i = 0 ; i < count ; i++, v += stride) {
+ for (j = 0; j < attr_count; j++) {
+ float *in = (float *)a[j].inputptr;
+ a[j].inputptr += a[j].inputstride;
+ a[j].do_insert( &a[j], v + a[j].vertoffset, in );
+ }
+ }
+}
+
+
--- /dev/null
+/*
+ * Copyright 2003 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+
+#include "simple_list.h"
+
+#include "pipe/p_compiler.h"
+
+#include "draw_vf.h"
+
+
+#if defined(USE_SSE_ASM)
+
+#include "x86/rtasm/x86sse.h"
+#include "x86/common_x86_asm.h"
+
+
+#define X 0
+#define Y 1
+#define Z 2
+#define W 3
+
+
+struct x86_program {
+ struct x86_function func;
+
+ struct draw_vertex_fetch *vf;
+ boolean inputs_safe;
+ boolean outputs_safe;
+ boolean have_sse2;
+
+ struct x86_reg identity;
+ struct x86_reg chan0;
+};
+
+
+static struct x86_reg get_identity( struct x86_program *p )
+{
+ return p->identity;
+}
+
+static void emit_load4f_4( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ sse_movups(&p->func, dest, arg0);
+}
+
+static void emit_load4f_3( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ /* Have to jump through some hoops:
+ *
+ * c 0 0 0
+ * c 0 0 1
+ * 0 0 c 1
+ * a b c 1
+ */
+ sse_movss(&p->func, dest, x86_make_disp(arg0, 8));
+ sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) );
+ sse_shufps(&p->func, dest, dest, SHUF(Y,Z,X,W) );
+ sse_movlps(&p->func, dest, arg0);
+}
+
+static void emit_load4f_2( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ /* Initialize from identity, then pull in low two words:
+ */
+ sse_movups(&p->func, dest, get_identity(p));
+ sse_movlps(&p->func, dest, arg0);
+}
+
+static void emit_load4f_1( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ /* Pull in low word, then swizzle in identity */
+ sse_movss(&p->func, dest, arg0);
+ sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) );
+}
+
+
+
+static void emit_load3f_3( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ /* Over-reads by 1 dword - potential SEGV if input is a vertex
+ * array.
+ */
+ if (p->inputs_safe) {
+ sse_movups(&p->func, dest, arg0);
+ }
+ else {
+ /* c 0 0 0
+ * c c c c
+ * a b c c
+ */
+ sse_movss(&p->func, dest, x86_make_disp(arg0, 8));
+ sse_shufps(&p->func, dest, dest, SHUF(X,X,X,X));
+ sse_movlps(&p->func, dest, arg0);
+ }
+}
+
+static void emit_load3f_2( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ emit_load4f_2(p, dest, arg0);
+}
+
+static void emit_load3f_1( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ emit_load4f_1(p, dest, arg0);
+}
+
+static void emit_load2f_2( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ sse_movlps(&p->func, dest, arg0);
+}
+
+static void emit_load2f_1( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ emit_load4f_1(p, dest, arg0);
+}
+
+static void emit_load1f_1( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ sse_movss(&p->func, dest, arg0);
+}
+
+static void (*load[4][4])( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 ) = {
+ { emit_load1f_1,
+ emit_load1f_1,
+ emit_load1f_1,
+ emit_load1f_1 },
+
+ { emit_load2f_1,
+ emit_load2f_2,
+ emit_load2f_2,
+ emit_load2f_2 },
+
+ { emit_load3f_1,
+ emit_load3f_2,
+ emit_load3f_3,
+ emit_load3f_3 },
+
+ { emit_load4f_1,
+ emit_load4f_2,
+ emit_load4f_3,
+ emit_load4f_4 }
+};
+
+static void emit_load( struct x86_program *p,
+ struct x86_reg dest,
+ unsigned sz,
+ struct x86_reg src,
+ unsigned src_sz)
+{
+ load[sz-1][src_sz-1](p, dest, src);
+}
+
+static void emit_store4f( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ sse_movups(&p->func, dest, arg0);
+}
+
+static void emit_store3f( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ if (p->outputs_safe) {
+ /* Emit the extra dword anyway. This may hurt writecombining,
+ * may cause other problems.
+ */
+ sse_movups(&p->func, dest, arg0);
+ }
+ else {
+ /* Alternate strategy - emit two, shuffle, emit one.
+ */
+ sse_movlps(&p->func, dest, arg0);
+ sse_shufps(&p->func, arg0, arg0, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
+ sse_movss(&p->func, x86_make_disp(dest,8), arg0);
+ }
+}
+
+static void emit_store2f( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ sse_movlps(&p->func, dest, arg0);
+}
+
+static void emit_store1f( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ sse_movss(&p->func, dest, arg0);
+}
+
+
+static void (*store[4])( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 ) =
+{
+ emit_store1f,
+ emit_store2f,
+ emit_store3f,
+ emit_store4f
+};
+
+static void emit_store( struct x86_program *p,
+ struct x86_reg dest,
+ unsigned sz,
+ struct x86_reg temp )
+
+{
+ store[sz-1](p, dest, temp);
+}
+
+static void emit_pack_store_4ub( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg temp )
+{
+ /* Scale by 255.0
+ */
+ sse_mulps(&p->func, temp, p->chan0);
+
+ if (p->have_sse2) {
+ sse2_cvtps2dq(&p->func, temp, temp);
+ sse2_packssdw(&p->func, temp, temp);
+ sse2_packuswb(&p->func, temp, temp);
+ sse_movss(&p->func, dest, temp);
+ }
+ else {
+ struct x86_reg mmx0 = x86_make_reg(file_MMX, 0);
+ struct x86_reg mmx1 = x86_make_reg(file_MMX, 1);
+ sse_cvtps2pi(&p->func, mmx0, temp);
+ sse_movhlps(&p->func, temp, temp);
+ sse_cvtps2pi(&p->func, mmx1, temp);
+ mmx_packssdw(&p->func, mmx0, mmx1);
+ mmx_packuswb(&p->func, mmx0, mmx0);
+ mmx_movd(&p->func, dest, mmx0);
+ }
+}
+
+static int get_offset( const void *a, const void *b )
+{
+ return (const char *)b - (const char *)a;
+}
+
+/* Not much happens here. Eventually use this function to try and
+ * avoid saving/reloading the source pointers each vertex (if some of
+ * them can fit in registers).
+ */
+static void get_src_ptr( struct x86_program *p,
+ struct x86_reg srcREG,
+ struct x86_reg vfREG,
+ struct draw_vf_attr *a )
+{
+ struct draw_vertex_fetch *vf = p->vf;
+ struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr));
+
+ /* Load current a[j].inputptr
+ */
+ x86_mov(&p->func, srcREG, ptr_to_src);
+}
+
+static void update_src_ptr( struct x86_program *p,
+ struct x86_reg srcREG,
+ struct x86_reg vfREG,
+ struct draw_vf_attr *a )
+{
+ if (a->inputstride) {
+ struct draw_vertex_fetch *vf = p->vf;
+ struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr));
+
+ /* add a[j].inputstride (hardcoded value - could just as easily
+ * pull the stride value from memory each time).
+ */
+ x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride));
+
+ /* save new value of a[j].inputptr
+ */
+ x86_mov(&p->func, ptr_to_src, srcREG);
+ }
+}
+
+
+/* Lots of hardcoding
+ *
+ * EAX -- pointer to current output vertex
+ * ECX -- pointer to current attribute
+ *
+ */
+static boolean build_vertex_emit( struct x86_program *p )
+{
+ struct draw_vertex_fetch *vf = p->vf;
+ unsigned j = 0;
+
+ struct x86_reg vertexEAX = x86_make_reg(file_REG32, reg_AX);
+ struct x86_reg srcECX = x86_make_reg(file_REG32, reg_CX);
+ struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP);
+ struct x86_reg vfESI = x86_make_reg(file_REG32, reg_SI);
+ struct x86_reg temp = x86_make_reg(file_XMM, 0);
+ uint8_t *fixup, *label;
+
+ /* Push a few regs?
+ */
+ x86_push(&p->func, countEBP);
+ x86_push(&p->func, vfESI);
+
+
+ /* Get vertex count, compare to zero
+ */
+ x86_xor(&p->func, srcECX, srcECX);
+ x86_mov(&p->func, countEBP, x86_fn_arg(&p->func, 2));
+ x86_cmp(&p->func, countEBP, srcECX);
+ fixup = x86_jcc_forward(&p->func, cc_E);
+
+ /* Initialize destination register.
+ */
+ x86_mov(&p->func, vertexEAX, x86_fn_arg(&p->func, 3));
+
+ /* Move argument 1 (vf) into a reg:
+ */
+ x86_mov(&p->func, vfESI, x86_fn_arg(&p->func, 1));
+
+
+ /* always load, needed or not:
+ */
+ sse_movups(&p->func, p->identity, x86_make_disp(vfESI, get_offset(vf, &vf->identity[0])));
+
+ /* Note address for loop jump */
+ label = x86_get_label(&p->func);
+
+ /* Emit code for each of the attributes. Currently routes
+ * everything through SSE registers, even when it might be more
+ * efficient to stick with regular old x86. No optimization or
+ * other tricks - enough new ground to cover here just getting
+ * things working.
+ */
+ while (j < vf->attr_count) {
+ struct draw_vf_attr *a = &vf->attr[j];
+ struct x86_reg dest = x86_make_disp(vertexEAX, a->vertoffset);
+
+ /* Now, load an XMM reg from src, perhaps transform, then save.
+ * Could be shortcircuited in specific cases:
+ */
+ switch (a->format) {
+ case DRAW_EMIT_1F:
+ case DRAW_EMIT_1F_CONST:
+ get_src_ptr(p, srcECX, vfESI, a);
+ emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize);
+ emit_store(p, dest, 1, temp);
+ update_src_ptr(p, srcECX, vfESI, a);
+ break;
+ case DRAW_EMIT_2F:
+ case DRAW_EMIT_2F_CONST:
+ get_src_ptr(p, srcECX, vfESI, a);
+ emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize);
+ emit_store(p, dest, 2, temp);
+ update_src_ptr(p, srcECX, vfESI, a);
+ break;
+ case DRAW_EMIT_3F:
+ case DRAW_EMIT_3F_CONST:
+ /* Potentially the worst case - hardcode 2+1 copying:
+ */
+ if (0) {
+ get_src_ptr(p, srcECX, vfESI, a);
+ emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
+ emit_store(p, dest, 3, temp);
+ update_src_ptr(p, srcECX, vfESI, a);
+ }
+ else {
+ get_src_ptr(p, srcECX, vfESI, a);
+ emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize);
+ emit_store(p, dest, 2, temp);
+ if (a->inputsize > 2) {
+ emit_load(p, temp, 1, x86_make_disp(srcECX, 8), 1);
+ emit_store(p, x86_make_disp(dest,8), 1, temp);
+ }
+ else {
+ sse_movss(&p->func, x86_make_disp(dest,8), get_identity(p));
+ }
+ update_src_ptr(p, srcECX, vfESI, a);
+ }
+ break;
+ case DRAW_EMIT_4F:
+ case DRAW_EMIT_4F_CONST:
+ get_src_ptr(p, srcECX, vfESI, a);
+ emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
+ emit_store(p, dest, 4, temp);
+ update_src_ptr(p, srcECX, vfESI, a);
+ break;
+ case DRAW_EMIT_3F_XYW:
+ get_src_ptr(p, srcECX, vfESI, a);
+ emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
+ sse_shufps(&p->func, temp, temp, SHUF(X,Y,W,Z));
+ emit_store(p, dest, 3, temp);
+ update_src_ptr(p, srcECX, vfESI, a);
+ break;
+
+ case DRAW_EMIT_1UB_1F:
+ /* Test for PAD3 + 1UB:
+ */
+ if (j > 0 &&
+ a[-1].vertoffset + a[-1].vertattrsize <= a->vertoffset - 3)
+ {
+ get_src_ptr(p, srcECX, vfESI, a);
+ emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize);
+ sse_shufps(&p->func, temp, temp, SHUF(X,X,X,X));
+ emit_pack_store_4ub(p, x86_make_disp(dest, -3), temp); /* overkill! */
+ update_src_ptr(p, srcECX, vfESI, a);
+ }
+ else {
+ debug_printf("Can't emit 1ub %x %x %d\n",
+ a->vertoffset, a[-1].vertoffset, a[-1].vertattrsize );
+ return FALSE;
+ }
+ break;
+ case DRAW_EMIT_3UB_3F_RGB:
+ case DRAW_EMIT_3UB_3F_BGR:
+ /* Test for 3UB + PAD1:
+ */
+ if (j == vf->attr_count - 1 ||
+ a[1].vertoffset >= a->vertoffset + 4) {
+ get_src_ptr(p, srcECX, vfESI, a);
+ emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
+ if (a->format == DRAW_EMIT_3UB_3F_BGR)
+ sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W));
+ emit_pack_store_4ub(p, dest, temp);
+ update_src_ptr(p, srcECX, vfESI, a);
+ }
+ /* Test for 3UB + 1UB:
+ */
+ else if (j < vf->attr_count - 1 &&
+ a[1].format == DRAW_EMIT_1UB_1F &&
+ a[1].vertoffset == a->vertoffset + 3) {
+ get_src_ptr(p, srcECX, vfESI, a);
+ emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
+ update_src_ptr(p, srcECX, vfESI, a);
+
+ /* Make room for incoming value:
+ */
+ sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z));
+
+ get_src_ptr(p, srcECX, vfESI, &a[1]);
+ emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize);
+ update_src_ptr(p, srcECX, vfESI, &a[1]);
+
+ /* Rearrange and possibly do BGR conversion:
+ */
+ if (a->format == DRAW_EMIT_3UB_3F_BGR)
+ sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X));
+ else
+ sse_shufps(&p->func, temp, temp, SHUF(Y,Z,W,X));
+
+ emit_pack_store_4ub(p, dest, temp);
+ j++; /* NOTE: two attrs consumed */
+ }
+ else {
+ debug_printf("Can't emit 3ub\n");
+ }
+ return FALSE; /* add this later */
+ break;
+
+ case DRAW_EMIT_4UB_4F_RGBA:
+ get_src_ptr(p, srcECX, vfESI, a);
+ emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
+ emit_pack_store_4ub(p, dest, temp);
+ update_src_ptr(p, srcECX, vfESI, a);
+ break;
+ case DRAW_EMIT_4UB_4F_BGRA:
+ get_src_ptr(p, srcECX, vfESI, a);
+ emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
+ sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W));
+ emit_pack_store_4ub(p, dest, temp);
+ update_src_ptr(p, srcECX, vfESI, a);
+ break;
+ case DRAW_EMIT_4UB_4F_ARGB:
+ get_src_ptr(p, srcECX, vfESI, a);
+ emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
+ sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z));
+ emit_pack_store_4ub(p, dest, temp);
+ update_src_ptr(p, srcECX, vfESI, a);
+ break;
+ case DRAW_EMIT_4UB_4F_ABGR:
+ get_src_ptr(p, srcECX, vfESI, a);
+ emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
+ sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X));
+ emit_pack_store_4ub(p, dest, temp);
+ update_src_ptr(p, srcECX, vfESI, a);
+ break;
+ default:
+ debug_printf("unknown a[%d].format %d\n", j, a->format);
+ return FALSE; /* catch any new opcodes */
+ }
+
+ /* Increment j by at least 1 - may have been incremented above also:
+ */
+ j++;
+ }
+
+ /* Next vertex:
+ */
+ x86_lea(&p->func, vertexEAX, x86_make_disp(vertexEAX, vf->vertex_stride));
+
+ /* decr count, loop if not zero
+ */
+ x86_dec(&p->func, countEBP);
+ x86_test(&p->func, countEBP, countEBP);
+ x86_jcc(&p->func, cc_NZ, label);
+
+ /* Exit mmx state?
+ */
+ if (p->func.need_emms)
+ mmx_emms(&p->func);
+
+ /* Land forward jump here:
+ */
+ x86_fixup_fwd_jump(&p->func, fixup);
+
+ /* Pop regs and return
+ */
+ x86_pop(&p->func, x86_get_base_reg(vfESI));
+ x86_pop(&p->func, countEBP);
+ x86_ret(&p->func);
+
+ vf->emit = (draw_vf_emit_func)x86_get_func(&p->func);
+ return TRUE;
+}
+
+
+
+void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf )
+{
+ struct x86_program p;
+
+ if (!cpu_has_xmm) {
+ vf->codegen_emit = NULL;
+ return;
+ }
+
+ memset(&p, 0, sizeof(p));
+
+ p.vf = vf;
+ p.inputs_safe = 0; /* for now */
+ p.outputs_safe = 1; /* for now */
+ p.have_sse2 = cpu_has_xmm2;
+ p.identity = x86_make_reg(file_XMM, 6);
+ p.chan0 = x86_make_reg(file_XMM, 7);
+
+ x86_init_func(&p.func);
+
+ if (build_vertex_emit(&p)) {
+ draw_vf_register_fastpath( vf, TRUE );
+ }
+ else {
+ /* Note the failure so that we don't keep trying to codegen an
+ * impossible state:
+ */
+ draw_vf_register_fastpath( vf, FALSE );
+ x86_release_func(&p.func);
+ }
+}
+
+#else
+
+void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf )
+{
+ /* Dummy version for when USE_SSE_ASM not defined */
+}
+
+#endif
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "pipe/p_util.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_shader_tokens.h"
+#include "draw_private.h"
+
+
+struct wide_stage {
+ struct draw_stage stage;
+
+ float half_line_width;
+ float half_point_size;
+
+ uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS];
+ uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS];
+ uint num_texcoords;
+
+ int psize_slot;
+};
+
+
+
+static INLINE struct wide_stage *wide_stage( struct draw_stage *stage )
+{
+ return (struct wide_stage *)stage;
+}
+
+
+static void passthrough_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ stage->next->point( stage->next, header );
+}
+
+static void passthrough_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ stage->next->line(stage->next, header);
+}
+
+static void passthrough_tri( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ stage->next->tri(stage->next, header);
+}
+
+
+/**
+ * Draw a wide line by drawing a quad (two triangles).
+ * XXX need to disable polygon stipple.
+ */
+static void wide_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ const struct wide_stage *wide = wide_stage(stage);
+ const float half_width = wide->half_line_width;
+
+ struct prim_header tri;
+
+ struct vertex_header *v0 = dup_vert(stage, header->v[0], 0);
+ struct vertex_header *v1 = dup_vert(stage, header->v[0], 1);
+ struct vertex_header *v2 = dup_vert(stage, header->v[1], 2);
+ struct vertex_header *v3 = dup_vert(stage, header->v[1], 3);
+
+ float *pos0 = v0->data[0];
+ float *pos1 = v1->data[0];
+ float *pos2 = v2->data[0];
+ float *pos3 = v3->data[0];
+
+ const float dx = FABSF(pos0[0] - pos2[0]);
+ const float dy = FABSF(pos0[1] - pos2[1]);
+
+ /*
+ * Draw wide line as a quad (two tris) by "stretching" the line along
+ * X or Y.
+ * We need to tweak coords in several ways to be conformant here.
+ */
+
+ if (dx > dy) {
+ /* x-major line */
+ pos0[1] = pos0[1] - half_width - 0.25f;
+ pos1[1] = pos1[1] + half_width - 0.25f;
+ pos2[1] = pos2[1] - half_width - 0.25f;
+ pos3[1] = pos3[1] + half_width - 0.25f;
+ if (pos0[0] < pos2[0]) {
+ /* left to right line */
+ pos0[0] -= 0.5f;
+ pos1[0] -= 0.5f;
+ pos2[0] -= 0.5f;
+ pos3[0] -= 0.5f;
+ }
+ else {
+ /* right to left line */
+ pos0[0] += 0.5f;
+ pos1[0] += 0.5f;
+ pos2[0] += 0.5f;
+ pos3[0] += 0.5f;
+ }
+ }
+ else {
+ /* y-major line */
+ pos0[0] = pos0[0] - half_width + 0.25f;
+ pos1[0] = pos1[0] + half_width + 0.25f;
+ pos2[0] = pos2[0] - half_width + 0.25f;
+ pos3[0] = pos3[0] + half_width + 0.25f;
+ if (pos0[1] < pos2[1]) {
+ /* top to bottom line */
+ pos0[1] -= 0.5f;
+ pos1[1] -= 0.5f;
+ pos2[1] -= 0.5f;
+ pos3[1] -= 0.5f;
+ }
+ else {
+ /* bottom to top line */
+ pos0[1] += 0.5f;
+ pos1[1] += 0.5f;
+ pos2[1] += 0.5f;
+ pos3[1] += 0.5f;
+ }
+ }
+
+ tri.det = header->det; /* only the sign matters */
+ tri.v[0] = v0;
+ tri.v[1] = v2;
+ tri.v[2] = v3;
+ stage->next->tri( stage->next, &tri );
+
+ tri.v[0] = v0;
+ tri.v[1] = v3;
+ tri.v[2] = v1;
+ stage->next->tri( stage->next, &tri );
+}
+
+
+/**
+ * Draw a wide line by drawing a quad, using geometry which will
+ * fullfill GL's antialiased line requirements.
+ */
+static void wide_line_aa(struct draw_stage *stage,
+ struct prim_header *header)
+{
+ const struct wide_stage *wide = wide_stage(stage);
+ const float half_width = wide->half_line_width;
+ struct prim_header tri;
+ struct vertex_header *v[4];
+ float *pos;
+ float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0];
+ float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1];
+ const float len = (float) sqrt(dx * dx + dy * dy);
+ uint i;
+
+ dx = dx * half_width / len;
+ dy = dy * half_width / len;
+
+ /* allocate/dup new verts */
+ for (i = 0; i < 4; i++) {
+ v[i] = dup_vert(stage, header->v[i/2], i);
+ }
+
+ /*
+ * Quad for line from v0 to v1:
+ *
+ * 1 3
+ * +-------------------------+
+ * | |
+ * *v0 v1*
+ * | |
+ * +-------------------------+
+ * 0 2
+ */
+
+ pos = v[0]->data[0];
+ pos[0] += dy;
+ pos[1] -= dx;
+
+ pos = v[1]->data[0];
+ pos[0] -= dy;
+ pos[1] += dx;
+
+ pos = v[2]->data[0];
+ pos[0] += dy;
+ pos[1] -= dx;
+
+ pos = v[3]->data[0];
+ pos[0] -= dy;
+ pos[1] += dx;
+
+ tri.det = header->det; /* only the sign matters */
+
+ tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0];
+ stage->next->tri( stage->next, &tri );
+
+ tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2];
+ stage->next->tri( stage->next, &tri );
+
+}
+
+
+/**
+ * Set the vertex texcoords for sprite mode.
+ * Coords may be left untouched or set to a right-side-up or upside-down
+ * orientation.
+ */
+static void set_texcoords(const struct wide_stage *wide,
+ struct vertex_header *v, const float tc[4])
+{
+ uint i;
+ for (i = 0; i < wide->num_texcoords; i++) {
+ if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) {
+ uint j = wide->texcoord_slot[i];
+ v->data[j][0] = tc[0];
+ if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT)
+ v->data[j][1] = 1.0f - tc[1];
+ else
+ v->data[j][1] = tc[1];
+ v->data[j][2] = tc[2];
+ v->data[j][3] = tc[3];
+ }
+ }
+}
+
+
+/* If there are lots of sprite points (and why wouldn't there be?) it
+ * would probably be more sensible to change hardware setup to
+ * optimize this rather than doing the whole thing in software like
+ * this.
+ */
+static void wide_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ const struct wide_stage *wide = wide_stage(stage);
+ const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite;
+ float half_size;
+ float left_adj, right_adj;
+
+ struct prim_header tri;
+
+ /* four dups of original vertex */
+ struct vertex_header *v0 = dup_vert(stage, header->v[0], 0);
+ struct vertex_header *v1 = dup_vert(stage, header->v[0], 1);
+ struct vertex_header *v2 = dup_vert(stage, header->v[0], 2);
+ struct vertex_header *v3 = dup_vert(stage, header->v[0], 3);
+
+ float *pos0 = v0->data[0];
+ float *pos1 = v1->data[0];
+ float *pos2 = v2->data[0];
+ float *pos3 = v3->data[0];
+
+ /* point size is either per-vertex or fixed size */
+ if (wide->psize_slot >= 0) {
+ half_size = 0.5f * header->v[0]->data[wide->psize_slot][0];
+ }
+ else {
+ half_size = wide->half_point_size;
+ }
+
+ left_adj = -half_size + 0.25f;
+ right_adj = half_size + 0.25f;
+
+ pos0[0] += left_adj;
+ pos0[1] -= half_size;
+
+ pos1[0] += left_adj;
+ pos1[1] += half_size;
+
+ pos2[0] += right_adj;
+ pos2[1] -= half_size;
+
+ pos3[0] += right_adj;
+ pos3[1] += half_size;
+
+ if (sprite) {
+ static const float tex00[4] = { 0, 0, 0, 1 };
+ static const float tex01[4] = { 0, 1, 0, 1 };
+ static const float tex11[4] = { 1, 1, 0, 1 };
+ static const float tex10[4] = { 1, 0, 0, 1 };
+ set_texcoords( wide, v0, tex00 );
+ set_texcoords( wide, v1, tex01 );
+ set_texcoords( wide, v2, tex10 );
+ set_texcoords( wide, v3, tex11 );
+ }
+
+ tri.det = header->det; /* only the sign matters */
+ tri.v[0] = v0;
+ tri.v[1] = v2;
+ tri.v[2] = v3;
+ stage->next->tri( stage->next, &tri );
+
+ tri.v[0] = v0;
+ tri.v[1] = v3;
+ tri.v[2] = v1;
+ stage->next->tri( stage->next, &tri );
+}
+
+
+static void wide_first_point( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct wide_stage *wide = wide_stage(stage);
+ struct draw_context *draw = stage->draw;
+
+ wide->half_point_size = 0.5f * draw->rasterizer->point_size;
+
+ if (draw->rasterizer->point_size != 1.0) {
+ stage->point = wide_point;
+ }
+ else {
+ stage->point = passthrough_point;
+ }
+
+ if (draw->rasterizer->point_sprite) {
+ /* find vertex shader texcoord outputs */
+ const struct draw_vertex_shader *vs = draw->vertex_shader;
+ uint i, j = 0;
+ for (i = 0; i < vs->state->num_outputs; i++) {
+ if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
+ wide->texcoord_slot[j] = i;
+ wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j];
+ j++;
+ }
+ }
+ wide->num_texcoords = j;
+ }
+
+ wide->psize_slot = -1;
+
+ if (draw->rasterizer->point_size_per_vertex) {
+ /* find PSIZ vertex output */
+ const struct draw_vertex_shader *vs = draw->vertex_shader;
+ uint i;
+ for (i = 0; i < vs->state->num_outputs; i++) {
+ if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
+ wide->psize_slot = i;
+ break;
+ }
+ }
+ }
+
+ stage->point( stage, header );
+}
+
+
+
+static void wide_first_line( struct draw_stage *stage,
+ struct prim_header *header )
+{
+ struct wide_stage *wide = wide_stage(stage);
+ struct draw_context *draw = stage->draw;
+
+ wide->half_line_width = 0.5f * draw->rasterizer->line_width;
+
+ if (draw->rasterizer->line_width != 1.0) {
+ if (draw->rasterizer->line_smooth)
+ wide->stage.line = wide_line_aa;
+ else
+ wide->stage.line = wide_line;
+ }
+ else {
+ wide->stage.line = passthrough_line;
+ }
+
+ stage->line( stage, header );
+}
+
+
+static void wide_flush( struct draw_stage *stage, unsigned flags )
+{
+ stage->line = wide_first_line;
+ stage->point = wide_first_point;
+ stage->next->flush( stage->next, flags );
+}
+
+
+static void wide_reset_stipple_counter( struct draw_stage *stage )
+{
+ stage->next->reset_stipple_counter( stage->next );
+}
+
+
+static void wide_destroy( struct draw_stage *stage )
+{
+ draw_free_temp_verts( stage );
+ FREE( stage );
+}
+
+
+struct draw_stage *draw_wide_stage( struct draw_context *draw )
+{
+ struct wide_stage *wide = CALLOC_STRUCT(wide_stage);
+
+ draw_alloc_temp_verts( &wide->stage, 4 );
+
+ wide->stage.draw = draw;
+ wide->stage.next = NULL;
+ wide->stage.point = wide_first_point;
+ wide->stage.line = wide_first_line;
+ wide->stage.tri = passthrough_tri;
+ wide->stage.flush = wide_flush;
+ wide->stage.reset_stipple_counter = wide_reset_stipple_counter;
+ wide->stage.destroy = wide_destroy;
+
+ return &wide->stage;
+}
--- /dev/null
+# -*-makefile-*-
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = gallivm
+
+
+GALLIVM_SOURCES = \
+ gallivm.cpp \
+ gallivm_cpu.cpp \
+ instructions.cpp \
+ loweringpass.cpp \
+ tgsitollvm.cpp \
+ storage.cpp \
+ storagesoa.cpp \
+ instructionssoa.cpp
+
+INC_SOURCES = gallivm_builtins.cpp
+
+CPP_SOURCES = \
+ $(GALLIVM_SOURCES)
+
+C_SOURCES =
+ASM_SOURCES =
+
+OBJECTS = $(C_SOURCES:.c=.o) \
+ $(CPP_SOURCES:.cpp=.o) \
+ $(ASM_SOURCES:.S=.o)
+
+### Include directories
+INCLUDES = \
+ -I. \
+ -I$(TOP)/src/gallium/drivers
+ -I$(TOP)/src/gallium/aux \
+ -I$(TOP)/src/gallium/include \
+ -I$(TOP)/src/mesa \
+ -I$(TOP)/include
+
+
+##### RULES #####
+
+.c.o:
+ $(CC) -c $(INCLUDES) $(LLVM_CFLAGS) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
+
+.cpp.o:
+ $(CXX) -c $(INCLUDES) $(LLVM_CXXFLAGS) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@
+
+.S.o:
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
+
+##### TARGETS #####
+
+default:: depend symlinks $(LIBNAME)
+
+
+$(LIBNAME): $(OBJECTS) Makefile
+ $(TOP)/bin/mklib -o $@ -static $(OBJECTS)
+
+
+depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES)
+ rm -f depend
+ touch depend
+ $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \
+ $(ASM_SOURCES) $(INC_SOURCES) 2> /dev/null
+
+
+gallivm_builtins.cpp: llvm_builtins.c
+ clang --emit-llvm $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins
+
+
+# Emacs tags
+tags:
+ etags `find . -name \*.[ch]` `find ../include`
+
+
+# Remove .o and backup files
+clean:
+ -rm -f *.o */*.o *~ *.so *~ server/*.o
+ -rm -f depend depend.bak
+ -rm -f gallivm_builtins.cpp
+
+symlinks:
+
+
+include depend
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin zack@tungstengraphics.com
+ */
+#ifdef MESA_LLVM
+
+#include "gallivm.h"
+#include "gallivm_p.h"
+
+#include "instructions.h"
+#include "loweringpass.h"
+#include "storage.h"
+#include "tgsitollvm.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/exec/tgsi_exec.h"
+#include "tgsi/util/tgsi_dump.h"
+
+#include <llvm/Module.h>
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/Instructions.h>
+#include <llvm/ModuleProvider.h>
+#include <llvm/Pass.h>
+#include <llvm/PassManager.h>
+#include <llvm/ParameterAttributes.h>
+#include <llvm/Support/PatternMatch.h>
+#include <llvm/ExecutionEngine/JIT.h>
+#include <llvm/ExecutionEngine/Interpreter.h>
+#include <llvm/ExecutionEngine/GenericValue.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/LinkAllPasses.h>
+#include <llvm/Analysis/Verifier.h>
+#include <llvm/Analysis/LoopPass.h>
+#include <llvm/Target/TargetData.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+#include <llvm/Transforms/Utils/Cloning.h>
+
+#include <sstream>
+#include <fstream>
+#include <iostream>
+
+static int GLOBAL_ID = 0;
+
+using namespace llvm;
+
+static inline
+void AddStandardCompilePasses(PassManager &PM)
+{
+ PM.add(new LoweringPass());
+ PM.add(createVerifierPass()); // Verify that input is correct
+
+ PM.add(createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp
+
+ //PM.add(createStripSymbolsPass(true));
+
+ PM.add(createRaiseAllocationsPass()); // call %malloc -> malloc inst
+ PM.add(createCFGSimplificationPass()); // Clean up disgusting code
+ PM.add(createPromoteMemoryToRegisterPass());// Kill useless allocas
+ PM.add(createGlobalOptimizerPass()); // Optimize out global vars
+ PM.add(createGlobalDCEPass()); // Remove unused fns and globs
+ PM.add(createIPConstantPropagationPass());// IP Constant Propagation
+ PM.add(createDeadArgEliminationPass()); // Dead argument elimination
+ PM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
+ PM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
+
+ PM.add(createPruneEHPass()); // Remove dead EH info
+
+ PM.add(createFunctionInliningPass()); // Inline small functions
+ PM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
+
+ PM.add(createTailDuplicationPass()); // Simplify cfg by copying code
+ PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
+ PM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas
+ PM.add(createInstructionCombiningPass()); // Combine silly seq's
+ PM.add(createCondPropagationPass()); // Propagate conditionals
+
+ PM.add(createTailCallEliminationPass()); // Eliminate tail calls
+ PM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ PM.add(createReassociatePass()); // Reassociate expressions
+ PM.add(createLoopRotatePass());
+ PM.add(createLICMPass()); // Hoist loop invariants
+ PM.add(createLoopUnswitchPass()); // Unswitch loops.
+ PM.add(createLoopIndexSplitPass()); // Index split loops.
+ PM.add(createInstructionCombiningPass()); // Clean up after LICM/reassoc
+ PM.add(createIndVarSimplifyPass()); // Canonicalize indvars
+ PM.add(createLoopUnrollPass()); // Unroll small loops
+ PM.add(createInstructionCombiningPass()); // Clean up after the unroller
+ PM.add(createGVNPass()); // Remove redundancies
+ PM.add(createSCCPPass()); // Constant prop with SCCP
+
+ // Run instcombine after redundancy elimination to exploit opportunities
+ // opened up by them.
+ PM.add(createInstructionCombiningPass());
+ PM.add(createCondPropagationPass()); // Propagate conditionals
+
+ PM.add(createDeadStoreEliminationPass()); // Delete dead stores
+ PM.add(createAggressiveDCEPass()); // SSA based 'Aggressive DCE'
+ PM.add(createCFGSimplificationPass()); // Merge & remove BBs
+ PM.add(createSimplifyLibCallsPass()); // Library Call Optimizations
+ PM.add(createDeadTypeEliminationPass()); // Eliminate dead types
+ PM.add(createConstantMergePass()); // Merge dup global constants
+}
+
+void gallivm_prog_delete(struct gallivm_prog *prog)
+{
+ delete prog->module;
+ prog->module = 0;
+ prog->function = 0;
+ free(prog);
+}
+
+static inline void
+constant_interpolation(float (*inputs)[16][4],
+ const struct tgsi_interp_coef *coefs,
+ unsigned attrib,
+ unsigned chan)
+{
+ unsigned i;
+
+ for (i = 0; i < QUAD_SIZE; ++i) {
+ inputs[i][attrib][chan] = coefs[attrib].a0[chan];
+ }
+}
+
+static inline void
+linear_interpolation(float (*inputs)[16][4],
+ const struct tgsi_interp_coef *coefs,
+ unsigned attrib,
+ unsigned chan)
+{
+ unsigned i;
+
+ for( i = 0; i < QUAD_SIZE; i++ ) {
+ const float x = inputs[i][0][0];
+ const float y = inputs[i][0][1];
+
+ inputs[i][attrib][chan] =
+ coefs[attrib].a0[chan] +
+ coefs[attrib].dadx[chan] * x +
+ coefs[attrib].dady[chan] * y;
+ }
+}
+
+static inline void
+perspective_interpolation(float (*inputs)[16][4],
+ const struct tgsi_interp_coef *coefs,
+ unsigned attrib,
+ unsigned chan )
+{
+ unsigned i;
+
+ for( i = 0; i < QUAD_SIZE; i++ ) {
+ const float x = inputs[i][0][0];
+ const float y = inputs[i][0][1];
+ /* WPOS.w here is really 1/w */
+ const float w = 1.0f / inputs[i][0][3];
+ assert(inputs[i][0][3] != 0.0);
+
+ inputs[i][attrib][chan] =
+ (coefs[attrib].a0[chan] +
+ coefs[attrib].dadx[chan] * x +
+ coefs[attrib].dady[chan] * y) * w;
+ }
+}
+
+void gallivm_ir_dump(struct gallivm_ir *ir, const char *file_prefix)
+{
+ if (!ir || !ir->module)
+ return;
+
+ if (file_prefix) {
+ std::ostringstream stream;
+ stream << file_prefix;
+ stream << ir->id;
+ stream << ".ll";
+ std::string name = stream.str();
+ std::ofstream out(name.c_str());
+ if (!out) {
+ std::cerr<<"Can't open file : "<<stream.str()<<std::endl;;
+ return;
+ }
+ out << (*ir->module);
+ out.close();
+ } else {
+ const llvm::Module::FunctionListType &funcs = ir->module->getFunctionList();
+ llvm::Module::FunctionListType::const_iterator itr;
+ std::cout<<"; ---------- Start shader "<<ir->id<<std::endl;
+ for (itr = funcs.begin(); itr != funcs.end(); ++itr) {
+ const llvm::Function &func = (*itr);
+ std::string name = func.getName();
+ const llvm::Function *found = 0;
+ if (name.find("vs_shader") != std::string::npos ||
+ name.find("fs_shader") != std::string::npos ||
+ name.find("function") != std::string::npos)
+ found = &func;
+ if (found) {
+ std::cout<<*found<<std::endl;
+ }
+ }
+ std::cout<<"; ---------- End shader "<<ir->id<<std::endl;
+ }
+}
+
+
+void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
+ float (*inputs)[16][4],
+ const struct tgsi_interp_coef *coef)
+{
+ for (int i = 0; i < prog->num_interp; ++i) {
+ const gallivm_interpolate &interp = prog->interpolators[i];
+ switch (interp.type) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ constant_interpolation(inputs, coef, interp.attrib, interp.chan);
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ linear_interpolation(inputs, coef, interp.attrib, interp.chan);
+ break;
+
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ perspective_interpolation(inputs, coef, interp.attrib, interp.chan);
+ break;
+
+ default:
+ assert( 0 );
+ }
+ }
+}
+
+
+struct gallivm_ir * gallivm_ir_new(enum gallivm_shader_type type)
+{
+ struct gallivm_ir *ir =
+ (struct gallivm_ir *)calloc(1, sizeof(struct gallivm_ir));
+ ++GLOBAL_ID;
+ ir->id = GLOBAL_ID;
+ ir->type = type;
+
+ return ir;
+}
+
+void gallivm_ir_set_layout(struct gallivm_ir *ir,
+ enum gallivm_vector_layout layout)
+{
+ ir->layout = layout;
+}
+
+void gallivm_ir_set_components(struct gallivm_ir *ir, int num)
+{
+ ir->num_components = num;
+}
+
+void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir,
+ const struct tgsi_token *tokens)
+{
+ std::cout << "Creating llvm from: " <<std::endl;
+ tgsi_dump(tokens, 0);
+
+
+ llvm::Module *mod = tgsi_to_llvmir(ir, tokens);
+
+ //llvm::Module *mod = tgsi_to_llvm(ir, tokens);
+ ir->module = mod;
+ gallivm_ir_dump(ir, 0);
+}
+
+void gallivm_ir_delete(struct gallivm_ir *ir)
+{
+ delete ir->module;
+ free(ir);
+}
+
+struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir)
+{
+ struct gallivm_prog *prog =
+ (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog));
+ llvm::Module *mod = llvm::CloneModule(ir->module);
+ prog->num_consts = ir->num_consts;
+ memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators));
+ prog->num_interp = ir->num_interp;
+
+ /* Run optimization passes over it */
+ PassManager passes;
+ passes.add(new TargetData(mod));
+ AddStandardCompilePasses(passes);
+ passes.run(*mod);
+ prog->module = mod;
+
+ std::cout << "After optimizations:"<<std::endl;
+ mod->dump();
+
+ return prog;
+}
+
+#endif /* MESA_LLVM */
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin zack@tungstengraphics.com
+ */
+
+#ifndef GALLIVM_H
+#define GALLIVM_H
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+#include "pipe/p_state.h"
+
+#ifdef MESA_LLVM
+
+struct tgsi_token;
+
+struct gallivm_ir;
+struct gallivm_prog;
+struct gallivm_cpu_engine;
+struct tgsi_interp_coef;
+struct tgsi_sampler;
+struct tgsi_exec_vector;
+
+enum gallivm_shader_type {
+ GALLIVM_VS,
+ GALLIVM_FS
+};
+
+enum gallivm_vector_layout {
+ GALLIVM_AOS,
+ GALLIVM_SOA
+};
+
+struct gallivm_ir *gallivm_ir_new(enum gallivm_shader_type type);
+void gallivm_ir_set_layout(struct gallivm_ir *ir,
+ enum gallivm_vector_layout layout);
+void gallivm_ir_set_components(struct gallivm_ir *ir, int num);
+void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir,
+ const struct tgsi_token *tokens);
+void gallivm_ir_delete(struct gallivm_ir *ir);
+
+
+struct gallivm_prog *gallivm_ir_compile(struct gallivm_ir *ir);
+
+void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
+ float (*inputs)[PIPE_MAX_SHADER_INPUTS][4],
+ const struct tgsi_interp_coef *coefs);
+void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix);
+
+
+struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog);
+struct gallivm_cpu_engine *gallivm_global_cpu_engine();
+int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
+ struct tgsi_exec_vector *inputs,
+ struct tgsi_exec_vector *dests,
+ float (*consts)[4],
+ struct tgsi_exec_vector *temps);
+int gallivm_cpu_fs_exec(struct gallivm_prog *prog,
+ float x, float y,
+ float (*dests)[PIPE_MAX_SHADER_INPUTS][4],
+ float (*inputs)[PIPE_MAX_SHADER_INPUTS][4],
+ float (*consts)[4],
+ struct tgsi_sampler *samplers);
+void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *ee, struct gallivm_prog *prog);
+void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee);
+
+
+#endif /* MESA_LLVM */
+
+#if defined __cplusplus
+} // extern "C"
+#endif
+
+#endif
--- /dev/null
+// Generated by llvm2cpp - DO NOT MODIFY!
+
+
+Module* createGallivmBuiltins(Module *mod) {
+
+mod->setModuleIdentifier("shader");
+
+// Type Definitions
+ArrayType* ArrayTy_0 = ArrayType::get(IntegerType::get(8), 25);
+
+PointerType* PointerTy_1 = PointerType::get(ArrayTy_0, 0);
+
+std::vector<const Type*>FuncTy_2_args;
+FuncTy_2_args.push_back(Type::FloatTy);
+FuncTy_2_args.push_back(Type::FloatTy);
+FunctionType* FuncTy_2 = FunctionType::get(
+ /*Result=*/Type::FloatTy,
+ /*Params=*/FuncTy_2_args,
+ /*isVarArg=*/false);
+
+PointerType* PointerTy_3 = PointerType::get(FuncTy_2, 0);
+
+VectorType* VectorTy_4 = VectorType::get(Type::FloatTy, 4);
+
+std::vector<const Type*>FuncTy_5_args;
+FuncTy_5_args.push_back(VectorTy_4);
+FunctionType* FuncTy_5 = FunctionType::get(
+ /*Result=*/VectorTy_4,
+ /*Params=*/FuncTy_5_args,
+ /*isVarArg=*/false);
+
+std::vector<const Type*>FuncTy_6_args;
+FuncTy_6_args.push_back(VectorTy_4);
+FuncTy_6_args.push_back(VectorTy_4);
+FuncTy_6_args.push_back(VectorTy_4);
+FunctionType* FuncTy_6 = FunctionType::get(
+ /*Result=*/VectorTy_4,
+ /*Params=*/FuncTy_6_args,
+ /*isVarArg=*/false);
+
+VectorType* VectorTy_7 = VectorType::get(IntegerType::get(32), 4);
+
+std::vector<const Type*>FuncTy_9_args;
+FunctionType* FuncTy_9 = FunctionType::get(
+ /*Result=*/IntegerType::get(32),
+ /*Params=*/FuncTy_9_args,
+ /*isVarArg=*/true);
+
+PointerType* PointerTy_8 = PointerType::get(FuncTy_9, 0);
+
+PointerType* PointerTy_10 = PointerType::get(IntegerType::get(8), 0);
+
+std::vector<const Type*>FuncTy_12_args;
+FuncTy_12_args.push_back(Type::FloatTy);
+FunctionType* FuncTy_12 = FunctionType::get(
+ /*Result=*/Type::FloatTy,
+ /*Params=*/FuncTy_12_args,
+ /*isVarArg=*/false);
+
+PointerType* PointerTy_11 = PointerType::get(FuncTy_12, 0);
+
+std::vector<const Type*>FuncTy_13_args;
+FuncTy_13_args.push_back(VectorTy_4);
+FunctionType* FuncTy_13 = FunctionType::get(
+ /*Result=*/IntegerType::get(32),
+ /*Params=*/FuncTy_13_args,
+ /*isVarArg=*/false);
+
+
+// Function Declarations
+
+Function* func_approx = new Function(
+ /*Type=*/FuncTy_2,
+ /*Linkage=*/GlobalValue::WeakLinkage,
+ /*Name=*/"approx", mod);
+func_approx->setCallingConv(CallingConv::C);
+const ParamAttrsList *func_approx_PAL = 0;
+func_approx->setParamAttrs(func_approx_PAL);
+
+Function* func_powf = new Function(
+ /*Type=*/FuncTy_2,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"powf", mod); // (external, no body)
+func_powf->setCallingConv(CallingConv::C);
+const ParamAttrsList *func_powf_PAL = 0;
+func_powf->setParamAttrs(func_powf_PAL);
+
+Function* func_lit = new Function(
+ /*Type=*/FuncTy_5,
+ /*Linkage=*/GlobalValue::WeakLinkage,
+ /*Name=*/"lit", mod);
+func_lit->setCallingConv(CallingConv::C);
+const ParamAttrsList *func_lit_PAL = 0;
+func_lit->setParamAttrs(func_lit_PAL);
+
+Function* func_cmp = new Function(
+ /*Type=*/FuncTy_6,
+ /*Linkage=*/GlobalValue::WeakLinkage,
+ /*Name=*/"cmp", mod);
+func_cmp->setCallingConv(CallingConv::C);
+const ParamAttrsList *func_cmp_PAL = 0;
+{
+ ParamAttrsVector Attrs;
+ ParamAttrsWithIndex PAWI;
+ PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind;
+ Attrs.push_back(PAWI);
+ func_cmp_PAL = ParamAttrsList::get(Attrs);
+
+}
+func_cmp->setParamAttrs(func_cmp_PAL);
+
+Function* func_vcos = new Function(
+ /*Type=*/FuncTy_5,
+ /*Linkage=*/GlobalValue::WeakLinkage,
+ /*Name=*/"vcos", mod);
+func_vcos->setCallingConv(CallingConv::C);
+const ParamAttrsList *func_vcos_PAL = 0;
+func_vcos->setParamAttrs(func_vcos_PAL);
+
+Function* func_printf = new Function(
+ /*Type=*/FuncTy_9,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"printf", mod); // (external, no body)
+func_printf->setCallingConv(CallingConv::C);
+const ParamAttrsList *func_printf_PAL = 0;
+func_printf->setParamAttrs(func_printf_PAL);
+
+Function* func_cosf = new Function(
+ /*Type=*/FuncTy_12,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"cosf", mod); // (external, no body)
+func_cosf->setCallingConv(CallingConv::C);
+const ParamAttrsList *func_cosf_PAL = 0;
+func_cosf->setParamAttrs(func_cosf_PAL);
+
+Function* func_scs = new Function(
+ /*Type=*/FuncTy_5,
+ /*Linkage=*/GlobalValue::WeakLinkage,
+ /*Name=*/"scs", mod);
+func_scs->setCallingConv(CallingConv::C);
+const ParamAttrsList *func_scs_PAL = 0;
+func_scs->setParamAttrs(func_scs_PAL);
+
+Function* func_sinf = new Function(
+ /*Type=*/FuncTy_12,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"sinf", mod); // (external, no body)
+func_sinf->setCallingConv(CallingConv::C);
+const ParamAttrsList *func_sinf_PAL = 0;
+func_sinf->setParamAttrs(func_sinf_PAL);
+
+Function* func_vsin = new Function(
+ /*Type=*/FuncTy_5,
+ /*Linkage=*/GlobalValue::WeakLinkage,
+ /*Name=*/"vsin", mod);
+func_vsin->setCallingConv(CallingConv::C);
+const ParamAttrsList *func_vsin_PAL = 0;
+func_vsin->setParamAttrs(func_vsin_PAL);
+
+Function* func_kilp = new Function(
+ /*Type=*/FuncTy_13,
+ /*Linkage=*/GlobalValue::WeakLinkage,
+ /*Name=*/"kilp", mod);
+func_kilp->setCallingConv(CallingConv::C);
+const ParamAttrsList *func_kilp_PAL = 0;
+{
+ ParamAttrsVector Attrs;
+ ParamAttrsWithIndex PAWI;
+ PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind;
+ Attrs.push_back(PAWI);
+ func_kilp_PAL = ParamAttrsList::get(Attrs);
+
+}
+func_kilp->setParamAttrs(func_kilp_PAL);
+
+// Global Variable Declarations
+
+
+GlobalVariable* gvar_array__str = new GlobalVariable(
+/*Type=*/ArrayTy_0,
+/*isConstant=*/true,
+/*Linkage=*/GlobalValue::InternalLinkage,
+/*Initializer=*/0, // has initializer, specified below
+/*Name=*/".str",
+mod);
+
+GlobalVariable* gvar_array__str1 = new GlobalVariable(
+/*Type=*/ArrayTy_0,
+/*isConstant=*/true,
+/*Linkage=*/GlobalValue::InternalLinkage,
+/*Initializer=*/0, // has initializer, specified below
+/*Name=*/".str1",
+mod);
+
+// Constant Definitions
+Constant* const_array_14 = ConstantArray::get("VEC IN is %f %f %f %f\x0A", true);
+Constant* const_array_15 = ConstantArray::get("VEC OUT is %f %f %f %f\x0A", true);
+ConstantFP* const_float_16 = ConstantFP::get(Type::FloatTy, APFloat(-1.280000e+02f));
+ConstantFP* const_float_17 = ConstantFP::get(Type::FloatTy, APFloat(1.280000e+02f));
+Constant* const_float_18 = Constant::getNullValue(Type::FloatTy);
+Constant* const_int32_19 = Constant::getNullValue(IntegerType::get(32));
+std::vector<Constant*> const_packed_20_elems;
+ConstantFP* const_float_21 = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
+const_packed_20_elems.push_back(const_float_21);
+UndefValue* const_float_22 = UndefValue::get(Type::FloatTy);
+const_packed_20_elems.push_back(const_float_22);
+const_packed_20_elems.push_back(const_float_22);
+const_packed_20_elems.push_back(const_float_21);
+Constant* const_packed_20 = ConstantVector::get(VectorTy_4, const_packed_20_elems);
+ConstantInt* const_int32_23 = ConstantInt::get(APInt(32, "1", 10));
+ConstantInt* const_int32_24 = ConstantInt::get(APInt(32, "3", 10));
+ConstantInt* const_int32_25 = ConstantInt::get(APInt(32, "2", 10));
+std::vector<Constant*> const_packed_26_elems;
+const_packed_26_elems.push_back(const_float_21);
+const_packed_26_elems.push_back(const_float_18);
+const_packed_26_elems.push_back(const_float_18);
+const_packed_26_elems.push_back(const_float_21);
+Constant* const_packed_26 = ConstantVector::get(VectorTy_4, const_packed_26_elems);
+Constant* const_double_27 = Constant::getNullValue(Type::DoubleTy);
+std::vector<Constant*> const_packed_28_elems;
+const_packed_28_elems.push_back(const_int32_19);
+ConstantInt* const_int32_29 = ConstantInt::get(APInt(32, "5", 10));
+const_packed_28_elems.push_back(const_int32_29);
+const_packed_28_elems.push_back(const_int32_25);
+const_packed_28_elems.push_back(const_int32_24);
+Constant* const_packed_28 = ConstantVector::get(VectorTy_7, const_packed_28_elems);
+std::vector<Constant*> const_packed_30_elems;
+const_packed_30_elems.push_back(const_int32_19);
+const_packed_30_elems.push_back(const_int32_23);
+ConstantInt* const_int32_31 = ConstantInt::get(APInt(32, "6", 10));
+const_packed_30_elems.push_back(const_int32_31);
+const_packed_30_elems.push_back(const_int32_24);
+Constant* const_packed_30 = ConstantVector::get(VectorTy_7, const_packed_30_elems);
+std::vector<Constant*> const_packed_32_elems;
+const_packed_32_elems.push_back(const_int32_19);
+const_packed_32_elems.push_back(const_int32_23);
+const_packed_32_elems.push_back(const_int32_25);
+ConstantInt* const_int32_33 = ConstantInt::get(APInt(32, "7", 10));
+const_packed_32_elems.push_back(const_int32_33);
+Constant* const_packed_32 = ConstantVector::get(VectorTy_7, const_packed_32_elems);
+std::vector<Constant*> const_ptr_34_indices;
+const_ptr_34_indices.push_back(const_int32_19);
+const_ptr_34_indices.push_back(const_int32_19);
+Constant* const_ptr_34 = ConstantExpr::getGetElementPtr(gvar_array__str, &const_ptr_34_indices[0], const_ptr_34_indices.size() );
+UndefValue* const_packed_35 = UndefValue::get(VectorTy_4);
+std::vector<Constant*> const_ptr_36_indices;
+const_ptr_36_indices.push_back(const_int32_19);
+const_ptr_36_indices.push_back(const_int32_19);
+Constant* const_ptr_36 = ConstantExpr::getGetElementPtr(gvar_array__str1, &const_ptr_36_indices[0], const_ptr_36_indices.size() );
+
+// Global Variable Definitions
+gvar_array__str->setInitializer(const_array_14);
+gvar_array__str1->setInitializer(const_array_15);
+
+// Function Definitions
+
+// Function: approx (func_approx)
+{
+ Function::arg_iterator args = func_approx->arg_begin();
+ Value* float_a = args++;
+ float_a->setName("a");
+ Value* float_b = args++;
+ float_b->setName("b");
+
+ BasicBlock* label_entry = new BasicBlock("entry",func_approx,0);
+
+ // Block entry (label_entry)
+ FCmpInst* int1_cmp = new FCmpInst(FCmpInst::FCMP_OLT, float_b, const_float_16, "cmp", label_entry);
+ SelectInst* float_b_addr_0 = new SelectInst(int1_cmp, const_float_16, float_b, "b.addr.0", label_entry);
+ FCmpInst* int1_cmp3 = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0, const_float_17, "cmp3", label_entry);
+ SelectInst* float_b_addr_1 = new SelectInst(int1_cmp3, const_float_17, float_b_addr_0, "b.addr.1", label_entry);
+ FCmpInst* int1_cmp7 = new FCmpInst(FCmpInst::FCMP_OLT, float_a, const_float_18, "cmp7", label_entry);
+ SelectInst* float_a_addr_0 = new SelectInst(int1_cmp7, const_float_18, float_a, "a.addr.0", label_entry);
+ std::vector<Value*> float_call_params;
+ float_call_params.push_back(float_a_addr_0);
+ float_call_params.push_back(float_b_addr_1);
+ CallInst* float_call = new CallInst(func_powf, float_call_params.begin(), float_call_params.end(), "call", label_entry);
+ float_call->setCallingConv(CallingConv::C);
+ float_call->setTailCall(true);const ParamAttrsList *float_call_PAL = 0;
+ float_call->setParamAttrs(float_call_PAL);
+
+ new ReturnInst(float_call, label_entry);
+
+}
+
+// Function: lit (func_lit)
+{
+ Function::arg_iterator args = func_lit->arg_begin();
+ Value* packed_tmp = args++;
+ packed_tmp->setName("tmp");
+
+ BasicBlock* label_entry_38 = new BasicBlock("entry",func_lit,0);
+ BasicBlock* label_ifthen = new BasicBlock("ifthen",func_lit,0);
+ BasicBlock* label_UnifiedReturnBlock = new BasicBlock("UnifiedReturnBlock",func_lit,0);
+
+ // Block entry (label_entry_38)
+ ExtractElementInst* float_tmp6 = new ExtractElementInst(packed_tmp, const_int32_19, "tmp6", label_entry_38);
+ FCmpInst* int1_cmp_39 = new FCmpInst(FCmpInst::FCMP_OGT, float_tmp6, const_float_18, "cmp", label_entry_38);
+ new BranchInst(label_ifthen, label_UnifiedReturnBlock, int1_cmp_39, label_entry_38);
+
+ // Block ifthen (label_ifthen)
+ InsertElementInst* packed_tmp10 = new InsertElementInst(const_packed_20, float_tmp6, const_int32_23, "tmp10", label_ifthen);
+ ExtractElementInst* float_tmp12 = new ExtractElementInst(packed_tmp, const_int32_23, "tmp12", label_ifthen);
+ ExtractElementInst* float_tmp14 = new ExtractElementInst(packed_tmp, const_int32_24, "tmp14", label_ifthen);
+ std::vector<Value*> float_call_41_params;
+ float_call_41_params.push_back(float_tmp12);
+ float_call_41_params.push_back(float_tmp14);
+ CallInst* float_call_41 = new CallInst(func_approx, float_call_41_params.begin(), float_call_41_params.end(), "call", label_ifthen);
+ float_call_41->setCallingConv(CallingConv::C);
+ float_call_41->setTailCall(true);const ParamAttrsList *float_call_41_PAL = 0;
+ float_call_41->setParamAttrs(float_call_41_PAL);
+
+ InsertElementInst* packed_tmp16 = new InsertElementInst(packed_tmp10, float_call_41, const_int32_25, "tmp16", label_ifthen);
+ new ReturnInst(packed_tmp16, label_ifthen);
+
+ // Block UnifiedReturnBlock (label_UnifiedReturnBlock)
+ new ReturnInst(const_packed_26, label_UnifiedReturnBlock);
+
+}
+
+// Function: cmp (func_cmp)
+{
+ Function::arg_iterator args = func_cmp->arg_begin();
+ Value* packed_tmp0 = args++;
+ packed_tmp0->setName("tmp0");
+ Value* packed_tmp1 = args++;
+ packed_tmp1->setName("tmp1");
+ Value* packed_tmp2 = args++;
+ packed_tmp2->setName("tmp2");
+
+ BasicBlock* label_entry_44 = new BasicBlock("entry",func_cmp,0);
+ BasicBlock* label_cond__14 = new BasicBlock("cond.?14",func_cmp,0);
+ BasicBlock* label_cond_cont20 = new BasicBlock("cond.cont20",func_cmp,0);
+ BasicBlock* label_cond__28 = new BasicBlock("cond.?28",func_cmp,0);
+ BasicBlock* label_cond_cont34 = new BasicBlock("cond.cont34",func_cmp,0);
+ BasicBlock* label_cond__42 = new BasicBlock("cond.?42",func_cmp,0);
+ BasicBlock* label_cond_cont48 = new BasicBlock("cond.cont48",func_cmp,0);
+
+ // Block entry (label_entry_44)
+ ExtractElementInst* float_tmp3 = new ExtractElementInst(packed_tmp0, const_int32_19, "tmp3", label_entry_44);
+ CastInst* double_conv = new FPExtInst(float_tmp3, Type::DoubleTy, "conv", label_entry_44);
+ FCmpInst* int1_cmp_45 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv, const_double_27, "cmp", label_entry_44);
+ ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp11", label_entry_44);
+ CastInst* double_conv12 = new FPExtInst(float_tmp11, Type::DoubleTy, "conv12", label_entry_44);
+ FCmpInst* int1_cmp13 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv12, const_double_27, "cmp13", label_entry_44);
+ SelectInst* packed_tmp1_tmp2 = new SelectInst(int1_cmp_45, packed_tmp1, packed_tmp2, "tmp1.tmp2", label_entry_44);
+ new BranchInst(label_cond__14, label_cond_cont20, int1_cmp13, label_entry_44);
+
+ // Block cond.?14 (label_cond__14)
+ ShuffleVectorInst* packed_tmp233 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp1, const_packed_28, "tmp233", label_cond__14);
+ ExtractElementInst* float_tmp254 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp254", label_cond__14);
+ CastInst* double_conv265 = new FPExtInst(float_tmp254, Type::DoubleTy, "conv265", label_cond__14);
+ FCmpInst* int1_cmp276 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv265, const_double_27, "cmp276", label_cond__14);
+ new BranchInst(label_cond__28, label_cond_cont34, int1_cmp276, label_cond__14);
+
+ // Block cond.cont20 (label_cond_cont20)
+ ShuffleVectorInst* packed_tmp23 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp2, const_packed_28, "tmp23", label_cond_cont20);
+ ExtractElementInst* float_tmp25 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp25", label_cond_cont20);
+ CastInst* double_conv26 = new FPExtInst(float_tmp25, Type::DoubleTy, "conv26", label_cond_cont20);
+ FCmpInst* int1_cmp27 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv26, const_double_27, "cmp27", label_cond_cont20);
+ new BranchInst(label_cond__28, label_cond_cont34, int1_cmp27, label_cond_cont20);
+
+ // Block cond.?28 (label_cond__28)
+ PHINode* packed_tmp23_reg2mem_0 = new PHINode(VectorTy_4, "tmp23.reg2mem.0", label_cond__28);
+ packed_tmp23_reg2mem_0->reserveOperandSpace(2);
+ packed_tmp23_reg2mem_0->addIncoming(packed_tmp233, label_cond__14);
+ packed_tmp23_reg2mem_0->addIncoming(packed_tmp23, label_cond_cont20);
+
+ ShuffleVectorInst* packed_tmp378 = new ShuffleVectorInst(packed_tmp23_reg2mem_0, packed_tmp1, const_packed_30, "tmp378", label_cond__28);
+ ExtractElementInst* float_tmp399 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp399", label_cond__28);
+ CastInst* double_conv4010 = new FPExtInst(float_tmp399, Type::DoubleTy, "conv4010", label_cond__28);
+ FCmpInst* int1_cmp4111 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv4010, const_double_27, "cmp4111", label_cond__28);
+ new BranchInst(label_cond__42, label_cond_cont48, int1_cmp4111, label_cond__28);
+
+ // Block cond.cont34 (label_cond_cont34)
+ PHINode* packed_tmp23_reg2mem_1 = new PHINode(VectorTy_4, "tmp23.reg2mem.1", label_cond_cont34);
+ packed_tmp23_reg2mem_1->reserveOperandSpace(2);
+ packed_tmp23_reg2mem_1->addIncoming(packed_tmp233, label_cond__14);
+ packed_tmp23_reg2mem_1->addIncoming(packed_tmp23, label_cond_cont20);
+
+ ShuffleVectorInst* packed_tmp37 = new ShuffleVectorInst(packed_tmp23_reg2mem_1, packed_tmp2, const_packed_30, "tmp37", label_cond_cont34);
+ ExtractElementInst* float_tmp39 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp39", label_cond_cont34);
+ CastInst* double_conv40 = new FPExtInst(float_tmp39, Type::DoubleTy, "conv40", label_cond_cont34);
+ FCmpInst* int1_cmp41 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv40, const_double_27, "cmp41", label_cond_cont34);
+ new BranchInst(label_cond__42, label_cond_cont48, int1_cmp41, label_cond_cont34);
+
+ // Block cond.?42 (label_cond__42)
+ PHINode* packed_tmp37_reg2mem_0 = new PHINode(VectorTy_4, "tmp37.reg2mem.0", label_cond__42);
+ packed_tmp37_reg2mem_0->reserveOperandSpace(2);
+ packed_tmp37_reg2mem_0->addIncoming(packed_tmp378, label_cond__28);
+ packed_tmp37_reg2mem_0->addIncoming(packed_tmp37, label_cond_cont34);
+
+ ShuffleVectorInst* packed_tmp5113 = new ShuffleVectorInst(packed_tmp37_reg2mem_0, packed_tmp1, const_packed_32, "tmp5113", label_cond__42);
+ new ReturnInst(packed_tmp5113, label_cond__42);
+
+ // Block cond.cont48 (label_cond_cont48)
+ PHINode* packed_tmp37_reg2mem_1 = new PHINode(VectorTy_4, "tmp37.reg2mem.1", label_cond_cont48);
+ packed_tmp37_reg2mem_1->reserveOperandSpace(2);
+ packed_tmp37_reg2mem_1->addIncoming(packed_tmp378, label_cond__28);
+ packed_tmp37_reg2mem_1->addIncoming(packed_tmp37, label_cond_cont34);
+
+ ShuffleVectorInst* packed_tmp51 = new ShuffleVectorInst(packed_tmp37_reg2mem_1, packed_tmp2, const_packed_32, "tmp51", label_cond_cont48);
+ new ReturnInst(packed_tmp51, label_cond_cont48);
+
+}
+
+// Function: vcos (func_vcos)
+{
+ Function::arg_iterator args = func_vcos->arg_begin();
+ Value* packed_val = args++;
+ packed_val->setName("val");
+
+ BasicBlock* label_entry_53 = new BasicBlock("entry",func_vcos,0);
+
+ // Block entry (label_entry_53)
+ ExtractElementInst* float_tmp1 = new ExtractElementInst(packed_val, const_int32_19, "tmp1", label_entry_53);
+ CastInst* double_conv_54 = new FPExtInst(float_tmp1, Type::DoubleTy, "conv", label_entry_53);
+ ExtractElementInst* float_tmp3_55 = new ExtractElementInst(packed_val, const_int32_23, "tmp3", label_entry_53);
+ CastInst* double_conv4 = new FPExtInst(float_tmp3_55, Type::DoubleTy, "conv4", label_entry_53);
+ ExtractElementInst* float_tmp6_56 = new ExtractElementInst(packed_val, const_int32_25, "tmp6", label_entry_53);
+ CastInst* double_conv7 = new FPExtInst(float_tmp6_56, Type::DoubleTy, "conv7", label_entry_53);
+ ExtractElementInst* float_tmp9 = new ExtractElementInst(packed_val, const_int32_24, "tmp9", label_entry_53);
+ CastInst* double_conv10 = new FPExtInst(float_tmp9, Type::DoubleTy, "conv10", label_entry_53);
+ std::vector<Value*> int32_call_params;
+ int32_call_params.push_back(const_ptr_34);
+ int32_call_params.push_back(double_conv_54);
+ int32_call_params.push_back(double_conv4);
+ int32_call_params.push_back(double_conv7);
+ int32_call_params.push_back(double_conv10);
+ CallInst* int32_call = new CallInst(func_printf, int32_call_params.begin(), int32_call_params.end(), "call", label_entry_53);
+ int32_call->setCallingConv(CallingConv::C);
+ int32_call->setTailCall(true);const ParamAttrsList *int32_call_PAL = 0;
+ int32_call->setParamAttrs(int32_call_PAL);
+
+ CallInst* float_call13 = new CallInst(func_cosf, float_tmp1, "call13", label_entry_53);
+ float_call13->setCallingConv(CallingConv::C);
+ float_call13->setTailCall(true);const ParamAttrsList *float_call13_PAL = 0;
+ float_call13->setParamAttrs(float_call13_PAL);
+
+ InsertElementInst* packed_tmp15 = new InsertElementInst(const_packed_35, float_call13, const_int32_19, "tmp15", label_entry_53);
+ CallInst* float_call18 = new CallInst(func_cosf, float_tmp1, "call18", label_entry_53);
+ float_call18->setCallingConv(CallingConv::C);
+ float_call18->setTailCall(true);const ParamAttrsList *float_call18_PAL = 0;
+ float_call18->setParamAttrs(float_call18_PAL);
+
+ InsertElementInst* packed_tmp20 = new InsertElementInst(packed_tmp15, float_call18, const_int32_23, "tmp20", label_entry_53);
+ CallInst* float_call23 = new CallInst(func_cosf, float_tmp1, "call23", label_entry_53);
+ float_call23->setCallingConv(CallingConv::C);
+ float_call23->setTailCall(true);const ParamAttrsList *float_call23_PAL = 0;
+ float_call23->setParamAttrs(float_call23_PAL);
+
+ InsertElementInst* packed_tmp25 = new InsertElementInst(packed_tmp20, float_call23, const_int32_25, "tmp25", label_entry_53);
+ CallInst* float_call28 = new CallInst(func_cosf, float_tmp1, "call28", label_entry_53);
+ float_call28->setCallingConv(CallingConv::C);
+ float_call28->setTailCall(true);const ParamAttrsList *float_call28_PAL = 0;
+ float_call28->setParamAttrs(float_call28_PAL);
+
+ InsertElementInst* packed_tmp30 = new InsertElementInst(packed_tmp25, float_call28, const_int32_24, "tmp30", label_entry_53);
+ CastInst* double_conv33 = new FPExtInst(float_call13, Type::DoubleTy, "conv33", label_entry_53);
+ CastInst* double_conv36 = new FPExtInst(float_call18, Type::DoubleTy, "conv36", label_entry_53);
+ CastInst* double_conv39 = new FPExtInst(float_call23, Type::DoubleTy, "conv39", label_entry_53);
+ CastInst* double_conv42 = new FPExtInst(float_call28, Type::DoubleTy, "conv42", label_entry_53);
+ std::vector<Value*> int32_call43_params;
+ int32_call43_params.push_back(const_ptr_36);
+ int32_call43_params.push_back(double_conv33);
+ int32_call43_params.push_back(double_conv36);
+ int32_call43_params.push_back(double_conv39);
+ int32_call43_params.push_back(double_conv42);
+ CallInst* int32_call43 = new CallInst(func_printf, int32_call43_params.begin(), int32_call43_params.end(), "call43", label_entry_53);
+ int32_call43->setCallingConv(CallingConv::C);
+ int32_call43->setTailCall(true);const ParamAttrsList *int32_call43_PAL = 0;
+ int32_call43->setParamAttrs(int32_call43_PAL);
+
+ new ReturnInst(packed_tmp30, label_entry_53);
+
+}
+
+// Function: scs (func_scs)
+{
+ Function::arg_iterator args = func_scs->arg_begin();
+ Value* packed_val_58 = args++;
+ packed_val_58->setName("val");
+
+ BasicBlock* label_entry_59 = new BasicBlock("entry",func_scs,0);
+
+ // Block entry (label_entry_59)
+ ExtractElementInst* float_tmp2 = new ExtractElementInst(packed_val_58, const_int32_19, "tmp2", label_entry_59);
+ CallInst* float_call_60 = new CallInst(func_cosf, float_tmp2, "call", label_entry_59);
+ float_call_60->setCallingConv(CallingConv::C);
+ float_call_60->setTailCall(true);const ParamAttrsList *float_call_60_PAL = 0;
+ float_call_60->setParamAttrs(float_call_60_PAL);
+
+ InsertElementInst* packed_tmp5 = new InsertElementInst(const_packed_35, float_call_60, const_int32_19, "tmp5", label_entry_59);
+ CallInst* float_call7 = new CallInst(func_sinf, float_tmp2, "call7", label_entry_59);
+ float_call7->setCallingConv(CallingConv::C);
+ float_call7->setTailCall(true);const ParamAttrsList *float_call7_PAL = 0;
+ float_call7->setParamAttrs(float_call7_PAL);
+
+ InsertElementInst* packed_tmp9 = new InsertElementInst(packed_tmp5, float_call7, const_int32_23, "tmp9", label_entry_59);
+ new ReturnInst(packed_tmp9, label_entry_59);
+
+}
+
+// Function: vsin (func_vsin)
+{
+ Function::arg_iterator args = func_vsin->arg_begin();
+ Value* packed_val_62 = args++;
+ packed_val_62->setName("val");
+
+ BasicBlock* label_entry_63 = new BasicBlock("entry",func_vsin,0);
+
+ // Block entry (label_entry_63)
+ ExtractElementInst* float_tmp2_64 = new ExtractElementInst(packed_val_62, const_int32_19, "tmp2", label_entry_63);
+ CallInst* float_call_65 = new CallInst(func_sinf, float_tmp2_64, "call", label_entry_63);
+ float_call_65->setCallingConv(CallingConv::C);
+ float_call_65->setTailCall(true);const ParamAttrsList *float_call_65_PAL = 0;
+ float_call_65->setParamAttrs(float_call_65_PAL);
+
+ InsertElementInst* packed_tmp6 = new InsertElementInst(const_packed_35, float_call_65, const_int32_19, "tmp6", label_entry_63);
+ InsertElementInst* packed_tmp9_66 = new InsertElementInst(packed_tmp6, float_call_65, const_int32_23, "tmp9", label_entry_63);
+ InsertElementInst* packed_tmp12 = new InsertElementInst(packed_tmp9_66, float_call_65, const_int32_25, "tmp12", label_entry_63);
+ InsertElementInst* packed_tmp15_67 = new InsertElementInst(packed_tmp12, float_call_65, const_int32_24, "tmp15", label_entry_63);
+ new ReturnInst(packed_tmp15_67, label_entry_63);
+
+}
+
+// Function: kilp (func_kilp)
+{
+ Function::arg_iterator args = func_kilp->arg_begin();
+ Value* packed_val_69 = args++;
+ packed_val_69->setName("val");
+
+ BasicBlock* label_entry_70 = new BasicBlock("entry",func_kilp,0);
+ BasicBlock* label_lor_rhs = new BasicBlock("lor_rhs",func_kilp,0);
+ BasicBlock* label_lor_rhs5 = new BasicBlock("lor_rhs5",func_kilp,0);
+ BasicBlock* label_lor_rhs11 = new BasicBlock("lor_rhs11",func_kilp,0);
+ BasicBlock* label_UnifiedReturnBlock_71 = new BasicBlock("UnifiedReturnBlock",func_kilp,0);
+
+ // Block entry (label_entry_70)
+ ExtractElementInst* float_tmp1_72 = new ExtractElementInst(packed_val_69, const_int32_19, "tmp1", label_entry_70);
+ FCmpInst* int1_cmp_73 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp1_72, const_float_18, "cmp", label_entry_70);
+ new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs, int1_cmp_73, label_entry_70);
+
+ // Block lor_rhs (label_lor_rhs)
+ ExtractElementInst* float_tmp3_75 = new ExtractElementInst(packed_val_69, const_int32_23, "tmp3", label_lor_rhs);
+ FCmpInst* int1_cmp4 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp3_75, const_float_18, "cmp4", label_lor_rhs);
+ new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs5, int1_cmp4, label_lor_rhs);
+
+ // Block lor_rhs5 (label_lor_rhs5)
+ ExtractElementInst* float_tmp7 = new ExtractElementInst(packed_val_69, const_int32_25, "tmp7", label_lor_rhs5);
+ FCmpInst* int1_cmp8 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp7, const_float_18, "cmp8", label_lor_rhs5);
+ new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs11, int1_cmp8, label_lor_rhs5);
+
+ // Block lor_rhs11 (label_lor_rhs11)
+ ExtractElementInst* float_tmp13 = new ExtractElementInst(packed_val_69, const_int32_24, "tmp13", label_lor_rhs11);
+ FCmpInst* int1_cmp14 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp13, const_float_18, "cmp14", label_lor_rhs11);
+ CastInst* int32_retval = new ZExtInst(int1_cmp14, IntegerType::get(32), "retval", label_lor_rhs11);
+ new ReturnInst(int32_retval, label_lor_rhs11);
+
+ // Block UnifiedReturnBlock (label_UnifiedReturnBlock_71)
+ new ReturnInst(const_int32_23, label_UnifiedReturnBlock_71);
+
+}
+
+return mod;
+
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin zack@tungstengraphics.com
+ */
+#ifdef MESA_LLVM
+
+#include "gallivm.h"
+#include "gallivm_p.h"
+
+#include "instructions.h"
+#include "loweringpass.h"
+#include "storage.h"
+#include "tgsitollvm.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/exec/tgsi_exec.h"
+#include "tgsi/util/tgsi_dump.h"
+
+#include <llvm/Module.h>
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/Instructions.h>
+#include <llvm/ModuleProvider.h>
+#include <llvm/Pass.h>
+#include <llvm/PassManager.h>
+#include <llvm/ParameterAttributes.h>
+#include <llvm/Support/PatternMatch.h>
+#include <llvm/ExecutionEngine/JIT.h>
+#include <llvm/ExecutionEngine/Interpreter.h>
+#include <llvm/ExecutionEngine/GenericValue.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/LinkAllPasses.h>
+#include <llvm/Analysis/Verifier.h>
+#include <llvm/Analysis/LoopPass.h>
+#include <llvm/Target/TargetData.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+#include <llvm/Transforms/Utils/Cloning.h>
+
+#include <sstream>
+#include <fstream>
+#include <iostream>
+
+struct gallivm_cpu_engine {
+ llvm::ExecutionEngine *engine;
+};
+
+static struct gallivm_cpu_engine *CPU = 0;
+
+typedef int (*fragment_shader_runner)(float x, float y,
+ float (*dests)[16][4],
+ float (*inputs)[16][4],
+ int num_attribs,
+ float (*consts)[4], int num_consts,
+ struct tgsi_sampler *samplers);
+
+int gallivm_cpu_fs_exec(struct gallivm_prog *prog,
+ float fx, float fy,
+ float (*dests)[16][4],
+ float (*inputs)[16][4],
+ float (*consts)[4],
+ struct tgsi_sampler *samplers)
+{
+ fragment_shader_runner runner = reinterpret_cast<fragment_shader_runner>(prog->function);
+ assert(runner);
+
+ return runner(fx, fy, dests, inputs, prog->num_interp,
+ consts, prog->num_consts,
+ samplers);
+}
+
+static inline llvm::Function *func_for_shader(struct gallivm_prog *prog)
+{
+ llvm::Module *mod = prog->module;
+ llvm::Function *func = 0;
+
+ switch (prog->type) {
+ case GALLIVM_VS:
+ func = mod->getFunction("vs_shader");
+ break;
+ case GALLIVM_FS:
+ func = mod->getFunction("fs_shader");
+ break;
+ default:
+ assert(!"Unknown shader type!");
+ break;
+ }
+ return func;
+}
+
+/*!
+ This function creates a CPU based execution engine for the given gallivm_prog.
+ gallivm_cpu_engine should be used as a singleton throughout the library. Before
+ executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile.
+ The gallivm_prog instance which is being passed to the constructor is being
+ automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile
+ with it again.
+ */
+struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog)
+{
+ struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *)
+ calloc(1, sizeof(struct gallivm_cpu_engine));
+ llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
+ llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
+ llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false);
+ ee->DisableLazyCompilation();
+ cpu->engine = ee;
+
+ llvm::Function *func = func_for_shader(prog);
+
+ prog->function = ee->getPointerToFunction(func);
+ CPU = cpu;
+ return cpu;
+}
+
+
+/*!
+ This function JIT compiles the given gallivm_prog with the given cpu based execution engine.
+ The reference to the generated machine code entry point will be stored
+ in the gallivm_prog program. After executing this function one can call gallivm_prog_exec
+ in order to execute the gallivm_prog on the CPU.
+ */
+void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog)
+{
+ llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
+ llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
+ llvm::ExecutionEngine *ee = cpu->engine;
+ assert(ee);
+ /*FIXME : remove */
+ ee->DisableLazyCompilation();
+ ee->addModuleProvider(mp);
+
+ llvm::Function *func = func_for_shader(prog);
+ prog->function = ee->getPointerToFunction(func);
+}
+
+void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu)
+{
+ free(cpu);
+}
+
+struct gallivm_cpu_engine * gallivm_global_cpu_engine()
+{
+ return CPU;
+}
+
+
+typedef void (*vertex_shader_runner)(void *ainputs,
+ void *dests,
+ float (*aconsts)[4],
+ void *temps);
+
+
+/*!
+ This function is used to execute the gallivm_prog in software. Before calling
+ this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile
+ function.
+ */
+int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
+ struct tgsi_exec_vector *inputs,
+ struct tgsi_exec_vector *dests,
+ float (*consts)[4],
+ struct tgsi_exec_vector *temps)
+{
+ vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function);
+ assert(runner);
+ /*FIXME*/
+ runner(inputs, dests, consts, temps);
+
+ return 0;
+}
+
+#endif
--- /dev/null
+#ifndef GALLIVM_P_H
+#define GALLIVM_P_H
+
+#ifdef MESA_LLVM
+
+#include "gallivm.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_compiler.h"
+
+namespace llvm {
+ class Module;
+}
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+enum gallivm_shader_type;
+enum gallivm_vector_layout;
+
+struct gallivm_interpolate {
+ int attrib;
+ int chan;
+ int type;
+};
+
+struct gallivm_ir {
+ llvm::Module *module;
+ int id;
+ enum gallivm_shader_type type;
+ enum gallivm_vector_layout layout;
+ int num_components;
+ int num_consts;
+
+ //FIXME: this might not be enough for some shaders
+ struct gallivm_interpolate interpolators[32*4];
+ int num_interp;
+};
+
+struct gallivm_prog {
+ llvm::Module *module;
+ void *function;
+
+ int id;
+ enum gallivm_shader_type type;
+
+ int num_consts;
+
+ //FIXME: this might not be enough for some shaders
+ struct gallivm_interpolate interpolators[32*4];
+ int num_interp;
+};
+
+static INLINE void gallivm_swizzle_components(int swizzle,
+ int *xc, int *yc,
+ int *zc, int *wc)
+{
+ int x = swizzle / 1000; swizzle -= x * 1000;
+ int y = swizzle / 100; swizzle -= y * 100;
+ int z = swizzle / 10; swizzle -= z * 10;
+ int w = swizzle;
+
+ if (xc) *xc = x;
+ if (yc) *yc = y;
+ if (zc) *zc = z;
+ if (wc) *wc = w;
+}
+
+static INLINE boolean gallivm_is_swizzle(int swizzle)
+{
+ const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 +
+ TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W;
+ return swizzle != NO_SWIZZLE;
+}
+
+static INLINE int gallivm_x_swizzle(int swizzle)
+{
+ int x;
+ gallivm_swizzle_components(swizzle, &x, 0, 0, 0);
+ return x;
+}
+
+static INLINE int gallivm_y_swizzle(int swizzle)
+{
+ int y;
+ gallivm_swizzle_components(swizzle, 0, &y, 0, 0);
+ return y;
+}
+
+static INLINE int gallivm_z_swizzle(int swizzle)
+{
+ int z;
+ gallivm_swizzle_components(swizzle, 0, 0, &z, 0);
+ return z;
+}
+
+static INLINE int gallivm_w_swizzle(int swizzle)
+{
+ int w;
+ gallivm_swizzle_components(swizzle, 0, 0, 0, &w);
+ return w;
+}
+
+#endif /* MESA_LLVM */
+
+#if defined __cplusplus
+} // extern "C"
+#endif
+
+#endif
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin zack@tungstengraphics.com
+ */
+#ifdef MESA_LLVM
+
+#include "instructions.h"
+
+#include "storage.h"
+
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/Function.h>
+#include <llvm/InstrTypes.h>
+#include <llvm/Instructions.h>
+#include <llvm/ParameterAttributes.h>
+
+#include <sstream>
+#include <fstream>
+#include <iostream>
+
+using namespace llvm;
+
+#include "gallivm_builtins.cpp"
+
+static inline std::string createFuncName(int label)
+{
+ std::ostringstream stream;
+ stream << "function";
+ stream << label;
+ return stream.str();
+}
+
+Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block,
+ Storage *storage)
+ : m_mod(mod), m_func(func), m_builder(block), m_idx(0),
+ m_storage(storage)
+{
+ m_floatVecType = VectorType::get(Type::FloatTy, 4);
+
+ m_llvmFSqrt = 0;
+ m_llvmFAbs = 0;
+ m_llvmPow = 0;
+ m_llvmFloor = 0;
+ m_llvmFlog = 0;
+ m_llvmLit = 0;
+ m_fmtPtr = 0;
+
+ createGallivmBuiltins(m_mod);
+}
+
+llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2)
+{
+ return m_builder.CreateAdd(in1, in2, name("add"));
+}
+
+llvm::Value * Instructions::madd(llvm::Value *in1, llvm::Value *in2,
+ llvm::Value *in3)
+{
+ Value *mulRes = mul(in1, in2);
+ return add(mulRes, in3);
+}
+
+llvm::Value * Instructions::mul(llvm::Value *in1, llvm::Value *in2)
+{
+ return m_builder.CreateMul(in1, in2, name("mul"));
+}
+
+const char * Instructions::name(const char *prefix)
+{
+ ++m_idx;
+ snprintf(m_name, 32, "%s%d", prefix, m_idx);
+ return m_name;
+}
+
+llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *mulRes = mul(in1, in2);
+ Value *x = m_builder.CreateExtractElement(mulRes,
+ m_storage->constantInt(0),
+ name("extractx"));
+ Value *y = m_builder.CreateExtractElement(mulRes,
+ m_storage->constantInt(1),
+ name("extracty"));
+ Value *z = m_builder.CreateExtractElement(mulRes,
+ m_storage->constantInt(2),
+ name("extractz"));
+ Value *xy = m_builder.CreateAdd(x, y,name("xy"));
+ Value *dot3 = m_builder.CreateAdd(xy, z, name("dot3"));
+ return vectorFromVals(dot3, dot3, dot3, dot3);
+}
+
+llvm::Value *Instructions::callFSqrt(llvm::Value *val)
+{
+ if (!m_llvmFSqrt) {
+ // predeclare the intrinsic
+ std::vector<const Type*> fsqrtArgs;
+ fsqrtArgs.push_back(Type::FloatTy);
+ ParamAttrsList *fsqrtPal = 0;
+ FunctionType* fsqrtType = FunctionType::get(
+ /*Result=*/Type::FloatTy,
+ /*Params=*/fsqrtArgs,
+ /*isVarArg=*/false);
+ m_llvmFSqrt = new Function(
+ /*Type=*/fsqrtType,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"llvm.sqrt.f32", m_mod);
+ m_llvmFSqrt->setCallingConv(CallingConv::C);
+ m_llvmFSqrt->setParamAttrs(fsqrtPal);
+ }
+ CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val,
+ name("sqrt"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::rsq(llvm::Value *in1)
+{
+ Value *x = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(0),
+ name("extractx"));
+ Value *abs = callFAbs(x);
+ Value *sqrt = callFSqrt(abs);
+
+ Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy,
+ APFloat(1.f)),
+ sqrt,
+ name("rsqrt"));
+ return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt);
+}
+
+llvm::Value * Instructions::vectorFromVals(llvm::Value *x, llvm::Value *y,
+ llvm::Value *z, llvm::Value *w)
+{
+ Constant *const_vec = Constant::getNullValue(m_floatVecType);
+ Value *res = m_builder.CreateInsertElement(const_vec, x,
+ m_storage->constantInt(0),
+ name("vecx"));
+ res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1),
+ name("vecxy"));
+ res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2),
+ name("vecxyz"));
+ if (w)
+ res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3),
+ name("vecxyzw"));
+ return res;
+}
+
+llvm::Value *Instructions::callFAbs(llvm::Value *val)
+{
+ if (!m_llvmFAbs) {
+ // predeclare the intrinsic
+ std::vector<const Type*> fabsArgs;
+ fabsArgs.push_back(Type::FloatTy);
+ ParamAttrsList *fabsPal = 0;
+ FunctionType* fabsType = FunctionType::get(
+ /*Result=*/Type::FloatTy,
+ /*Params=*/fabsArgs,
+ /*isVarArg=*/false);
+ m_llvmFAbs = new Function(
+ /*Type=*/fabsType,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"fabs", m_mod);
+ m_llvmFAbs->setCallingConv(CallingConv::C);
+ m_llvmFAbs->setParamAttrs(fabsPal);
+ }
+ CallInst *call = m_builder.CreateCall(m_llvmFAbs, val,
+ name("fabs"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::lit(llvm::Value *in)
+{
+ if (!m_llvmLit) {
+ m_llvmLit = m_mod->getFunction("lit");
+ }
+ CallInst *call = m_builder.CreateCall(m_llvmLit, in, name("litres"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::sub(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *res = m_builder.CreateSub(in1, in2, name("sub"));
+ return res;
+}
+
+llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2)
+{
+ if (!m_llvmPow) {
+ // predeclare the intrinsic
+ std::vector<const Type*> powArgs;
+ powArgs.push_back(Type::FloatTy);
+ powArgs.push_back(Type::FloatTy);
+ ParamAttrsList *powPal = 0;
+ FunctionType* powType = FunctionType::get(
+ /*Result=*/Type::FloatTy,
+ /*Params=*/powArgs,
+ /*isVarArg=*/false);
+ m_llvmPow = new Function(
+ /*Type=*/powType,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"llvm.pow.f32", m_mod);
+ m_llvmPow->setCallingConv(CallingConv::C);
+ m_llvmPow->setParamAttrs(powPal);
+ }
+ std::vector<Value*> params;
+ params.push_back(val1);
+ params.push_back(val2);
+ CallInst *call = m_builder.CreateCall(m_llvmPow, params.begin(), params.end(),
+ name("pow"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *x1 = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(0),
+ name("x1"));
+ Value *x2 = m_builder.CreateExtractElement(in2,
+ m_storage->constantInt(0),
+ name("x2"));
+ llvm::Value *val = callPow(x1, x2);
+ return vectorFromVals(val, val, val, val);
+}
+
+llvm::Value * Instructions::rcp(llvm::Value *in1)
+{
+ Value *x1 = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(0),
+ name("x1"));
+ Value *res = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy,
+ APFloat(1.f)),
+ x1, name("rcp"));
+ return vectorFromVals(res, res, res, res);
+}
+
+llvm::Value * Instructions::dp4(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *mulRes = mul(in1, in2);
+ std::vector<llvm::Value*> vec = extractVector(mulRes);
+ Value *xy = m_builder.CreateAdd(vec[0], vec[1], name("xy"));
+ Value *xyz = m_builder.CreateAdd(xy, vec[2], name("xyz"));
+ Value *dot4 = m_builder.CreateAdd(xyz, vec[3], name("dot4"));
+ return vectorFromVals(dot4, dot4, dot4, dot4);
+}
+
+llvm::Value * Instructions::dph(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *mulRes = mul(in1, in2);
+ std::vector<llvm::Value*> vec1 = extractVector(mulRes);
+ Value *xy = m_builder.CreateAdd(vec1[0], vec1[1], name("xy"));
+ Value *xyz = m_builder.CreateAdd(xy, vec1[2], name("xyz"));
+ Value *dph = m_builder.CreateAdd(xyz, vec1[3], name("dph"));
+ return vectorFromVals(dph, dph, dph, dph);
+}
+
+llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *y1 = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(1),
+ name("y1"));
+ Value *z = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(2),
+ name("z"));
+ Value *y2 = m_builder.CreateExtractElement(in2,
+ m_storage->constantInt(1),
+ name("y2"));
+ Value *w = m_builder.CreateExtractElement(in2,
+ m_storage->constantInt(3),
+ name("w"));
+ Value *ry = m_builder.CreateMul(y1, y2, name("tyuy"));
+ return vectorFromVals(ConstantFP::get(Type::FloatTy, APFloat(1.f)),
+ ry, z, w);
+}
+
+llvm::Value * Instructions::ex2(llvm::Value *in)
+{
+ llvm::Value *val = callPow(ConstantFP::get(Type::FloatTy, APFloat(2.f)),
+ m_builder.CreateExtractElement(
+ in, m_storage->constantInt(0),
+ name("x1")));
+ return vectorFromVals(val, val, val, val);
+}
+
+llvm::Value * Instructions::callFloor(llvm::Value *val)
+{
+ if (!m_llvmFloor) {
+ // predeclare the intrinsic
+ std::vector<const Type*> floorArgs;
+ floorArgs.push_back(Type::FloatTy);
+ ParamAttrsList *floorPal = 0;
+ FunctionType* floorType = FunctionType::get(
+ /*Result=*/Type::FloatTy,
+ /*Params=*/floorArgs,
+ /*isVarArg=*/false);
+ m_llvmFloor = new Function(
+ /*Type=*/floorType,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"floorf", m_mod);
+ m_llvmFloor->setCallingConv(CallingConv::C);
+ m_llvmFloor->setParamAttrs(floorPal);
+ }
+ CallInst *call = m_builder.CreateCall(m_llvmFloor, val,
+ name("floorf"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::floor(llvm::Value *in)
+{
+ std::vector<llvm::Value*> vec = extractVector(in);
+ return vectorFromVals(callFloor(vec[0]), callFloor(vec[1]),
+ callFloor(vec[2]), callFloor(vec[3]));
+}
+
+llvm::Value * Instructions::arl(llvm::Value *in)
+{
+ return floor(in);
+}
+
+llvm::Value * Instructions::frc(llvm::Value *in)
+{
+ llvm::Value *flr = floor(in);
+ return sub(in, flr);
+}
+
+llvm::Value * Instructions::callFLog(llvm::Value *val)
+{
+ if (!m_llvmFlog) {
+ // predeclare the intrinsic
+ std::vector<const Type*> flogArgs;
+ flogArgs.push_back(Type::FloatTy);
+ ParamAttrsList *flogPal = 0;
+ FunctionType* flogType = FunctionType::get(
+ /*Result=*/Type::FloatTy,
+ /*Params=*/flogArgs,
+ /*isVarArg=*/false);
+ m_llvmFlog = new Function(
+ /*Type=*/flogType,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"logf", m_mod);
+ m_llvmFlog->setCallingConv(CallingConv::C);
+ m_llvmFlog->setParamAttrs(flogPal);
+ }
+ CallInst *call = m_builder.CreateCall(m_llvmFlog, val,
+ name("logf"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::lg2(llvm::Value *in)
+{
+ std::vector<llvm::Value*> vec = extractVector(in);
+ llvm::Value *const_vec = constVector(1.442695f, 1.442695f,
+ 1.442695f, 1.442695f);
+ return mul(vectorFromVals(callFLog(vec[0]), callFLog(vec[1]),
+ callFLog(vec[2]), callFLog(vec[3])), const_vec);
+}
+
+llvm::Value * Instructions::min(llvm::Value *in1, llvm::Value *in2)
+{
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+
+ Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp"));
+ Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0],
+ name("selx"));
+
+ Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp"));
+ Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1],
+ name("sely"));
+
+ Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp"));
+ Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2],
+ name("selz"));
+
+ Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp"));
+ Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3],
+ name("selw"));
+
+ return vectorFromVals(selx, sely, selz, selw);
+}
+
+llvm::Value * Instructions::max(llvm::Value *in1, llvm::Value *in2)
+{
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+
+ Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0],
+ name("xcmp"));
+ Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0],
+ name("selx"));
+
+ Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1],
+ name("ycmp"));
+ Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1],
+ name("sely"));
+
+ Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2],
+ name("zcmp"));
+ Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2],
+ name("selz"));
+
+ Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3],
+ name("wcmp"));
+ Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3],
+ name("selw"));
+
+ return vectorFromVals(selx, sely, selz, selw);
+}
+
+void Instructions::printVector(llvm::Value *val)
+{
+ static const char *frmt = "Vector is [%f, %f, %f, %f]\x0A";
+
+ if (!m_fmtPtr) {
+ Constant *format = ConstantArray::get(frmt, true);
+ ArrayType *arrayTy = ArrayType::get(IntegerType::get(8), strlen(frmt) + 1);
+ GlobalVariable* globalFormat = new GlobalVariable(
+ /*Type=*/arrayTy,
+ /*isConstant=*/true,
+ /*Linkage=*/GlobalValue::InternalLinkage,
+ /*Initializer=*/0, // has initializer, specified below
+ /*Name=*/name(".str"),
+ m_mod);
+ globalFormat->setInitializer(format);
+
+ Constant* const_int0 = Constant::getNullValue(IntegerType::get(32));
+ std::vector<Constant*> const_ptr_21_indices;
+ const_ptr_21_indices.push_back(const_int0);
+ const_ptr_21_indices.push_back(const_int0);
+ m_fmtPtr = ConstantExpr::getGetElementPtr(globalFormat,
+ &const_ptr_21_indices[0], const_ptr_21_indices.size());
+ }
+
+ Function *func_printf = m_mod->getFunction("printf");
+ if (!func_printf)
+ func_printf = declarePrintf();
+ assert(func_printf);
+ std::vector<llvm::Value*> vec = extractVector(val);
+ Value *dx = m_builder.CreateFPExt(vec[0], Type::DoubleTy, name("dx"));
+ Value *dy = m_builder.CreateFPExt(vec[1], Type::DoubleTy, name("dy"));
+ Value *dz = m_builder.CreateFPExt(vec[2], Type::DoubleTy, name("dz"));
+ Value *dw = m_builder.CreateFPExt(vec[3], Type::DoubleTy, name("dw"));
+ std::vector<Value*> params;
+ params.push_back(m_fmtPtr);
+ params.push_back(dx);
+ params.push_back(dy);
+ params.push_back(dz);
+ params.push_back(dw);
+ CallInst *call = m_builder.CreateCall(func_printf, params.begin(), params.end(),
+ name("printf"));
+ call->setCallingConv(CallingConv::C);
+ call->setTailCall(true);
+}
+
+llvm::Function * Instructions::declarePrintf()
+{
+ std::vector<const Type*> args;
+ ParamAttrsList *params = 0;
+ FunctionType* funcTy = FunctionType::get(
+ /*Result=*/IntegerType::get(32),
+ /*Params=*/args,
+ /*isVarArg=*/true);
+ Function* func_printf = new Function(
+ /*Type=*/funcTy,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/"printf", m_mod);
+ func_printf->setCallingConv(CallingConv::C);
+ func_printf->setParamAttrs(params);
+ return func_printf;
+}
+
+
+llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2)
+{
+ Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
+ Constant *const0f = Constant::getNullValue(Type::FloatTy);
+
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+ Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], name("xcmp"));
+ Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel"));
+
+ Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], name("ycmp"));
+ Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel"));
+
+ Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], name("zcmp"));
+ Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel"));
+
+ Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], name("wcmp"));
+ Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel"));
+
+ return vectorFromVals(x, y, z, w);
+}
+llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2)
+{
+ Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
+ Constant *const0f = Constant::getNullValue(Type::FloatTy);
+
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+
+ Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], vec2[0], name("xcmp"));
+ Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel"));
+
+ Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], vec2[1], name("ycmp"));
+ Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel"));
+
+ Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], vec2[2], name("zcmp"));
+ Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel"));
+
+ Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], vec2[3], name("wcmp"));
+ Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel"));
+
+ return vectorFromVals(x, y, z, w);
+}
+
+
+llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2)
+{
+ Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f));
+ Constant *const0f = Constant::getNullValue(Type::FloatTy);
+
+ std::vector<llvm::Value*> vec1 = extractVector(in1);
+ std::vector<llvm::Value*> vec2 = extractVector(in2);
+
+ Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp"));
+ Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel"));
+
+ Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp"));
+ Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel"));
+
+ Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp"));
+ Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel"));
+
+ Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp"));
+ Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel"));
+
+ return vectorFromVals(x, y, z, w);
+}
+
+llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2)
+{
+ Value *x1 = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(0),
+ name("x1"));
+ Value *y1 = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(1),
+ name("y1"));
+ Value *z1 = m_builder.CreateExtractElement(in1,
+ m_storage->constantInt(2),
+ name("z1"));
+
+ Value *x2 = m_builder.CreateExtractElement(in2,
+ m_storage->constantInt(0),
+ name("x2"));
+ Value *y2 = m_builder.CreateExtractElement(in2,
+ m_storage->constantInt(1),
+ name("y2"));
+ Value *z2 = m_builder.CreateExtractElement(in2,
+ m_storage->constantInt(2),
+ name("z2"));
+ Value *y1z2 = mul(y1, z2);
+ Value *z1y2 = mul(z1, y2);
+
+ Value *z1x2 = mul(z1, x2);
+ Value *x1z2 = mul(x1, z2);
+
+ Value *x1y2 = mul(x1, y2);
+ Value *y1x2 = mul(y1, x2);
+
+ return vectorFromVals(sub(y1z2, z1y2), sub(z1x2, x1z2), sub(x1y2, y1x2));
+}
+
+
+llvm::Value * Instructions::abs(llvm::Value *in)
+{
+ std::vector<llvm::Value*> vec = extractVector(in);
+ Value *xabs = callFAbs(vec[0]);
+ Value *yabs = callFAbs(vec[1]);
+ Value *zabs = callFAbs(vec[2]);
+ Value *wabs = callFAbs(vec[3]);
+ return vectorFromVals(xabs, yabs, zabs, wabs);
+}
+
+void Instructions::ifop(llvm::Value *in)
+{
+ BasicBlock *ifthen = new BasicBlock(name("ifthen"), m_func,0);
+ BasicBlock *ifend = new BasicBlock(name("ifthenend"), m_func,0);
+
+ //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0);
+ //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0);
+ //BasicBlock *wblock = new BasicBlock(name("wblock"), m_func,0);
+
+ Constant *float0 = Constant::getNullValue(Type::FloatTy);
+
+ Value *x = m_builder.CreateExtractElement(in, m_storage->constantInt(0),
+ name("extractx"));
+ Value *xcmp = m_builder.CreateFCmpUNE(x, float0, name("xcmp"));
+ m_builder.CreateCondBr(xcmp, ifthen, ifend);
+ //m_builder.SetInsertPoint(yblock);
+
+ m_builder.SetInsertPoint(ifthen);
+ m_ifStack.push(ifend);
+}
+
+llvm::BasicBlock * Instructions::currentBlock() const
+{
+ return m_builder.GetInsertBlock();
+}
+
+void Instructions::elseop()
+{
+ assert(!m_ifStack.empty());
+ BasicBlock *ifend = new BasicBlock(name("ifend"), m_func,0);
+ m_builder.CreateBr(ifend);
+ m_builder.SetInsertPoint(m_ifStack.top());
+ currentBlock()->setName(name("ifelse"));
+ m_ifStack.pop();
+ m_ifStack.push(ifend);
+}
+
+void Instructions::endif()
+{
+ assert(!m_ifStack.empty());
+ m_builder.CreateBr(m_ifStack.top());
+ m_builder.SetInsertPoint(m_ifStack.top());
+ m_ifStack.pop();
+}
+
+llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2,
+ llvm::Value *in3)
+{
+ llvm::Value *m = mul(in1, in2);
+ llvm::Value *vec1 = constVector(1.f, 1.f, 1.f, 1.f);
+ llvm::Value *s = sub(vec1, in1);
+ return add(m, mul(s, in3));
+}
+
+void Instructions::beginLoop()
+{
+ BasicBlock *begin = new BasicBlock(name("loop"), m_func,0);
+ BasicBlock *end = new BasicBlock(name("endloop"), m_func,0);
+
+ m_builder.CreateBr(begin);
+ Loop loop;
+ loop.begin = begin;
+ loop.end = end;
+ m_builder.SetInsertPoint(begin);
+ m_loopStack.push(loop);
+}
+
+void Instructions::endLoop()
+{
+ assert(!m_loopStack.empty());
+ Loop loop = m_loopStack.top();
+ m_builder.CreateBr(loop.begin);
+ loop.end->moveAfter(currentBlock());
+ m_builder.SetInsertPoint(loop.end);
+ m_loopStack.pop();
+}
+
+void Instructions::brk()
+{
+ assert(!m_loopStack.empty());
+ BasicBlock *unr = new BasicBlock(name("unreachable"), m_func,0);
+ m_builder.CreateBr(m_loopStack.top().end);
+ m_builder.SetInsertPoint(unr);
+}
+
+llvm::Value * Instructions::trunc(llvm::Value *in)
+{
+ std::vector<llvm::Value*> vec = extractVector(in);
+ Value *icastx = m_builder.CreateFPToSI(vec[0], IntegerType::get(32),
+ name("ftoix"));
+ Value *icasty = m_builder.CreateFPToSI(vec[1], IntegerType::get(32),
+ name("ftoiy"));
+ Value *icastz = m_builder.CreateFPToSI(vec[2], IntegerType::get(32),
+ name("ftoiz"));
+ Value *icastw = m_builder.CreateFPToSI(vec[3], IntegerType::get(32),
+ name("ftoiw"));
+ Value *fx = m_builder.CreateSIToFP(icastx, Type::FloatTy,
+ name("fx"));
+ Value *fy = m_builder.CreateSIToFP(icasty, Type::FloatTy,
+ name("fy"));
+ Value *fz = m_builder.CreateSIToFP(icastz, Type::FloatTy,
+ name("fz"));
+ Value *fw = m_builder.CreateSIToFP(icastw, Type::FloatTy,
+ name("fw"));
+ return vectorFromVals(fx, fy, fz, fw);
+}
+
+void Instructions::end()
+{
+ m_builder.CreateRetVoid();
+}
+
+void Instructions::cal(int label, llvm::Value *input)
+{
+ std::vector<Value*> params;
+ params.push_back(input);
+ llvm::Function *func = findFunction(label);
+
+ m_builder.CreateCall(func, params.begin(), params.end());
+}
+
+llvm::Function * Instructions::declareFunc(int label)
+{
+ PointerType *vecPtr = PointerType::getUnqual(m_floatVecType);
+ std::vector<const Type*> args;
+ args.push_back(vecPtr);
+ args.push_back(vecPtr);
+ args.push_back(vecPtr);
+ args.push_back(vecPtr);
+ ParamAttrsList *params = 0;
+ FunctionType *funcType = FunctionType::get(
+ /*Result=*/Type::VoidTy,
+ /*Params=*/args,
+ /*isVarArg=*/false);
+ std::string name = createFuncName(label);
+ Function *func = new Function(
+ /*Type=*/funcType,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Name=*/name.c_str(), m_mod);
+ func->setCallingConv(CallingConv::C);
+ func->setParamAttrs(params);
+ return func;
+}
+
+void Instructions::bgnSub(unsigned label)
+{
+ llvm::Function *func = findFunction(label);
+
+ Function::arg_iterator args = func->arg_begin();
+ Value *ptr_INPUT = args++;
+ ptr_INPUT->setName("INPUT");
+ m_storage->pushArguments(ptr_INPUT);
+
+ llvm::BasicBlock *entry = new BasicBlock("entry", func, 0);
+
+ m_func = func;
+ m_builder.SetInsertPoint(entry);
+}
+
+void Instructions::endSub()
+{
+ m_func = 0;
+ m_builder.SetInsertPoint(0);
+}
+
+llvm::Function * Instructions::findFunction(int label)
+{
+ llvm::Function *func = m_functions[label];
+ if (!func) {
+ func = declareFunc(label);
+ m_functions[label] = func;
+ }
+ return func;
+}
+
+llvm::Value * Instructions::constVector(float x, float y, float z, float w)
+{
+ std::vector<Constant*> vec(4);
+ vec[0] = ConstantFP::get(Type::FloatTy, APFloat(x));
+ vec[1] = ConstantFP::get(Type::FloatTy, APFloat(y));
+ vec[2] = ConstantFP::get(Type::FloatTy, APFloat(z));
+ vec[3] = ConstantFP::get(Type::FloatTy, APFloat(w));
+ return ConstantVector::get(m_floatVecType, vec);
+}
+
+
+std::vector<llvm::Value*> Instructions::extractVector(llvm::Value *vec)
+{
+ std::vector<llvm::Value*> elems(4);
+ elems[0] = m_builder.CreateExtractElement(vec, m_storage->constantInt(0),
+ name("x"));
+ elems[1] = m_builder.CreateExtractElement(vec, m_storage->constantInt(1),
+ name("y"));
+ elems[2] = m_builder.CreateExtractElement(vec, m_storage->constantInt(2),
+ name("z"));
+ elems[3] = m_builder.CreateExtractElement(vec, m_storage->constantInt(3),
+ name("w"));
+ return elems;
+}
+
+llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3)
+{
+ llvm::Function *func = m_mod->getFunction("cmp");
+ assert(func);
+
+ std::vector<Value*> params;
+ params.push_back(in1);
+ params.push_back(in2);
+ params.push_back(in3);
+ CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), name("cmpres"));
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::cos(llvm::Value *in)
+{
+#if 0
+ llvm::Function *func = m_mod->getFunction("vcos");
+ assert(func);
+
+ CallInst *call = m_builder.CreateCall(func, in, name("cosres"));
+ call->setTailCall(false);
+ return call;
+#else
+ std::vector<llvm::Value*> elems = extractVector(in);
+ Function *func = m_mod->getFunction("cosf");
+ assert(func);
+ CallInst *cos = m_builder.CreateCall(func, elems[0], name("cosres"));
+ cos->setCallingConv(CallingConv::C);
+ cos->setTailCall(true);
+ return vectorFromVals(cos, cos, cos, cos);
+#endif
+}
+
+llvm::Value * Instructions::scs(llvm::Value *in)
+{
+ llvm::Function *func = m_mod->getFunction("scs");
+ assert(func);
+
+ CallInst *call = m_builder.CreateCall(func, in, name("scsres"));
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::kilp(llvm::Value *in)
+{
+ llvm::Function *func = m_mod->getFunction("kilp");
+ assert(func);
+
+ CallInst *call = m_builder.CreateCall(func, in, name("kilpres"));
+ call->setTailCall(false);
+ return call;
+}
+
+llvm::Value * Instructions::sin(llvm::Value *in)
+{
+ llvm::Function *func = m_mod->getFunction("vsin");
+ assert(func);
+
+ CallInst *call = m_builder.CreateCall(func, in, name("sinres"));
+ call->setTailCall(false);
+ return call;
+}
+#endif //MESA_LLVM
+
+
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin zack@tungstengraphics.com
+ */
+
+#ifndef INSTRUCTIONS_H
+#define INSTRUCTIONS_H
+
+#include <llvm/BasicBlock.h>
+#include <llvm/Module.h>
+#include <llvm/Value.h>
+#include <llvm/Support/LLVMBuilder.h>
+
+#include <map>
+#include <stack>
+
+namespace llvm {
+ class VectorType;
+ class Function;
+}
+
+class Storage;
+
+class Instructions
+{
+public:
+ Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block,
+ Storage *storage);
+
+ llvm::BasicBlock *currentBlock() const;
+
+ llvm::Value *abs(llvm::Value *in1);
+ llvm::Value *arl(llvm::Value *in1);
+ llvm::Value *add(llvm::Value *in1, llvm::Value *in2);
+ void beginLoop();
+ void bgnSub(unsigned);
+ void brk();
+ void cal(int label, llvm::Value *input);
+ llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3);
+ llvm::Value *cos(llvm::Value *in);
+ llvm::Value *cross(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *dp3(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *dp4(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *dph(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *dst(llvm::Value *in1, llvm::Value *in2);
+ void elseop();
+ void endif();
+ void endLoop();
+ void end();
+ void endSub();
+ llvm::Value *ex2(llvm::Value *in);
+ llvm::Value *floor(llvm::Value *in);
+ llvm::Value *frc(llvm::Value *in);
+ void ifop(llvm::Value *in);
+ llvm::Value *kilp(llvm::Value *in);
+ llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2,
+ llvm::Value *in3);
+ llvm::Value *lit(llvm::Value *in);
+ llvm::Value *lg2(llvm::Value *in);
+ llvm::Value *madd(llvm::Value *in1, llvm::Value *in2,
+ llvm::Value *in2);
+ llvm::Value *min(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *max(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *mul(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *pow(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *rcp(llvm::Value *in);
+ llvm::Value *rsq(llvm::Value *in);
+ llvm::Value *scs(llvm::Value *in);
+ llvm::Value *sge(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *sgt(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *sin(llvm::Value *in);
+ llvm::Value *slt(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *sub(llvm::Value *in1, llvm::Value *in2);
+ llvm::Value *trunc(llvm::Value *in);
+
+ void printVector(llvm::Value *val);
+private:
+ const char *name(const char *prefix);
+
+ llvm::Value *callFAbs(llvm::Value *val);
+ llvm::Value *callFloor(llvm::Value *val);
+ llvm::Value *callFSqrt(llvm::Value *val);
+ llvm::Value *callFLog(llvm::Value *val);
+ llvm::Value *callPow(llvm::Value *val1, llvm::Value *val2);
+
+ llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y,
+ llvm::Value *z, llvm::Value *w=0);
+
+ llvm::Value *constVector(float x, float y, float z, float w);
+
+ llvm::Function *declarePrintf();
+ llvm::Function *declareFunc(int label);
+
+ llvm::Function *findFunction(int label);
+
+ std::vector<llvm::Value*> extractVector(llvm::Value *vec);
+private:
+ llvm::Module *m_mod;
+ llvm::Function *m_func;
+ char m_name[32];
+ llvm::LLVMFoldingBuilder m_builder;
+ int m_idx;
+
+ llvm::VectorType *m_floatVecType;
+
+ llvm::Function *m_llvmFSqrt;
+ llvm::Function *m_llvmFAbs;
+ llvm::Function *m_llvmPow;
+ llvm::Function *m_llvmFloor;
+ llvm::Function *m_llvmFlog;
+ llvm::Function *m_llvmLit;
+
+ llvm::Constant *m_fmtPtr;
+
+ std::stack<llvm::BasicBlock*> m_ifStack;
+ struct Loop {
+ llvm::BasicBlock *begin;
+ llvm::BasicBlock *end;
+ };
+ std::stack<Loop> m_loopStack;
+ std::map<int, llvm::Function*> m_functions;
+ Storage *m_storage;
+};
+
+#endif
--- /dev/null
+#include "instructionssoa.h"
+
+#include "storagesoa.h"
+
+#include <llvm/Constants.h>
+
+using namespace llvm;
+
+InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func,
+ llvm::BasicBlock *block, StorageSoa *storage)
+ : m_builder(block),
+ m_storage(storage),
+ m_idx(0)
+{
+}
+
+const char * InstructionsSoa::name(const char *prefix) const
+{
+ ++m_idx;
+ snprintf(m_name, 32, "%s%d", prefix, m_idx);
+ return m_name;
+}
+
+llvm::Value * InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y,
+ llvm::Value *z, llvm::Value *w)
+{
+ VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+ Constant *constVector = Constant::getNullValue(vectorType);
+ Value *res = m_builder.CreateInsertElement(constVector, x,
+ m_storage->constantInt(0),
+ name("vecx"));
+ res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1),
+ name("vecxy"));
+ res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2),
+ name("vecxyz"));
+ if (w)
+ res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3),
+ name("vecxyzw"));
+ return res;
+}
+
+std::vector<llvm::Value*> InstructionsSoa::arl(const std::vector<llvm::Value*> in)
+{
+ std::vector<llvm::Value*> res(4);
+
+ //Extract x's
+ llvm::Value *x1 = m_builder.CreateExtractElement(in[0],
+ m_storage->constantInt(0),
+ name("extractX"));
+ //cast it to an unsigned int
+ x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast"));
+
+ res[0] = x1;//vectorFromVals(x1, x2, x3, x4);
+ //only x is valid. the others shouldn't be necessary
+ /*
+ res[1] = Constant::getNullValue(m_floatVecType);
+ res[2] = Constant::getNullValue(m_floatVecType);
+ res[3] = Constant::getNullValue(m_floatVecType);
+ */
+
+ return res;
+}
+
+
+std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx"));
+ res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy"));
+ res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz"));
+ res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw"));
+
+ return res;
+}
+
+std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx"));
+ res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly"));
+ res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz"));
+ res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw"));
+
+ return res;
+}
+
+void InstructionsSoa::end()
+{
+ m_builder.CreateRetVoid();
+}
+
+std::vector<llvm::Value*> InstructionsSoa::madd(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2,
+ const std::vector<llvm::Value*> in3)
+{
+ std::vector<llvm::Value*> res = mul(in1, in2);
+ return add(res, in3);
+}
+
+std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector)
+{
+ std::vector<llvm::Value*> res(4);
+ res[0] = m_builder.CreateExtractElement(vector,
+ m_storage->constantInt(0),
+ name("extract1X"));
+ res[1] = m_builder.CreateExtractElement(vector,
+ m_storage->constantInt(1),
+ name("extract2X"));
+ res[2] = m_builder.CreateExtractElement(vector,
+ m_storage->constantInt(2),
+ name("extract3X"));
+ res[3] = m_builder.CreateExtractElement(vector,
+ m_storage->constantInt(3),
+ name("extract4X"));
+
+ return res;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef INSTRUCTIONSSOA_H
+#define INSTRUCTIONSSOA_H
+
+#include <llvm/Support/LLVMBuilder.h>
+
+#include <vector>
+
+namespace llvm {
+ class Module;
+ class Function;
+ class BasicBlock;
+ class Value;
+}
+class StorageSoa;
+
+class InstructionsSoa
+{
+public:
+ InstructionsSoa(llvm::Module *mod, llvm::Function *func,
+ llvm::BasicBlock *block, StorageSoa *storage);
+
+ std::vector<llvm::Value*> arl(const std::vector<llvm::Value*> in);
+
+ std::vector<llvm::Value*> add(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ std::vector<llvm::Value*> madd(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2,
+ const std::vector<llvm::Value*> in3);
+ std::vector<llvm::Value*> mul(const std::vector<llvm::Value*> in1,
+ const std::vector<llvm::Value*> in2);
+ void end();
+
+ std::vector<llvm::Value*> extractVector(llvm::Value *vector);
+private:
+ const char * name(const char *prefix) const;
+ llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y,
+ llvm::Value *z, llvm::Value *w);
+private:
+ llvm::LLVMFoldingBuilder m_builder;
+ StorageSoa *m_storage;
+private:
+ mutable int m_idx;
+ mutable char m_name[32];
+};
+
+
+#endif
--- /dev/null
+/*clang --emit-llvm llvm_builtins.c |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=gallivm_builtins.cpp -f -for=shader -funcname=createGallivmBuiltins*/
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin zack@tungstengraphics.com
+ */
+typedef __attribute__(( ocu_vector_type(4) )) float float4;
+
+extern float powf(float a, float b);
+
+inline float approx(float a, float b)
+{
+ if (b < -128.0f) b = -128.0f;
+ if (b > 128.0f) b = 128.0f;
+ if (a < 0) a = 0;
+ return powf(a, b);
+}
+
+inline float4 lit(float4 tmp)
+{
+ float4 result;
+ result.x = 1.0;
+ result.w = 1.0;
+ if (tmp.x > 0) {
+ result.y = tmp.x;
+ result.z = approx(tmp.y, tmp.w);
+ } else {
+ result.y = 0;
+ result.z = 0;
+ }
+ return result;
+}
+
+inline float4 cmp(float4 tmp0, float4 tmp1, float4 tmp2)
+{
+ float4 result;
+
+ result.x = (tmp0.x < 0.0) ? tmp1.x : tmp2.x;
+ result.y = (tmp0.y < 0.0) ? tmp1.y : tmp2.y;
+ result.z = (tmp0.z < 0.0) ? tmp1.z : tmp2.z;
+ result.w = (tmp0.w < 0.0) ? tmp1.w : tmp2.w;
+
+ return result;
+}
+
+extern float cosf(float val);
+extern float sinf(float val);
+
+inline float4 vcos(float4 val)
+{
+ float4 result;
+ printf("VEC IN is %f %f %f %f\n", val.x, val.y, val.z, val.w);
+ result.x = cosf(val.x);
+ result.y = cosf(val.x);
+ result.z = cosf(val.x);
+ result.w = cosf(val.x);
+ printf("VEC OUT is %f %f %f %f\n", result.x, result.y, result.z, result.w);
+ return result;
+}
+
+inline float4 scs(float4 val)
+{
+ float4 result;
+ float tmp = val.x;
+ result.x = cosf(tmp);
+ result.y = sinf(tmp);
+ return result;
+}
+
+
+inline float4 vsin(float4 val)
+{
+ float4 result;
+ float tmp = val.x;
+ float res = sinf(tmp);
+ result.x = res;
+ result.y = res;
+ result.z = res;
+ result.w = res;
+ return result;
+}
+
+inline int kilp(float4 val)
+{
+ if (val.x < 0 || val.y < 0 || val.z < 0 || val.w < 0)
+ return 1;
+ else
+ return 0;
+}
--- /dev/null
+#include "loweringpass.h"
+
+using namespace llvm;
+
+char LoweringPass::ID = 0;
+RegisterPass<LoweringPass> X("lowering", "Lowering Pass");
+
+LoweringPass::LoweringPass()
+ : ModulePass((intptr_t)&ID)
+{
+}
+
+bool LoweringPass::runOnModule(Module &m)
+{
+ llvm::cerr << "Hello: " << m.getModuleIdentifier() << "\n";
+ return false;
+}
--- /dev/null
+#ifndef LOWERINGPASS_H
+#define LOWERINGPASS_H
+
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+
+struct LoweringPass : public llvm::ModulePass
+{
+ static char ID;
+ LoweringPass();
+
+ virtual bool runOnModule(llvm::Module &m);
+};
+
+#endif
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin zack@tungstengraphics.com
+ */
+#ifdef MESA_LLVM
+
+#include "storage.h"
+
+#include "gallivm_p.h"
+
+#include "pipe/p_shader_tokens.h"
+#include <llvm/BasicBlock.h>
+#include <llvm/Module.h>
+#include <llvm/Value.h>
+
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/InstrTypes.h>
+#include <llvm/Instructions.h>
+
+using namespace llvm;
+
+Storage::Storage(llvm::BasicBlock *block, llvm::Value *input)
+ : m_block(block),
+ m_INPUT(input),
+ m_addrs(32),
+ m_idx(0)
+{
+ m_floatVecType = VectorType::get(Type::FloatTy, 4);
+ m_intVecType = VectorType::get(IntegerType::get(32), 4);
+
+ m_undefFloatVec = UndefValue::get(m_floatVecType);
+ m_undefIntVec = UndefValue::get(m_intVecType);
+ m_extSwizzleVec = 0;
+
+ m_numConsts = 0;
+}
+
+//can only build vectors with all members in the [0, 9] range
+llvm::Constant *Storage::shuffleMask(int vec)
+{
+ if (!m_extSwizzleVec) {
+ std::vector<Constant*> elems;
+ elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f)));
+ elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f)));
+ elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f)));
+ elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f)));
+ m_extSwizzleVec = ConstantVector::get(m_floatVecType, elems);
+ }
+
+ if (m_intVecs.find(vec) != m_intVecs.end()) {
+ return m_intVecs[vec];
+ }
+ int origVec = vec;
+ Constant* const_vec = 0;
+ if (origVec == 0) {
+ const_vec = Constant::getNullValue(m_intVecType);
+ } else {
+ int x = gallivm_x_swizzle(vec);
+ int y = gallivm_y_swizzle(vec);
+ int z = gallivm_z_swizzle(vec);
+ int w = gallivm_w_swizzle(vec);
+ std::vector<Constant*> elems;
+ elems.push_back(constantInt(x));
+ elems.push_back(constantInt(y));
+ elems.push_back(constantInt(z));
+ elems.push_back(constantInt(w));
+ const_vec = ConstantVector::get(m_intVecType, elems);
+ }
+
+ m_intVecs[origVec] = const_vec;
+ return const_vec;
+}
+
+llvm::ConstantInt *Storage::constantInt(int idx)
+{
+ if (m_constInts.find(idx) != m_constInts.end()) {
+ return m_constInts[idx];
+ }
+ ConstantInt *const_int = ConstantInt::get(APInt(32, idx));
+ m_constInts[idx] = const_int;
+ return const_int;
+}
+
+llvm::Value *Storage::inputElement(int idx, llvm::Value *indIdx)
+{
+ Value *val = element(InputsArg, idx, indIdx);
+ LoadInst *load = new LoadInst(val, name("input"), false, m_block);
+ load->setAlignment(8);
+
+ return load;
+}
+
+llvm::Value *Storage::constElement(int idx, llvm::Value *indIdx)
+{
+ m_numConsts = ((idx + 1) > m_numConsts) ? (idx + 1) : m_numConsts;
+
+ Value *elem = element(ConstsArg, idx, indIdx);
+ LoadInst *load = new LoadInst(elem, name("const"), false, m_block);
+ load->setAlignment(8);
+ return load;
+}
+
+llvm::Value *Storage::shuffleVector(llvm::Value *vec, int shuffle)
+{
+ Constant *mask = shuffleMask(shuffle);
+ ShuffleVectorInst *res =
+ new ShuffleVectorInst(vec, m_extSwizzleVec, mask,
+ name("shuffle"), m_block);
+ return res;
+}
+
+
+llvm::Value *Storage::tempElement(int idx, llvm::Value *indIdx)
+{
+ Value *elem = element(TempsArg, idx, indIdx);
+
+ LoadInst *load = new LoadInst(elem, name("temp"), false, m_block);
+ load->setAlignment(8);
+
+ return load;
+}
+
+void Storage::setTempElement(int idx, llvm::Value *val, int mask)
+{
+ if (mask != TGSI_WRITEMASK_XYZW) {
+ llvm::Value *templ = 0;
+ if (m_tempWriteMap[idx])
+ templ = tempElement(idx);
+ val = maskWrite(val, mask, templ);
+ }
+ Value *elem = element(TempsArg, idx);
+ StoreInst *st = new StoreInst(val, elem, false, m_block);
+ st->setAlignment(8);
+ m_tempWriteMap[idx] = true;
+}
+
+void Storage::setOutputElement(int dstIdx, llvm::Value *val, int mask)
+{
+ if (mask != TGSI_WRITEMASK_XYZW) {
+ llvm::Value *templ = 0;
+ if (m_destWriteMap[dstIdx])
+ templ = outputElement(dstIdx);
+ val = maskWrite(val, mask, templ);
+ }
+
+ Value *elem = element(DestsArg, dstIdx);
+ StoreInst *st = new StoreInst(val, elem, false, m_block);
+ st->setAlignment(8);
+ m_destWriteMap[dstIdx] = true;
+}
+
+llvm::Value *Storage::maskWrite(llvm::Value *src, int mask, llvm::Value *templ)
+{
+ llvm::Value *dst = templ;
+ if (!dst)
+ dst = Constant::getNullValue(m_floatVecType);
+ if ((mask & TGSI_WRITEMASK_X)) {
+ llvm::Value *x = new ExtractElementInst(src, unsigned(0),
+ name("x"), m_block);
+ dst = new InsertElementInst(dst, x, unsigned(0),
+ name("dstx"), m_block);
+ }
+ if ((mask & TGSI_WRITEMASK_Y)) {
+ llvm::Value *y = new ExtractElementInst(src, unsigned(1),
+ name("y"), m_block);
+ dst = new InsertElementInst(dst, y, unsigned(1),
+ name("dsty"), m_block);
+ }
+ if ((mask & TGSI_WRITEMASK_Z)) {
+ llvm::Value *z = new ExtractElementInst(src, unsigned(2),
+ name("z"), m_block);
+ dst = new InsertElementInst(dst, z, unsigned(2),
+ name("dstz"), m_block);
+ }
+ if ((mask & TGSI_WRITEMASK_W)) {
+ llvm::Value *w = new ExtractElementInst(src, unsigned(3),
+ name("w"), m_block);
+ dst = new InsertElementInst(dst, w, unsigned(3),
+ name("dstw"), m_block);
+ }
+ return dst;
+}
+
+const char * Storage::name(const char *prefix)
+{
+ ++m_idx;
+ snprintf(m_name, 32, "%s%d", prefix, m_idx);
+ return m_name;
+}
+
+int Storage::numConsts() const
+{
+ return m_numConsts;
+}
+
+llvm::Value * Storage::addrElement(int idx) const
+{
+ Value *ret = m_addrs[idx];
+ if (!ret)
+ return m_undefFloatVec;
+ return ret;
+}
+
+void Storage::setAddrElement(int idx, llvm::Value *val, int mask)
+{
+ if (mask != TGSI_WRITEMASK_XYZW) {
+ llvm::Value *templ = m_addrs[idx];
+ val = maskWrite(val, mask, templ);
+ }
+ m_addrs[idx] = val;
+}
+
+llvm::Value * Storage::extractIndex(llvm::Value *vec)
+{
+ llvm::Value *x = new ExtractElementInst(vec, unsigned(0),
+ name("x"), m_block);
+ return new FPToSIInst(x, IntegerType::get(32), name("intidx"), m_block);
+}
+
+void Storage::setCurrentBlock(llvm::BasicBlock *block)
+{
+ m_block = block;
+}
+
+llvm::Value * Storage::outputElement(int idx, llvm::Value *indIdx)
+{
+ Value *elem = element(DestsArg, idx, indIdx);
+ LoadInst *load = new LoadInst(elem, name("output"), false, m_block);
+ load->setAlignment(8);
+
+ return load;
+}
+
+llvm::Value * Storage::inputPtr() const
+{
+ return m_INPUT;
+}
+
+void Storage::pushArguments(llvm::Value *input)
+{
+ m_argStack.push(m_INPUT);
+
+ m_INPUT = input;
+}
+
+void Storage::popArguments()
+{
+ m_INPUT = m_argStack.top();
+ m_argStack.pop();
+}
+
+void Storage::pushTemps()
+{
+ m_extSwizzleVec = 0;
+}
+
+void Storage::popTemps()
+{
+}
+
+llvm::Value * Storage::immediateElement(int idx)
+{
+ return m_immediates[idx];
+}
+
+void Storage::addImmediate(float *val)
+{
+ std::vector<Constant*> vec(4);
+ vec[0] = ConstantFP::get(Type::FloatTy, APFloat(val[0]));
+ vec[1] = ConstantFP::get(Type::FloatTy, APFloat(val[1]));
+ vec[2] = ConstantFP::get(Type::FloatTy, APFloat(val[2]));
+ vec[3] = ConstantFP::get(Type::FloatTy, APFloat(val[3]));
+ m_immediates.push_back(ConstantVector::get(m_floatVecType, vec));
+}
+
+
+llvm::Value * Storage::elemPtr(Args arg)
+{
+ std::vector<Value*> indices;
+ indices.push_back(constantInt(0));
+ indices.push_back(constantInt(static_cast<int>(arg)));
+ GetElementPtrInst *getElem = new GetElementPtrInst(m_INPUT,
+ indices.begin(),
+ indices.end(),
+ name("input_ptr"),
+ m_block);
+ return new LoadInst(getElem, name("input_field"), false, m_block);
+}
+
+llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx,
+ llvm::Value *indIdx )
+{
+ GetElementPtrInst *getElem = 0;
+
+ if (indIdx) {
+ getElem = new GetElementPtrInst(ptr,
+ BinaryOperator::create(Instruction::Add,
+ indIdx,
+ constantInt(idx),
+ name("add"),
+ m_block),
+ name("field"),
+ m_block);
+ } else {
+ getElem = new GetElementPtrInst(ptr,
+ constantInt(idx),
+ name("field"),
+ m_block);
+ }
+ return getElem;
+}
+
+llvm::Value * Storage::element(Args arg, int idx, llvm::Value *indIdx )
+{
+ Value *val = elemPtr(arg);
+ return elemIdx(val, idx, indIdx);
+}
+
+void Storage::setKilElement(llvm::Value *val)
+{
+ std::vector<Value*> indices;
+ indices.push_back(constantInt(0));
+ indices.push_back(constantInt(static_cast<int>(KilArg)));
+ GetElementPtrInst *elem = new GetElementPtrInst(m_INPUT,
+ indices.begin(),
+ indices.end(),
+ name("kil_ptr"),
+ m_block);
+ StoreInst *st = new StoreInst(val, elem, false, m_block);
+ st->setAlignment(8);
+}
+
+#endif //MESA_LLVM
+
+
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Zack Rusin zack@tungstengraphics.com
+ */
+
+#ifndef STORAGE_H
+#define STORAGE_H
+
+#include <map>
+#include <set>
+#include <stack>
+#include <vector>
+
+namespace llvm {
+ class BasicBlock;
+ class Constant;
+ class ConstantInt;
+ class LoadInst;
+ class Value;
+ class VectorType;
+}
+
+class Storage
+{
+public:
+ Storage(llvm::BasicBlock *block,
+ llvm::Value *input);
+
+ llvm::Value *inputPtr() const;
+
+ void setCurrentBlock(llvm::BasicBlock *block);
+
+ llvm::ConstantInt *constantInt(int);
+ llvm::Constant *shuffleMask(int vec);
+ llvm::Value *inputElement(int idx, llvm::Value *indIdx =0);
+ llvm::Value *constElement(int idx, llvm::Value *indIdx =0);
+ llvm::Value *outputElement(int idx, llvm::Value *indIdx =0);
+ llvm::Value *tempElement(int idx, llvm::Value *indIdx =0);
+ llvm::Value *immediateElement(int idx);
+
+ void setOutputElement(int dstIdx, llvm::Value *val, int mask);
+ void setTempElement(int idx, llvm::Value *val, int mask);
+
+ llvm::Value *addrElement(int idx) const;
+ void setAddrElement(int idx, llvm::Value *val, int mask);
+
+ void setKilElement(llvm::Value *val);
+
+ llvm::Value *shuffleVector(llvm::Value *vec, int shuffle);
+
+ llvm::Value *extractIndex(llvm::Value *vec);
+
+ int numConsts() const;
+
+ void pushArguments(llvm::Value *input);
+ void popArguments();
+ void pushTemps();
+ void popTemps();
+
+ void addImmediate(float *val);
+
+private:
+ llvm::Value *maskWrite(llvm::Value *src, int mask, llvm::Value *templ);
+ const char *name(const char *prefix);
+
+ enum Args {
+ DestsArg = 0,
+ InputsArg = 1,
+ TempsArg = 2,
+ ConstsArg = 3,
+ KilArg = 4
+ };
+ llvm::Value *elemPtr(Args arg);
+ llvm::Value *elemIdx(llvm::Value *ptr, int idx,
+ llvm::Value *indIdx = 0);
+ llvm::Value *element(Args arg, int idx, llvm::Value *indIdx = 0);
+
+private:
+ llvm::BasicBlock *m_block;
+ llvm::Value *m_INPUT;
+
+ std::map<int, llvm::ConstantInt*> m_constInts;
+ std::map<int, llvm::Constant*> m_intVecs;
+ std::vector<llvm::Value*> m_addrs;
+ std::vector<llvm::Constant*> m_immediates;
+
+ llvm::VectorType *m_floatVecType;
+ llvm::VectorType *m_intVecType;
+
+ char m_name[32];
+ int m_idx;
+
+ int m_numConsts;
+
+ std::map<int, bool > m_destWriteMap;
+ std::map<int, bool > m_tempWriteMap;
+
+ llvm::Value *m_undefFloatVec;
+ llvm::Value *m_undefIntVec;
+ llvm::Value *m_extSwizzleVec;
+
+ std::stack<llvm::Value*> m_argStack;
+ std::stack<std::vector<llvm::Value*> > m_tempStack;
+};
+
+#endif
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "storagesoa.h"
+
+#include "gallivm_p.h"
+
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_debug.h"
+
+#include <llvm/BasicBlock.h>
+#include <llvm/Module.h>
+#include <llvm/Value.h>
+
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/InstrTypes.h>
+#include <llvm/Instructions.h>
+
+using namespace llvm;
+
+
+StorageSoa::StorageSoa(llvm::BasicBlock *block,
+ llvm::Value *input,
+ llvm::Value *output,
+ llvm::Value *consts,
+ llvm::Value *temps)
+ : m_block(block),
+ m_input(input),
+ m_output(output),
+ m_consts(consts),
+ m_temps(temps),
+ m_immediates(0),
+ m_idx(0)
+{
+}
+
+void StorageSoa::addImmediate(float *vec)
+{
+ std::vector<float> vals(4);
+ vals[0] = vec[0];
+ vals[1] = vec[1];
+ vals[2] = vec[2];
+ vals[3] = vec[3];
+ m_immediatesToFlush.push_back(vals);
+}
+
+void StorageSoa::declareImmediates()
+{
+ if (m_immediatesToFlush.empty())
+ return;
+
+ VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+ ArrayType *vectorChannels = ArrayType::get(vectorType, 4);
+ ArrayType *arrayType = ArrayType::get(vectorChannels, m_immediatesToFlush.size());
+
+ m_immediates = new GlobalVariable(
+ /*Type=*/arrayType,
+ /*isConstant=*/false,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Initializer=*/0, // has initializer, specified below
+ /*Name=*/name("immediates"),
+ currentModule());
+
+ std::vector<Constant*> arrayVals;
+ for (unsigned int i = 0; i < m_immediatesToFlush.size(); ++i) {
+ std::vector<float> vec = m_immediatesToFlush[i];
+ std::vector<float> vals(4);
+ std::vector<Constant*> channelArray;
+
+ vals[0] = vec[0]; vals[1] = vec[0]; vals[2] = vec[0]; vals[3] = vec[0];
+ llvm::Constant *xChannel = createConstGlobalVector(vals);
+
+ vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1];
+ llvm::Constant *yChannel = createConstGlobalVector(vals);
+
+ vals[0] = vec[2]; vals[1] = vec[2]; vals[2] = vec[2]; vals[3] = vec[2];
+ llvm::Constant *zChannel = createConstGlobalVector(vals);
+
+ vals[0] = vec[3]; vals[1] = vec[3]; vals[2] = vec[3]; vals[3] = vec[3];
+ llvm::Constant *wChannel = createConstGlobalVector(vals);
+ channelArray.push_back(xChannel);
+ channelArray.push_back(yChannel);
+ channelArray.push_back(zChannel);
+ channelArray.push_back(wChannel);
+ Constant *constChannels = ConstantArray::get(vectorChannels,
+ channelArray);
+ arrayVals.push_back(constChannels);
+ }
+ Constant *constArray = ConstantArray::get(arrayType, arrayVals);
+ m_immediates->setInitializer(constArray);
+
+ m_immediatesToFlush.clear();
+}
+
+llvm::Value *StorageSoa::addrElement(int idx) const
+{
+ std::map<int, llvm::Value*>::const_iterator itr = m_addresses.find(idx);
+ if (itr == m_addresses.end()) {
+ debug_printf("Trying to access invalid shader 'address'\n");
+ return 0;
+ }
+ llvm::Value * res = (*itr).second;
+
+ res = new LoadInst(res, name("addr"), false, m_block);
+
+ return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::inputElement(llvm::Value *idx)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = element(m_input, idx, 0);
+ res[1] = element(m_input, idx, 1);
+ res[2] = element(m_input, idx, 2);
+ res[3] = element(m_input, idx, 3);
+
+ return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::constElement(llvm::Value *idx)
+{
+ std::vector<llvm::Value*> res(4);
+ llvm::Value *xChannel, *yChannel, *zChannel, *wChannel;
+
+ xChannel = elementPointer(m_consts, idx, 0);
+ yChannel = elementPointer(m_consts, idx, 1);
+ zChannel = elementPointer(m_consts, idx, 2);
+ wChannel = elementPointer(m_consts, idx, 3);
+
+ res[0] = alignedArrayLoad(xChannel);
+ res[1] = alignedArrayLoad(yChannel);
+ res[2] = alignedArrayLoad(zChannel);
+ res[3] = alignedArrayLoad(wChannel);
+
+ return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = element(m_output, idx, 0);
+ res[1] = element(m_output, idx, 1);
+ res[2] = element(m_output, idx, 2);
+ res[3] = element(m_output, idx, 3);
+
+ return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::tempElement(llvm::Value *idx)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = element(m_temps, idx, 0);
+ res[1] = element(m_temps, idx, 1);
+ res[2] = element(m_temps, idx, 2);
+ res[3] = element(m_temps, idx, 3);
+
+ return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::immediateElement(llvm::Value *idx)
+{
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = element(m_immediates, idx, 0);
+ res[1] = element(m_immediates, idx, 1);
+ res[2] = element(m_immediates, idx, 2);
+ res[3] = element(m_immediates, idx, 3);
+
+ return res;
+}
+
+llvm::Value * StorageSoa::elementPointer(llvm::Value *ptr, llvm::Value *index,
+ int channel) const
+{
+ std::vector<Value*> indices;
+ if (m_immediates == ptr)
+ indices.push_back(constantInt(0));
+ indices.push_back(index);
+ indices.push_back(constantInt(channel));
+
+ GetElementPtrInst *getElem = new GetElementPtrInst(ptr,
+ indices.begin(),
+ indices.end(),
+ name("ptr"),
+ m_block);
+ return getElem;
+}
+
+llvm::Value * StorageSoa::element(llvm::Value *ptr, llvm::Value *index,
+ int channel) const
+{
+ llvm::Value *res = elementPointer(ptr, index, channel);
+ LoadInst *load = new LoadInst(res, name("element"), false, m_block);
+ //load->setAlignment(8);
+ return load;
+}
+
+const char * StorageSoa::name(const char *prefix) const
+{
+ ++m_idx;
+ snprintf(m_name, 32, "%s%d", prefix, m_idx);
+ return m_name;
+}
+
+llvm::ConstantInt * StorageSoa::constantInt(int idx) const
+{
+ if (m_constInts.find(idx) != m_constInts.end()) {
+ return m_constInts[idx];
+ }
+ ConstantInt *constInt = ConstantInt::get(APInt(32, idx));
+ m_constInts[idx] = constInt;
+ return constInt;
+}
+
+llvm::Value *StorageSoa::alignedArrayLoad(llvm::Value *val)
+{
+ VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+ PointerType *vectorPtr = PointerType::get(vectorType, 0);
+
+ CastInst *cast = new BitCastInst(val, vectorPtr, name("toVector"), m_block);
+ LoadInst *load = new LoadInst(cast, name("alignLoad"), false, m_block);
+ load->setAlignment(8);
+ return load;
+}
+
+llvm::Module * StorageSoa::currentModule() const
+{
+ if (!m_block || !m_block->getParent())
+ return 0;
+
+ return m_block->getParent()->getParent();
+}
+
+llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &vec)
+{
+ VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+ std::vector<Constant*> immValues;
+ ConstantFP *constx = ConstantFP::get(Type::FloatTy, APFloat(vec[0]));
+ ConstantFP *consty = ConstantFP::get(Type::FloatTy, APFloat(vec[1]));
+ ConstantFP *constz = ConstantFP::get(Type::FloatTy, APFloat(vec[2]));
+ ConstantFP *constw = ConstantFP::get(Type::FloatTy, APFloat(vec[3]));
+ immValues.push_back(constx);
+ immValues.push_back(consty);
+ immValues.push_back(constz);
+ immValues.push_back(constw);
+ Constant *constVector = ConstantVector::get(vectorType, immValues);
+
+ return constVector;
+}
+
+std::vector<llvm::Value*> StorageSoa::load(Argument type, int idx, int swizzle,
+ llvm::Value *indIdx)
+{
+ std::vector<llvm::Value*> val(4);
+
+ //if we have an indirect index, always use that
+ // if not use the integer offset to create one
+ llvm::Value *realIndex = 0;
+ if (indIdx)
+ realIndex = indIdx;
+ else
+ realIndex = constantInt(idx);
+ debug_printf("XXXXXXXXX realIdx = %p, indIdx = %p\n", realIndex, indIdx);
+
+ switch(type) {
+ case Input:
+ val = inputElement(realIndex);
+ break;
+ case Output:
+ val = outputElement(realIndex);
+ break;
+ case Temp:
+ val = tempElement(realIndex);
+ break;
+ case Const:
+ val = constElement(realIndex);
+ break;
+ case Immediate:
+ val = immediateElement(realIndex);
+ break;
+ case Address:
+ debug_printf("Address not handled in the load phase!\n");
+ assert(0);
+ break;
+ }
+ if (!gallivm_is_swizzle(swizzle))
+ return val;
+
+ std::vector<llvm::Value*> res(4);
+
+ res[0] = val[gallivm_x_swizzle(swizzle)];
+ res[1] = val[gallivm_y_swizzle(swizzle)];
+ res[2] = val[gallivm_z_swizzle(swizzle)];
+ res[3] = val[gallivm_w_swizzle(swizzle)];
+ return res;
+}
+
+void StorageSoa::store(Argument type, int idx, const std::vector<llvm::Value*> &val,
+ int mask)
+{
+ llvm::Value *out = 0;
+ switch(type) {
+ case Output:
+ out = m_output;
+ break;
+ case Temp:
+ out = m_temps;
+ break;
+ case Input:
+ out = m_input;
+ break;
+ case Address: {
+ llvm::Value *addr = m_addresses[idx];
+ if (!addr) {
+ addAddress(idx);
+ addr = m_addresses[idx];
+ assert(addr);
+ }
+ new StoreInst(val[0], addr, false, m_block);
+ return;
+ break;
+ }
+ default:
+ debug_printf("Can't save output of this type: %d !\n", type);
+ assert(0);
+ break;
+ }
+ llvm::Value *realIndex = constantInt(idx);
+ if ((mask & TGSI_WRITEMASK_X)) {
+ llvm::Value *xChannel = elementPointer(out, realIndex, 0);
+ new StoreInst(val[0], xChannel, false, m_block);
+ }
+ if ((mask & TGSI_WRITEMASK_Y)) {
+ llvm::Value *yChannel = elementPointer(out, realIndex, 1);
+ new StoreInst(val[1], yChannel, false, m_block);
+ }
+ if ((mask & TGSI_WRITEMASK_Z)) {
+ llvm::Value *zChannel = elementPointer(out, realIndex, 2);
+ new StoreInst(val[2], zChannel, false, m_block);
+ }
+ if ((mask & TGSI_WRITEMASK_W)) {
+ llvm::Value *wChannel = elementPointer(out, realIndex, 3);
+ new StoreInst(val[3], wChannel, false, m_block);
+ }
+}
+
+void StorageSoa::addAddress(int idx)
+{
+ GlobalVariable *val = new GlobalVariable(
+ /*Type=*/IntegerType::get(32),
+ /*isConstant=*/false,
+ /*Linkage=*/GlobalValue::ExternalLinkage,
+ /*Initializer=*/0, // has initializer, specified below
+ /*Name=*/name("address"),
+ currentModule());
+ val->setInitializer(Constant::getNullValue(IntegerType::get(32)));
+
+ debug_printf("adding to %d\n", idx);
+ m_addresses[idx] = val;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef STORAGESOA_H
+#define STORAGESOA_H
+
+#include <vector>
+#include <list>
+#include <map>
+
+namespace llvm {
+ class BasicBlock;
+ class Constant;
+ class ConstantInt;
+ class GlobalVariable;
+ class LoadInst;
+ class Value;
+ class VectorType;
+ class Module;
+}
+
+class StorageSoa
+{
+public:
+ enum Argument {
+ Input,
+ Output,
+ Temp,
+ Const,
+ Immediate,
+ Address
+ };
+public:
+ StorageSoa(llvm::BasicBlock *block,
+ llvm::Value *input,
+ llvm::Value *output,
+ llvm::Value *consts,
+ llvm::Value *temps);
+
+
+ std::vector<llvm::Value*> load(Argument type, int idx, int swizzle,
+ llvm::Value *indIdx =0);
+ void store(Argument type, int idx, const std::vector<llvm::Value*> &val,
+ int mask);
+
+ void addImmediate(float *vec);
+ void declareImmediates();
+
+ void addAddress(int idx);
+
+ llvm::Value * addrElement(int idx) const;
+
+ llvm::ConstantInt *constantInt(int) const;
+private:
+ llvm::Value *elementPointer(llvm::Value *ptr, llvm::Value *indIdx,
+ int channel) const;
+ llvm::Value *element(llvm::Value *ptr, llvm::Value *idx,
+ int channel) const;
+ const char *name(const char *prefix) const;
+ llvm::Value *alignedArrayLoad(llvm::Value *val);
+ llvm::Module *currentModule() const;
+ llvm::Constant *createConstGlobalVector(const std::vector<float> &vec);
+
+ std::vector<llvm::Value*> inputElement(llvm::Value *indIdx);
+ std::vector<llvm::Value*> constElement(llvm::Value *indIdx);
+ std::vector<llvm::Value*> outputElement(llvm::Value *indIdx);
+ std::vector<llvm::Value*> tempElement(llvm::Value *indIdx);
+ std::vector<llvm::Value*> immediateElement(llvm::Value *indIdx);
+private:
+ llvm::BasicBlock *m_block;
+
+ llvm::Value *m_input;
+ llvm::Value *m_output;
+ llvm::Value *m_consts;
+ llvm::Value *m_temps;
+ llvm::GlobalVariable *m_immediates;
+
+ std::map<int, llvm::Value*> m_addresses;
+
+ std::vector<std::vector<float> > m_immediatesToFlush;
+
+ mutable std::map<int, llvm::ConstantInt*> m_constInts;
+ mutable char m_name[32];
+ mutable int m_idx;
+};
+
+#endif
--- /dev/null
+#include "tgsitollvm.h"
+
+#include "gallivm.h"
+#include "gallivm_p.h"
+
+#include "storage.h"
+#include "instructions.h"
+#include "storagesoa.h"
+#include "instructionssoa.h"
+
+#include "pipe/p_shader_tokens.h"
+
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/exec/tgsi_exec.h"
+#include "tgsi/util/tgsi_util.h"
+#include "tgsi/util/tgsi_build.h"
+#include "tgsi/util/tgsi_dump.h"
+
+
+#include <llvm/Module.h>
+#include <llvm/CallingConv.h>
+#include <llvm/Constants.h>
+#include <llvm/DerivedTypes.h>
+#include <llvm/Instructions.h>
+#include <llvm/ModuleProvider.h>
+#include <llvm/Pass.h>
+#include <llvm/PassManager.h>
+#include <llvm/ParameterAttributes.h>
+#include <llvm/Support/PatternMatch.h>
+#include <llvm/ExecutionEngine/JIT.h>
+#include <llvm/ExecutionEngine/Interpreter.h>
+#include <llvm/ExecutionEngine/GenericValue.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/LinkAllPasses.h>
+#include <llvm/Analysis/Verifier.h>
+#include <llvm/Analysis/LoopPass.h>
+#include <llvm/Target/TargetData.h>
+#include <llvm/Bitcode/ReaderWriter.h>
+#include <llvm/Transforms/Utils/Cloning.h>
+
+
+#include <sstream>
+#include <fstream>
+#include <iostream>
+
+using namespace llvm;
+
+static inline FunctionType *vertexShaderFunctionType()
+{
+ //Function takes three arguments,
+ // the calling code has to make sure the types it will
+ // pass are castable to the following:
+ // [4 x <4 x float>] inputs,
+ // [4 x <4 x float>] output,
+ // [4 x [4 x float]] consts,
+ // [4 x <4 x float>] temps
+
+ std::vector<const Type*> funcArgs;
+ VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
+ ArrayType *vectorArray = ArrayType::get(vectorType, 4);
+ PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0);
+
+ ArrayType *floatArray = ArrayType::get(Type::FloatTy, 4);
+ ArrayType *constsArray = ArrayType::get(floatArray, 4);
+ PointerType *constsArrayPtr = PointerType::get(constsArray, 0);
+
+ funcArgs.push_back(vectorArrayPtr);//inputs
+ funcArgs.push_back(vectorArrayPtr);//output
+ funcArgs.push_back(constsArrayPtr);//consts
+ funcArgs.push_back(vectorArrayPtr);//temps
+
+ FunctionType *functionType = FunctionType::get(
+ /*Result=*/Type::VoidTy,
+ /*Params=*/funcArgs,
+ /*isVarArg=*/false);
+
+ return functionType;
+}
+
+static inline void
+add_interpolator(struct gallivm_ir *ir,
+ struct gallivm_interpolate *interp)
+{
+ ir->interpolators[ir->num_interp] = *interp;
+ ++ir->num_interp;
+}
+
+static void
+translate_declaration(struct gallivm_ir *prog,
+ llvm::Module *module,
+ Storage *storage,
+ struct tgsi_full_declaration *decl,
+ struct tgsi_full_declaration *fd)
+{
+ if (decl->Declaration.File == TGSI_FILE_INPUT) {
+ unsigned first, last, mask;
+ uint interp_method;
+
+ assert(decl->Declaration.Declare == TGSI_DECLARE_RANGE);
+
+ first = decl->u.DeclarationRange.First;
+ last = decl->u.DeclarationRange.Last;
+ mask = decl->Declaration.UsageMask;
+
+ /* Do not touch WPOS.xy */
+ if (first == 0) {
+ mask &= ~TGSI_WRITEMASK_XY;
+ if (mask == TGSI_WRITEMASK_NONE) {
+ first++;
+ if (first > last) {
+ return;
+ }
+ }
+ }
+
+ interp_method = decl->Interpolation.Interpolate;
+
+ if (mask == TGSI_WRITEMASK_XYZW) {
+ unsigned i, j;
+
+ for (i = first; i <= last; i++) {
+ for (j = 0; j < NUM_CHANNELS; j++) {
+ //interp( mach, i, j );
+ struct gallivm_interpolate interp;
+ interp.type = interp_method;
+ interp.attrib = i;
+ interp.chan = j;
+ add_interpolator(prog, &interp);
+ }
+ }
+ } else {
+ unsigned i, j;
+ for( j = 0; j < NUM_CHANNELS; j++ ) {
+ if( mask & (1 << j) ) {
+ for( i = first; i <= last; i++ ) {
+ struct gallivm_interpolate interp;
+ interp.type = interp_method;
+ interp.attrib = i;
+ interp.chan = j;
+ add_interpolator(prog, &interp);
+ }
+ }
+ }
+ }
+ }
+}
+
+static void
+translate_declarationir(struct gallivm_ir *,
+ llvm::Module *,
+ StorageSoa *storage,
+ struct tgsi_full_declaration *decl,
+ struct tgsi_full_declaration *)
+{
+ if (decl->Declaration.File == TGSI_FILE_ADDRESS) {
+ int idx = decl->u.DeclarationRange.First;
+ storage->addAddress(idx);
+ }
+}
+
+static void
+translate_immediate(Storage *storage,
+ struct tgsi_full_immediate *imm)
+{
+ float vec[4];
+ int i;
+ for (i = 0; i < imm->Immediate.Size - 1; ++i) {
+ switch (imm->Immediate.DataType) {
+ case TGSI_IMM_FLOAT32:
+ vec[i] = imm->u.ImmediateFloat32[i].Float;
+ break;
+ default:
+ assert(0);
+ }
+ }
+ storage->addImmediate(vec);
+}
+
+
+static void
+translate_immediateir(StorageSoa *storage,
+ struct tgsi_full_immediate *imm)
+{
+ float vec[4];
+ int i;
+ for (i = 0; i < imm->Immediate.Size - 1; ++i) {
+ switch (imm->Immediate.DataType) {
+ case TGSI_IMM_FLOAT32:
+ vec[i] = imm->u.ImmediateFloat32[i].Float;
+ break;
+ default:
+ assert(0);
+ }
+ }
+ storage->addImmediate(vec);
+}
+
+static inline int
+swizzleInt(struct tgsi_full_src_register *src)
+{
+ int swizzle = 0;
+ int start = 1000;
+
+ for (int k = 0; k < 4; ++k) {
+ swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start;
+ start /= 10;
+ }
+ return swizzle;
+}
+
+static inline llvm::Value *
+swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src,
+ Storage *storage)
+{
+ int swizzle = swizzleInt(src);
+
+ if (gallivm_is_swizzle(swizzle)) {
+ /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/
+ val = storage->shuffleVector(val, swizzle);
+ }
+ return val;
+}
+
+static void
+translate_instruction(llvm::Module *module,
+ Storage *storage,
+ Instructions *instr,
+ struct tgsi_full_instruction *inst,
+ struct tgsi_full_instruction *fi,
+ unsigned instno)
+{
+ llvm::Value *inputs[4];
+ inputs[0] = 0;
+ inputs[1] = 0;
+ inputs[2] = 0;
+ inputs[3] = 0;
+
+ for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
+ struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+ llvm::Value *val = 0;
+ llvm::Value *indIdx = 0;
+
+ if (src->SrcRegister.Indirect) {
+ indIdx = storage->addrElement(src->SrcRegisterInd.Index);
+ indIdx = storage->extractIndex(indIdx);
+ }
+ if (src->SrcRegister.File == TGSI_FILE_CONSTANT) {
+ val = storage->constElement(src->SrcRegister.Index, indIdx);
+ } else if (src->SrcRegister.File == TGSI_FILE_INPUT) {
+ val = storage->inputElement(src->SrcRegister.Index, indIdx);
+ } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ val = storage->tempElement(src->SrcRegister.Index);
+ } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) {
+ val = storage->outputElement(src->SrcRegister.Index, indIdx);
+ } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
+ val = storage->immediateElement(src->SrcRegister.Index);
+ } else {
+ fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File);
+ return;
+ }
+
+ inputs[i] = swizzleVector(val, src, storage);
+ }
+
+ /*if (inputs[0])
+ instr->printVector(inputs[0]);
+ if (inputs[1])
+ instr->printVector(inputs[1]);*/
+ llvm::Value *out = 0;
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ARL: {
+ out = instr->arl(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_MOV: {
+ out = inputs[0];
+ }
+ break;
+ case TGSI_OPCODE_LIT: {
+ out = instr->lit(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_RCP: {
+ out = instr->rcp(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_RSQ: {
+ out = instr->rsq(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_EXP:
+ break;
+ case TGSI_OPCODE_LOG:
+ break;
+ case TGSI_OPCODE_MUL: {
+ out = instr->mul(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_ADD: {
+ out = instr->add(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_DP3: {
+ out = instr->dp3(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_DP4: {
+ out = instr->dp4(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_DST: {
+ out = instr->dst(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_MIN: {
+ out = instr->min(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_MAX: {
+ out = instr->max(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_SLT: {
+ out = instr->slt(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_SGE: {
+ out = instr->sge(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_MAD: {
+ out = instr->madd(inputs[0], inputs[1], inputs[2]);
+ }
+ break;
+ case TGSI_OPCODE_SUB: {
+ out = instr->sub(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_LERP: {
+ out = instr->lerp(inputs[0], inputs[1], inputs[2]);
+ }
+ break;
+ case TGSI_OPCODE_CND:
+ break;
+ case TGSI_OPCODE_CND0:
+ break;
+ case TGSI_OPCODE_DOT2ADD:
+ break;
+ case TGSI_OPCODE_INDEX:
+ break;
+ case TGSI_OPCODE_NEGATE:
+ break;
+ case TGSI_OPCODE_FRAC: {
+ out = instr->frc(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_CLAMP:
+ break;
+ case TGSI_OPCODE_FLOOR: {
+ out = instr->floor(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_ROUND:
+ break;
+ case TGSI_OPCODE_EXPBASE2: {
+ out = instr->ex2(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_LOGBASE2: {
+ out = instr->lg2(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_POWER: {
+ out = instr->pow(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_CROSSPRODUCT: {
+ out = instr->cross(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_MULTIPLYMATRIX:
+ break;
+ case TGSI_OPCODE_ABS: {
+ out = instr->abs(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_RCC:
+ break;
+ case TGSI_OPCODE_DPH: {
+ out = instr->dph(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_COS: {
+ out = instr->cos(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_DDX:
+ break;
+ case TGSI_OPCODE_DDY:
+ break;
+ case TGSI_OPCODE_KILP: {
+ out = instr->kilp(inputs[0]);
+ storage->setKilElement(out);
+ return;
+ }
+ break;
+ case TGSI_OPCODE_PK2H:
+ break;
+ case TGSI_OPCODE_PK2US:
+ break;
+ case TGSI_OPCODE_PK4B:
+ break;
+ case TGSI_OPCODE_PK4UB:
+ break;
+ case TGSI_OPCODE_RFL:
+ break;
+ case TGSI_OPCODE_SEQ:
+ break;
+ case TGSI_OPCODE_SFL:
+ break;
+ case TGSI_OPCODE_SGT: {
+ out = instr->sgt(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_SIN: {
+ out = instr->sin(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_SLE:
+ break;
+ case TGSI_OPCODE_SNE:
+ break;
+ case TGSI_OPCODE_STR:
+ break;
+ case TGSI_OPCODE_TEX:
+ break;
+ case TGSI_OPCODE_TXD:
+ break;
+ case TGSI_OPCODE_UP2H:
+ break;
+ case TGSI_OPCODE_UP2US:
+ break;
+ case TGSI_OPCODE_UP4B:
+ break;
+ case TGSI_OPCODE_UP4UB:
+ break;
+ case TGSI_OPCODE_X2D:
+ break;
+ case TGSI_OPCODE_ARA:
+ break;
+ case TGSI_OPCODE_ARR:
+ break;
+ case TGSI_OPCODE_BRA:
+ break;
+ case TGSI_OPCODE_CAL: {
+ instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr());
+ return;
+ }
+ break;
+ case TGSI_OPCODE_RET: {
+ instr->end();
+ return;
+ }
+ break;
+ case TGSI_OPCODE_SSG:
+ break;
+ case TGSI_OPCODE_CMP: {
+ out = instr->cmp(inputs[0], inputs[1], inputs[2]);
+ }
+ break;
+ case TGSI_OPCODE_SCS: {
+ out = instr->scs(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_TXB:
+ break;
+ case TGSI_OPCODE_NRM:
+ break;
+ case TGSI_OPCODE_DIV:
+ break;
+ case TGSI_OPCODE_DP2:
+ break;
+ case TGSI_OPCODE_TXL:
+ break;
+ case TGSI_OPCODE_BRK: {
+ instr->brk();
+ return;
+ }
+ break;
+ case TGSI_OPCODE_IF: {
+ instr->ifop(inputs[0]);
+ storage->setCurrentBlock(instr->currentBlock());
+ return; //just update the state
+ }
+ break;
+ case TGSI_OPCODE_LOOP:
+ break;
+ case TGSI_OPCODE_REP:
+ break;
+ case TGSI_OPCODE_ELSE: {
+ instr->elseop();
+ storage->setCurrentBlock(instr->currentBlock());
+ return; //only state update
+ }
+ break;
+ case TGSI_OPCODE_ENDIF: {
+ instr->endif();
+ storage->setCurrentBlock(instr->currentBlock());
+ return; //just update the state
+ }
+ break;
+ case TGSI_OPCODE_ENDLOOP:
+ break;
+ case TGSI_OPCODE_ENDREP:
+ break;
+ case TGSI_OPCODE_PUSHA:
+ break;
+ case TGSI_OPCODE_POPA:
+ break;
+ case TGSI_OPCODE_CEIL:
+ break;
+ case TGSI_OPCODE_I2F:
+ break;
+ case TGSI_OPCODE_NOT:
+ break;
+ case TGSI_OPCODE_TRUNC: {
+ out = instr->trunc(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_SHL:
+ break;
+ case TGSI_OPCODE_SHR:
+ break;
+ case TGSI_OPCODE_AND:
+ break;
+ case TGSI_OPCODE_OR:
+ break;
+ case TGSI_OPCODE_MOD:
+ break;
+ case TGSI_OPCODE_XOR:
+ break;
+ case TGSI_OPCODE_SAD:
+ break;
+ case TGSI_OPCODE_TXF:
+ break;
+ case TGSI_OPCODE_TXQ:
+ break;
+ case TGSI_OPCODE_CONT:
+ break;
+ case TGSI_OPCODE_EMIT:
+ break;
+ case TGSI_OPCODE_ENDPRIM:
+ break;
+ case TGSI_OPCODE_BGNLOOP2: {
+ instr->beginLoop();
+ storage->setCurrentBlock(instr->currentBlock());
+ return;
+ }
+ break;
+ case TGSI_OPCODE_BGNSUB: {
+ instr->bgnSub(instno);
+ storage->setCurrentBlock(instr->currentBlock());
+ storage->pushTemps();
+ return;
+ }
+ break;
+ case TGSI_OPCODE_ENDLOOP2: {
+ instr->endLoop();
+ storage->setCurrentBlock(instr->currentBlock());
+ return;
+ }
+ break;
+ case TGSI_OPCODE_ENDSUB: {
+ instr->endSub();
+ storage->setCurrentBlock(instr->currentBlock());
+ storage->popArguments();
+ storage->popTemps();
+ return;
+ }
+ break;
+ case TGSI_OPCODE_NOISE1:
+ break;
+ case TGSI_OPCODE_NOISE2:
+ break;
+ case TGSI_OPCODE_NOISE3:
+ break;
+ case TGSI_OPCODE_NOISE4:
+ break;
+ case TGSI_OPCODE_NOP:
+ break;
+ case TGSI_OPCODE_TEXBEM:
+ break;
+ case TGSI_OPCODE_TEXBEML:
+ break;
+ case TGSI_OPCODE_TEXREG2AR:
+ break;
+ case TGSI_OPCODE_TEXM3X2PAD:
+ break;
+ case TGSI_OPCODE_TEXM3X2TEX:
+ break;
+ case TGSI_OPCODE_TEXM3X3PAD:
+ break;
+ case TGSI_OPCODE_TEXM3X3TEX:
+ break;
+ case TGSI_OPCODE_TEXM3X3SPEC:
+ break;
+ case TGSI_OPCODE_TEXM3X3VSPEC:
+ break;
+ case TGSI_OPCODE_TEXREG2GB:
+ break;
+ case TGSI_OPCODE_TEXREG2RGB:
+ break;
+ case TGSI_OPCODE_TEXDP3TEX:
+ break;
+ case TGSI_OPCODE_TEXDP3:
+ break;
+ case TGSI_OPCODE_TEXM3X3:
+ break;
+ case TGSI_OPCODE_TEXM3X2DEPTH:
+ break;
+ case TGSI_OPCODE_TEXDEPTH:
+ break;
+ case TGSI_OPCODE_BEM:
+ break;
+ case TGSI_OPCODE_M4X3:
+ break;
+ case TGSI_OPCODE_M3X4:
+ break;
+ case TGSI_OPCODE_M3X3:
+ break;
+ case TGSI_OPCODE_M3X2:
+ break;
+ case TGSI_OPCODE_NRM4:
+ break;
+ case TGSI_OPCODE_CALLNZ:
+ break;
+ case TGSI_OPCODE_IFC:
+ break;
+ case TGSI_OPCODE_BREAKC:
+ break;
+ case TGSI_OPCODE_KIL:
+ break;
+ case TGSI_OPCODE_END:
+ instr->end();
+ return;
+ break;
+ default:
+ fprintf(stderr, "ERROR: Unknown opcode %d\n",
+ inst->Instruction.Opcode);
+ assert(0);
+ break;
+ }
+
+ if (!out) {
+ fprintf(stderr, "ERROR: unsupported opcode %d\n",
+ inst->Instruction.Opcode);
+ assert(!"Unsupported opcode");
+ }
+
+ /* # not sure if we need this */
+ switch( inst->Instruction.Saturate ) {
+ case TGSI_SAT_NONE:
+ break;
+ case TGSI_SAT_ZERO_ONE:
+ /*TXT( "_SAT" );*/
+ break;
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ /*TXT( "_SAT[-1,1]" );*/
+ break;
+ default:
+ assert( 0 );
+ }
+
+ /* store results */
+ for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
+ struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+
+ if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
+ storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+ } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) {
+ storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+ } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) {
+ storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+ } else {
+ fprintf(stderr, "ERROR: unsupported LLVM destination!");
+ assert(!"wrong destination");
+ }
+ }
+}
+
+
+static void
+translate_instructionir(llvm::Module *module,
+ StorageSoa *storage,
+ InstructionsSoa *instr,
+ struct tgsi_full_instruction *inst,
+ struct tgsi_full_instruction *fi,
+ unsigned instno)
+{
+ std::vector< std::vector<llvm::Value*> > inputs(inst->Instruction.NumSrcRegs);
+
+ for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
+ struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+ std::vector<llvm::Value*> val;
+ llvm::Value *indIdx = 0;
+ int swizzle = swizzleInt(src);
+
+ if (src->SrcRegister.Indirect) {
+ indIdx = storage->addrElement(src->SrcRegisterInd.Index);
+ }
+ if (src->SrcRegister.File == TGSI_FILE_CONSTANT) {
+ val = storage->load(StorageSoa::Const,
+ src->SrcRegister.Index, swizzle, indIdx);
+ } else if (src->SrcRegister.File == TGSI_FILE_INPUT) {
+ val = storage->load(StorageSoa::Input,
+ src->SrcRegister.Index, swizzle, indIdx);
+ } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+ val = storage->load(StorageSoa::Temp,
+ src->SrcRegister.Index, swizzle, indIdx);
+ } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) {
+ val = storage->load(StorageSoa::Output,
+ src->SrcRegister.Index, swizzle, indIdx);
+ } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
+ val = storage->load(StorageSoa::Immediate,
+ src->SrcRegister.Index, swizzle, indIdx);
+ } else {
+ fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File);
+ return;
+ }
+
+ inputs[i] = val;
+ }
+
+ std::vector<llvm::Value*> out(4);
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ARL: {
+ out = instr->arl(inputs[0]);
+ }
+ break;
+ case TGSI_OPCODE_MOV: {
+ out = inputs[0];
+ }
+ break;
+ case TGSI_OPCODE_LIT: {
+ }
+ break;
+ case TGSI_OPCODE_RCP: {
+ }
+ break;
+ case TGSI_OPCODE_RSQ: {
+ }
+ break;
+ case TGSI_OPCODE_EXP:
+ break;
+ case TGSI_OPCODE_LOG:
+ break;
+ case TGSI_OPCODE_MUL: {
+ out = instr->mul(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_ADD: {
+ out = instr->add(inputs[0], inputs[1]);
+ }
+ break;
+ case TGSI_OPCODE_DP3: {
+ }
+ break;
+ case TGSI_OPCODE_DP4: {
+ }
+ break;
+ case TGSI_OPCODE_DST: {
+ }
+ break;
+ case TGSI_OPCODE_MIN: {
+ }
+ break;
+ case TGSI_OPCODE_MAX: {
+ }
+ break;
+ case TGSI_OPCODE_SLT: {
+ }
+ break;
+ case TGSI_OPCODE_SGE: {
+ }
+ break;
+ case TGSI_OPCODE_MAD: {
+ out = instr->madd(inputs[0], inputs[1], inputs[2]);
+ }
+ break;
+ case TGSI_OPCODE_SUB: {
+ }
+ break;
+ case TGSI_OPCODE_LERP: {
+ }
+ break;
+ case TGSI_OPCODE_CND:
+ break;
+ case TGSI_OPCODE_CND0:
+ break;
+ case TGSI_OPCODE_DOT2ADD:
+ break;
+ case TGSI_OPCODE_INDEX:
+ break;
+ case TGSI_OPCODE_NEGATE:
+ break;
+ case TGSI_OPCODE_FRAC: {
+ }
+ break;
+ case TGSI_OPCODE_CLAMP:
+ break;
+ case TGSI_OPCODE_FLOOR: {
+ }
+ break;
+ case TGSI_OPCODE_ROUND:
+ break;
+ case TGSI_OPCODE_EXPBASE2: {
+ }
+ break;
+ case TGSI_OPCODE_LOGBASE2: {
+ }
+ break;
+ case TGSI_OPCODE_POWER: {
+ }
+ break;
+ case TGSI_OPCODE_CROSSPRODUCT: {
+ }
+ break;
+ case TGSI_OPCODE_MULTIPLYMATRIX:
+ break;
+ case TGSI_OPCODE_ABS: {
+ }
+ break;
+ case TGSI_OPCODE_RCC:
+ break;
+ case TGSI_OPCODE_DPH: {
+ }
+ break;
+ case TGSI_OPCODE_COS: {
+ }
+ break;
+ case TGSI_OPCODE_DDX:
+ break;
+ case TGSI_OPCODE_DDY:
+ break;
+ case TGSI_OPCODE_KILP: {
+ }
+ break;
+ case TGSI_OPCODE_PK2H:
+ break;
+ case TGSI_OPCODE_PK2US:
+ break;
+ case TGSI_OPCODE_PK4B:
+ break;
+ case TGSI_OPCODE_PK4UB:
+ break;
+ case TGSI_OPCODE_RFL:
+ break;
+ case TGSI_OPCODE_SEQ:
+ break;
+ case TGSI_OPCODE_SFL:
+ break;
+ case TGSI_OPCODE_SGT: {
+ }
+ break;
+ case TGSI_OPCODE_SIN: {
+ }
+ break;
+ case TGSI_OPCODE_SLE:
+ break;
+ case TGSI_OPCODE_SNE:
+ break;
+ case TGSI_OPCODE_STR:
+ break;
+ case TGSI_OPCODE_TEX:
+ break;
+ case TGSI_OPCODE_TXD:
+ break;
+ case TGSI_OPCODE_UP2H:
+ break;
+ case TGSI_OPCODE_UP2US:
+ break;
+ case TGSI_OPCODE_UP4B:
+ break;
+ case TGSI_OPCODE_UP4UB:
+ break;
+ case TGSI_OPCODE_X2D:
+ break;
+ case TGSI_OPCODE_ARA:
+ break;
+ case TGSI_OPCODE_ARR:
+ break;
+ case TGSI_OPCODE_BRA:
+ break;
+ case TGSI_OPCODE_CAL: {
+ }
+ break;
+ case TGSI_OPCODE_RET: {
+ }
+ break;
+ case TGSI_OPCODE_SSG:
+ break;
+ case TGSI_OPCODE_CMP: {
+ }
+ break;
+ case TGSI_OPCODE_SCS: {
+ }
+ break;
+ case TGSI_OPCODE_TXB:
+ break;
+ case TGSI_OPCODE_NRM:
+ break;
+ case TGSI_OPCODE_DIV:
+ break;
+ case TGSI_OPCODE_DP2:
+ break;
+ case TGSI_OPCODE_TXL:
+ break;
+ case TGSI_OPCODE_BRK: {
+ }
+ break;
+ case TGSI_OPCODE_IF: {
+ }
+ break;
+ case TGSI_OPCODE_LOOP:
+ break;
+ case TGSI_OPCODE_REP:
+ break;
+ case TGSI_OPCODE_ELSE: {
+ }
+ break;
+ case TGSI_OPCODE_ENDIF: {
+ }
+ break;
+ case TGSI_OPCODE_ENDLOOP:
+ break;
+ case TGSI_OPCODE_ENDREP:
+ break;
+ case TGSI_OPCODE_PUSHA:
+ break;
+ case TGSI_OPCODE_POPA:
+ break;
+ case TGSI_OPCODE_CEIL:
+ break;
+ case TGSI_OPCODE_I2F:
+ break;
+ case TGSI_OPCODE_NOT:
+ break;
+ case TGSI_OPCODE_TRUNC: {
+ }
+ break;
+ case TGSI_OPCODE_SHL:
+ break;
+ case TGSI_OPCODE_SHR:
+ break;
+ case TGSI_OPCODE_AND:
+ break;
+ case TGSI_OPCODE_OR:
+ break;
+ case TGSI_OPCODE_MOD:
+ break;
+ case TGSI_OPCODE_XOR:
+ break;
+ case TGSI_OPCODE_SAD:
+ break;
+ case TGSI_OPCODE_TXF:
+ break;
+ case TGSI_OPCODE_TXQ:
+ break;
+ case TGSI_OPCODE_CONT:
+ break;
+ case TGSI_OPCODE_EMIT:
+ break;
+ case TGSI_OPCODE_ENDPRIM:
+ break;
+ case TGSI_OPCODE_BGNLOOP2: {
+ }
+ break;
+ case TGSI_OPCODE_BGNSUB: {
+ }
+ break;
+ case TGSI_OPCODE_ENDLOOP2: {
+ }
+ break;
+ case TGSI_OPCODE_ENDSUB: {
+ }
+ break;
+ case TGSI_OPCODE_NOISE1:
+ break;
+ case TGSI_OPCODE_NOISE2:
+ break;
+ case TGSI_OPCODE_NOISE3:
+ break;
+ case TGSI_OPCODE_NOISE4:
+ break;
+ case TGSI_OPCODE_NOP:
+ break;
+ case TGSI_OPCODE_TEXBEM:
+ break;
+ case TGSI_OPCODE_TEXBEML:
+ break;
+ case TGSI_OPCODE_TEXREG2AR:
+ break;
+ case TGSI_OPCODE_TEXM3X2PAD:
+ break;
+ case TGSI_OPCODE_TEXM3X2TEX:
+ break;
+ case TGSI_OPCODE_TEXM3X3PAD:
+ break;
+ case TGSI_OPCODE_TEXM3X3TEX:
+ break;
+ case TGSI_OPCODE_TEXM3X3SPEC:
+ break;
+ case TGSI_OPCODE_TEXM3X3VSPEC:
+ break;
+ case TGSI_OPCODE_TEXREG2GB:
+ break;
+ case TGSI_OPCODE_TEXREG2RGB:
+ break;
+ case TGSI_OPCODE_TEXDP3TEX:
+ break;
+ case TGSI_OPCODE_TEXDP3:
+ break;
+ case TGSI_OPCODE_TEXM3X3:
+ break;
+ case TGSI_OPCODE_TEXM3X2DEPTH:
+ break;
+ case TGSI_OPCODE_TEXDEPTH:
+ break;
+ case TGSI_OPCODE_BEM:
+ break;
+ case TGSI_OPCODE_M4X3:
+ break;
+ case TGSI_OPCODE_M3X4:
+ break;
+ case TGSI_OPCODE_M3X3:
+ break;
+ case TGSI_OPCODE_M3X2:
+ break;
+ case TGSI_OPCODE_NRM4:
+ break;
+ case TGSI_OPCODE_CALLNZ:
+ break;
+ case TGSI_OPCODE_IFC:
+ break;
+ case TGSI_OPCODE_BREAKC:
+ break;
+ case TGSI_OPCODE_KIL:
+ break;
+ case TGSI_OPCODE_END:
+ instr->end();
+ return;
+ break;
+ default:
+ fprintf(stderr, "ERROR: Unknown opcode %d\n",
+ inst->Instruction.Opcode);
+ assert(0);
+ break;
+ }
+
+ if (!out[0]) {
+ fprintf(stderr, "ERROR: unsupported opcode %d\n",
+ inst->Instruction.Opcode);
+ assert(!"Unsupported opcode");
+ }
+
+ /* store results */
+ for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
+ struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+
+ if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
+ storage->store(StorageSoa::Output,
+ dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+ } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) {
+ storage->store(StorageSoa::Temp,
+ dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+ } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) {
+ storage->store(StorageSoa::Address,
+ dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+ } else {
+ fprintf(stderr, "ERROR: unsupported LLVM destination!");
+ assert(!"wrong destination");
+ }
+ }
+}
+
+llvm::Module *
+tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens)
+{
+ llvm::Module *mod = new Module("shader");
+ struct tgsi_parse_context parse;
+ struct tgsi_full_instruction fi;
+ struct tgsi_full_declaration fd;
+ unsigned instno = 0;
+ Function* shader = mod->getFunction("execute_shader");
+ std::ostringstream stream;
+ if (ir->type == GALLIVM_VS) {
+ stream << "vs_shader";
+ } else {
+ stream << "fs_shader";
+ }
+ stream << ir->id;
+ std::string func_name = stream.str();
+ shader->setName(func_name.c_str());
+
+ Function::arg_iterator args = shader->arg_begin();
+ Value *ptr_INPUT = args++;
+ ptr_INPUT->setName("input");
+
+ BasicBlock *label_entry = new BasicBlock("entry", shader, 0);
+
+ tgsi_parse_init(&parse, tokens);
+
+ fi = tgsi_default_full_instruction();
+ fd = tgsi_default_full_declaration();
+ Storage storage(label_entry, ptr_INPUT);
+ Instructions instr(mod, shader, label_entry, &storage);
+ while(!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ translate_declaration(ir, mod, &storage,
+ &parse.FullToken.FullDeclaration,
+ &fd);
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ translate_immediate(&storage,
+ &parse.FullToken.FullImmediate);
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ translate_instruction(mod, &storage, &instr,
+ &parse.FullToken.FullInstruction,
+ &fi, instno);
+ ++instno;
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+
+ tgsi_parse_free(&parse);
+
+ ir->num_consts = storage.numConsts();
+ return mod;
+}
+
+llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir,
+ const struct tgsi_token *tokens)
+{
+ llvm::Module *mod = new Module("shader");
+ struct tgsi_parse_context parse;
+ struct tgsi_full_instruction fi;
+ struct tgsi_full_declaration fd;
+ unsigned instno = 0;
+ std::ostringstream stream;
+ if (ir->type == GALLIVM_VS) {
+ stream << "vs_shader";
+ } else {
+ stream << "fs_shader";
+ }
+ //stream << ir->id;
+ std::string func_name = stream.str();
+ Function *shader = llvm::cast<Function>(mod->getOrInsertFunction(
+ func_name.c_str(),
+ vertexShaderFunctionType()));
+
+ Function::arg_iterator args = shader->arg_begin();
+ Value *input = args++;
+ input->setName("inputs");
+ Value *output = args++;
+ output->setName("outputs");
+ Value *consts = args++;
+ consts->setName("consts");
+ Value *temps = args++;
+ temps->setName("temps");
+
+ BasicBlock *label_entry = new BasicBlock("entry", shader, 0);
+
+ tgsi_parse_init(&parse, tokens);
+
+ fi = tgsi_default_full_instruction();
+ fd = tgsi_default_full_declaration();
+
+ StorageSoa storage(label_entry, input, output, consts, temps);
+ InstructionsSoa instr(mod, shader, label_entry, &storage);
+
+ while(!tgsi_parse_end_of_tokens(&parse)) {
+ tgsi_parse_token(&parse);
+
+ switch (parse.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ translate_declarationir(ir, mod, &storage,
+ &parse.FullToken.FullDeclaration,
+ &fd);
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ translate_immediateir(&storage,
+ &parse.FullToken.FullImmediate);
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ storage.declareImmediates();
+ translate_instructionir(mod, &storage, &instr,
+ &parse.FullToken.FullInstruction,
+ &fi, instno);
+ ++instno;
+ break;
+
+ default:
+ assert(0);
+ }
+ }
+
+ tgsi_parse_free(&parse);
+
+ return mod;
+}
--- /dev/null
+#ifndef TGSITOLLVM_H
+#define TGSITOLLVM_H
+
+
+namespace llvm {
+ class Module;
+}
+
+struct gallivm_ir;
+struct tgsi_token;
+
+
+llvm::Module * tgsi_to_llvm(struct gallivm_ir *ir,
+ const struct tgsi_token *tokens);
+
+
+llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir,
+ const struct tgsi_token *tokens);
+
+#endif
--- /dev/null
+
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = pipebuffer
+
+DRIVER_SOURCES = \
+ pb_buffer_fenced.c \
+ pb_buffer_malloc.c \
+ pb_bufmgr_fenced.c \
+ pb_bufmgr_mm.c \
+ pb_bufmgr_pool.c \
+ pb_winsys.c
+
+C_SOURCES = \
+ $(DRIVER_SOURCES)
+
+ASM_SOURCES =
+
+include ../../Makefile.template
+
+symlinks:
+
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * List macros heavily inspired by the Linux kernel
+ * list handling. No list looping yet.
+ *
+ * Is not threadsafe, so common operations need to
+ * be protected using an external mutex.
+ */
+
+#ifndef LINKED_LIST_H_
+#define LINKED_LIST_H_
+
+
+#include <stddef.h>
+
+
+struct list_head
+{
+ struct list_head *prev;
+ struct list_head *next;
+};
+
+
+#define LIST_INITHEAD(__item) \
+ do { \
+ (__item)->prev = (__item); \
+ (__item)->next = (__item); \
+ } while (0)
+
+#define LIST_ADD(__item, __list) \
+ do { \
+ (__item)->prev = (__list); \
+ (__item)->next = (__list)->next; \
+ (__list)->next->prev = (__item); \
+ (__list)->next = (__item); \
+ } while (0)
+
+#define LIST_ADDTAIL(__item, __list) \
+ do { \
+ (__item)->next = (__list); \
+ (__item)->prev = (__list)->prev; \
+ (__list)->prev->next = (__item); \
+ (__list)->prev = (__item); \
+ } while(0)
+
+#define LIST_DEL(__item) \
+ do { \
+ (__item)->prev->next = (__item)->next; \
+ (__item)->next->prev = (__item)->prev; \
+ } while(0)
+
+#define LIST_DELINIT(__item) \
+ do { \
+ (__item)->prev->next = (__item)->next; \
+ (__item)->next->prev = (__item)->prev; \
+ (__item)->next = (__item); \
+ (__item)->prev = (__item); \
+ } while(0)
+
+#define LIST_ENTRY(__type, __item, __field) \
+ ((__type *)(((char *)(__item)) - offsetof(__type, __field)))
+
+
+#endif /*LINKED_LIST_H_*/
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Generic code for buffers.
+ *
+ * Behind a pipe buffle handle there can be DMA buffers, client (or user)
+ * buffers, regular malloced buffers, etc. This file provides an abstract base
+ * buffer handle that allows the driver to cope with all those kinds of buffers
+ * in a more flexible way.
+ *
+ * There is no obligation of a winsys driver to use this library. And a pipe
+ * driver should be completly agnostic about it.
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+#ifndef PB_BUFFER_H_
+#define PB_BUFFER_H_
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_state.h"
+#include "pipe/p_inlines.h"
+
+
+struct pb_vtbl;
+
+/**
+ * Buffer description.
+ *
+ * Used when allocating the buffer.
+ */
+struct pb_desc
+{
+ unsigned alignment;
+ unsigned usage;
+};
+
+
+/**
+ * Base class for all pb_* buffers.
+ */
+struct pb_buffer
+{
+ struct pipe_buffer base;
+
+ /**
+ * Pointer to the virtual function table.
+ *
+ * Avoid accessing this table directly. Use the inline functions below
+ * instead to avoid mistakes.
+ */
+ const struct pb_vtbl *vtbl;
+};
+
+
+/**
+ * Virtual function table for the buffer storage operations.
+ *
+ * Note that creation is not done through this table.
+ */
+struct pb_vtbl
+{
+ void (*destroy)( struct pb_buffer *buf );
+
+ /**
+ * Map the entire data store of a buffer object into the client's address.
+ * flags is bitmask of PIPE_BUFFER_FLAG_READ/WRITE.
+ */
+ void *(*map)( struct pb_buffer *buf,
+ unsigned flags );
+
+ void (*unmap)( struct pb_buffer *buf );
+
+ /**
+ * Get the base buffer and the offset.
+ *
+ * A buffer can be subdivided in smaller buffers. This method should return
+ * the underlaying buffer, and the relative offset.
+ *
+ * Buffers without an underlaying base buffer should return themselves, with
+ * a zero offset.
+ *
+ * Note that this will increase the reference count of the base buffer.
+ */
+ void (*get_base_buffer)( struct pb_buffer *buf,
+ struct pb_buffer **base_buf,
+ unsigned *offset );
+};
+
+
+static INLINE struct pipe_buffer *
+pb_pipe_buffer( struct pb_buffer *pbuf )
+{
+ assert(pbuf);
+ return &pbuf->base;
+}
+
+
+static INLINE struct pb_buffer *
+pb_buffer( struct pipe_buffer *buf )
+{
+ assert(buf);
+ /* Could add a magic cookie check on debug builds.
+ */
+ return (struct pb_buffer *)buf;
+}
+
+
+/* Accessor functions for pb->vtbl:
+ */
+static INLINE void *
+pb_map(struct pb_buffer *buf,
+ unsigned flags)
+{
+ assert(buf);
+ return buf->vtbl->map(buf, flags);
+}
+
+
+static INLINE void
+pb_unmap(struct pb_buffer *buf)
+{
+ assert(buf);
+ buf->vtbl->unmap(buf);
+}
+
+
+static INLINE void
+pb_get_base_buffer( struct pb_buffer *buf,
+ struct pb_buffer **base_buf,
+ unsigned *offset )
+{
+ buf->vtbl->get_base_buffer(buf, base_buf, offset);
+}
+
+
+static INLINE void
+pb_destroy(struct pb_buffer *buf)
+{
+ assert(buf);
+ buf->vtbl->destroy(buf);
+}
+
+
+/* XXX: thread safety issues!
+ */
+static INLINE void
+pb_reference(struct pb_buffer **dst,
+ struct pb_buffer *src)
+{
+ if (src)
+ src->base.refcount++;
+
+ if (*dst && --(*dst)->base.refcount == 0)
+ pb_destroy( *dst );
+
+ *dst = src;
+}
+
+
+/**
+ * Malloc-based buffer to store data that can't be used by the graphics
+ * hardware.
+ */
+struct pb_buffer *
+pb_malloc_buffer_create(size_t size,
+ const struct pb_desc *desc);
+
+
+void
+pb_init_winsys(struct pipe_winsys *winsys);
+
+
+#endif /*PB_BUFFER_H_*/
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Implementation of fenced buffers.
+ *
+ * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com>
+ * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
+ */
+
+
+#include "linked_list.h"
+
+#include "p_compiler.h"
+#include "p_debug.h"
+#include "p_winsys.h"
+#include "p_thread.h"
+#include "p_util.h"
+
+#include "pb_buffer.h"
+#include "pb_buffer_fenced.h"
+
+#ifndef __MSC__
+#include <unistd.h>
+#endif
+
+
+/**
+ * Convenience macro (type safe).
+ */
+#define SUPER(__derived) (&(__derived)->base)
+
+
+struct fenced_buffer_list
+{
+ _glthread_Mutex mutex;
+
+ struct pipe_winsys *winsys;
+
+ size_t numDelayed;
+ size_t checkDelayed;
+
+ struct list_head delayed;
+};
+
+
+/**
+ * Wrapper around a pipe buffer which adds fencing and reference counting.
+ */
+struct fenced_buffer
+{
+ struct pb_buffer base;
+
+ struct pb_buffer *buffer;
+
+ struct pipe_fence_handle *fence;
+
+ struct list_head head;
+ struct fenced_buffer_list *list;
+};
+
+
+static INLINE struct fenced_buffer *
+fenced_buffer(struct pb_buffer *buf)
+{
+ assert(buf);
+ assert(buf->vtbl == &fenced_buffer_vtbl);
+ return (struct fenced_buffer *)buf;
+}
+
+
+
+
+static void
+_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
+ int wait)
+{
+ struct pipe_winsys *winsys = fenced_list->winsys;
+ struct fenced_buffer *fenced_buf;
+ struct list_head *list, *prev;
+ int signaled = -1;
+
+ list = fenced_list->delayed.next;
+
+ if (fenced_list->numDelayed > 3) {
+ unsigned i;
+
+ for (i = 0; i < fenced_list->numDelayed; i += 3) {
+ list = list->next;
+ }
+ }
+
+ prev = list->prev;
+ for (; list != &fenced_list->delayed; list = prev, prev = list->prev) {
+
+ fenced_buf = LIST_ENTRY(struct fenced_buffer, list, head);
+
+ if (signaled != 0) {
+ if (wait) {
+ signaled = winsys->fence_finish(winsys, fenced_buf->fence, 0);
+ }
+ else {
+ signaled = winsys->fence_signalled(winsys, fenced_buf->fence, 0);
+ }
+ }
+
+ if (signaled != 0)
+ /* XXX: we are assuming that buffers are freed in the same order they
+ * are fenced which may not always be true...
+ */
+ break;
+
+ winsys->fence_reference(winsys, &fenced_buf->fence, NULL);
+
+ LIST_DEL(list);
+ fenced_list->numDelayed--;
+
+ /* Do the delayed destroy:
+ */
+ pb_reference(&fenced_buf->buffer, NULL);
+ FREE(fenced_buf);
+ }
+}
+
+
+static void
+fenced_buffer_destroy(struct pb_buffer *buf)
+{
+ struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+ struct fenced_buffer_list *fenced_list = fenced_buf->list;
+
+ if (fenced_buf->fence) {
+ LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed);
+ fenced_list->numDelayed++;
+ }
+ else {
+ pb_reference(&fenced_buf->buffer, NULL);
+ FREE(fenced_buf);
+ }
+
+ if ((fenced_list->numDelayed % fenced_list->checkDelayed) == 0)
+ _fenced_buffer_list_check_free(fenced_list, 0);
+}
+
+
+static void *
+fenced_buffer_map(struct pb_buffer *buf,
+ unsigned flags)
+{
+ struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+ return pb_map(fenced_buf->buffer, flags);
+}
+
+
+static void
+fenced_buffer_unmap(struct pb_buffer *buf)
+{
+ struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+ pb_unmap(fenced_buf->buffer);
+}
+
+
+static void
+fenced_buffer_get_base_buffer(struct pb_buffer *buf,
+ struct pb_buffer **base_buf,
+ unsigned *offset)
+{
+ struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+ pb_get_base_buffer(fenced_buf->buffer, base_buf, offset);
+}
+
+
+const struct pb_vtbl
+fenced_buffer_vtbl = {
+ fenced_buffer_destroy,
+ fenced_buffer_map,
+ fenced_buffer_unmap,
+ fenced_buffer_get_base_buffer
+};
+
+
+struct pb_buffer *
+fenced_buffer_create(struct fenced_buffer_list *fenced_list,
+ struct pb_buffer *buffer)
+{
+ struct fenced_buffer *buf;
+
+ if(!buffer)
+ return NULL;
+
+ buf = CALLOC_STRUCT(fenced_buffer);
+ if(!buf)
+ return NULL;
+
+ buf->base.base.refcount = 1;
+ buf->base.base.alignment = buffer->base.alignment;
+ buf->base.base.usage = buffer->base.usage;
+ buf->base.base.size = buffer->base.size;
+
+ buf->base.vtbl = &fenced_buffer_vtbl;
+ buf->buffer = buffer;
+ buf->list = fenced_list;
+
+ return &buf->base;
+}
+
+
+void
+buffer_fence(struct pb_buffer *buf,
+ struct pipe_fence_handle *fence)
+{
+ struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+ struct fenced_buffer_list *fenced_list = fenced_buf->list;
+ struct pipe_winsys *winsys = fenced_list->winsys;
+
+ _glthread_LOCK_MUTEX(fenced_list->mutex);
+ winsys->fence_reference(winsys, &fenced_buf->fence, fence);
+ _glthread_UNLOCK_MUTEX(fenced_list->mutex);
+}
+
+
+struct fenced_buffer_list *
+fenced_buffer_list_create(struct pipe_winsys *winsys)
+{
+ struct fenced_buffer_list *fenced_list;
+
+ fenced_list = (struct fenced_buffer_list *)CALLOC(1, sizeof(*fenced_list));
+ if (!fenced_list)
+ return NULL;
+
+ fenced_list->winsys = winsys;
+
+ LIST_INITHEAD(&fenced_list->delayed);
+
+ fenced_list->numDelayed = 0;
+
+ /* TODO: don't hard code this */
+ fenced_list->checkDelayed = 5;
+
+ _glthread_INIT_MUTEX(fenced_list->mutex);
+
+ return fenced_list;
+}
+
+
+void
+fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
+ int wait)
+{
+ _glthread_LOCK_MUTEX(fenced_list->mutex);
+ _fenced_buffer_list_check_free(fenced_list, wait);
+ _glthread_UNLOCK_MUTEX(fenced_list->mutex);
+}
+
+
+void
+fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list)
+{
+ _glthread_LOCK_MUTEX(fenced_list->mutex);
+
+ /* Wait on outstanding fences */
+ while (fenced_list->numDelayed) {
+ _glthread_UNLOCK_MUTEX(fenced_list->mutex);
+ sched_yield();
+ _fenced_buffer_list_check_free(fenced_list, 1);
+ _glthread_LOCK_MUTEX(fenced_list->mutex);
+ }
+
+ _glthread_UNLOCK_MUTEX(fenced_list->mutex);
+
+ FREE(fenced_list);
+}
+
+
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Buffer fencing.
+ *
+ * "Fenced buffers" is actually a misnomer. They should be referred as
+ * "fenceable buffers", i.e, buffers that can be fenced, but I couldn't find
+ * the word "fenceable" in the dictionary.
+ *
+ * A "fenced buffer" is a decorator around a normal buffer, which adds two
+ * special properties:
+ * - the ability for the destruction to be delayed by a fence;
+ * - reference counting.
+ *
+ * Usually DMA buffers have a life-time that will extend the life-time of its
+ * handle. The end-of-life is dictated by the fence signalling.
+ *
+ * Between the handle's destruction, and the fence signalling, the buffer is
+ * stored in a fenced buffer list.
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+#ifndef PB_BUFFER_FENCED_H_
+#define PB_BUFFER_FENCED_H_
+
+
+#include "pipe/p_debug.h"
+
+
+struct pipe_winsys;
+struct pipe_buffer;
+struct pipe_fence_handle;
+
+
+/**
+ * List of buffers which are awaiting fence signalling.
+ */
+struct fenced_buffer_list;
+
+
+/**
+ * The fenced buffer's virtual function table.
+ *
+ * NOTE: Made public for debugging purposes.
+ */
+extern const struct pb_vtbl fenced_buffer_vtbl;
+
+
+/**
+ * Create a fenced buffer list.
+ *
+ * See also fenced_bufmgr_create for a more convenient way to use this.
+ */
+struct fenced_buffer_list *
+fenced_buffer_list_create(struct pipe_winsys *winsys);
+
+
+/**
+ * Walk the fenced buffer list to check and free signalled buffers.
+ */
+void
+fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
+ int wait);
+
+void
+fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list);
+
+
+/**
+ * Wrap a buffer in a fenced buffer.
+ *
+ * NOTE: this will not increase the buffer reference count.
+ */
+struct pb_buffer *
+fenced_buffer_create(struct fenced_buffer_list *fenced,
+ struct pb_buffer *buffer);
+
+
+/**
+ * Set a buffer's fence.
+ *
+ * NOTE: Although it takes a generic pb_buffer argument, it will fail
+ * on everything but buffers returned by fenced_buffer_create.
+ */
+void
+buffer_fence(struct pb_buffer *buf,
+ struct pipe_fence_handle *fence);
+
+
+#endif /*PB_BUFFER_FENCED_H_*/
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Implementation of malloc-based buffers to store data that can't be processed
+ * by the hardware.
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+#include "pb_buffer.h"
+
+
+struct malloc_buffer
+{
+ struct pb_buffer base;
+ void *data;
+};
+
+
+extern const struct pb_vtbl malloc_buffer_vtbl;
+
+static INLINE struct malloc_buffer *
+malloc_buffer(struct pb_buffer *buf)
+{
+ assert(buf);
+ assert(buf->vtbl == &malloc_buffer_vtbl);
+ return (struct malloc_buffer *)buf;
+}
+
+
+static void
+malloc_buffer_destroy(struct pb_buffer *buf)
+{
+ align_free(malloc_buffer(buf)->data);
+ FREE(buf);
+}
+
+
+static void *
+malloc_buffer_map(struct pb_buffer *buf,
+ unsigned flags)
+{
+ return malloc_buffer(buf)->data;
+}
+
+
+static void
+malloc_buffer_unmap(struct pb_buffer *buf)
+{
+ /* No-op */
+}
+
+
+static void
+malloc_buffer_get_base_buffer(struct pb_buffer *buf,
+ struct pb_buffer **base_buf,
+ unsigned *offset)
+{
+ *base_buf = buf;
+ *offset = 0;
+}
+
+
+const struct pb_vtbl
+malloc_buffer_vtbl = {
+ malloc_buffer_destroy,
+ malloc_buffer_map,
+ malloc_buffer_unmap,
+ malloc_buffer_get_base_buffer
+};
+
+
+struct pb_buffer *
+pb_malloc_buffer_create(size_t size,
+ const struct pb_desc *desc)
+{
+ struct malloc_buffer *buf;
+
+ /* TODO: do a single allocation */
+
+ buf = CALLOC_STRUCT(malloc_buffer);
+ if(!buf)
+ return NULL;
+
+ buf->base.base.refcount = 1;
+ buf->base.base.alignment = desc->alignment;
+ buf->base.base.usage = desc->usage;
+ buf->base.base.size = size;
+ buf->base.vtbl = &malloc_buffer_vtbl;
+
+ buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment);
+ if(!buf->data) {
+ align_free(buf);
+ return NULL;
+ }
+
+ return &buf->base;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Buffer management.
+ *
+ * A buffer manager does only one basic thing: it creates buffers. Actually,
+ * "buffer factory" would probably a more accurate description.
+ *
+ * You can chain buffer managers so that you can have a finer grained memory
+ * management and pooling.
+ *
+ * For example, for a simple batch buffer manager you would chain:
+ * - the native buffer manager, which provides DMA memory from the graphics
+ * memory space;
+ * - the pool buffer manager, which keep around a pool of equally sized buffers
+ * to avoid latency associated with the native buffer manager;
+ * - the fenced buffer manager, which will delay buffer destruction until the
+ * the moment the card finishing processing it.
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+#ifndef PB_BUFMGR_H_
+#define PB_BUFMGR_H_
+
+
+#include <stddef.h>
+
+
+struct pb_desc;
+struct pipe_buffer;
+struct pipe_winsys;
+
+
+/**
+ * Abstract base class for all buffer managers.
+ */
+struct pb_manager
+{
+ /* XXX: we will likely need more allocation flags */
+ struct pb_buffer *
+ (*create_buffer)( struct pb_manager *mgr,
+ size_t size,
+ const struct pb_desc *desc);
+
+ void
+ (*destroy)( struct pb_manager *mgr );
+};
+
+
+/**
+ * Static buffer pool manager.
+ *
+ * Manages the allocation of equally sized buffers. It does so by allocating
+ * a single big buffer and divide it equally sized buffers.
+ *
+ * It is meant to manage the allocation of batch buffer pools.
+ */
+struct pb_manager *
+pool_bufmgr_create(struct pb_manager *provider,
+ size_t n, size_t size,
+ const struct pb_desc *desc);
+
+
+/**
+ * Wraper around the old memory manager.
+ *
+ * It managers buffers of different sizes. It does so by allocating a buffer
+ * with the size of the heap, and then using the old mm memory manager to manage
+ * that heap.
+ */
+struct pb_manager *
+mm_bufmgr_create(struct pb_manager *provider,
+ size_t size, size_t align2);
+
+/**
+ * Same as mm_bufmgr_create.
+ *
+ * Buffer will be release when the manager is destroyed.
+ */
+struct pb_manager *
+mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
+ size_t size, size_t align2);
+
+
+/**
+ * Fenced buffer manager.
+ *
+ * This manager is just meant for convenience. It wraps the buffers returned
+ * by another manager in fenced buffers, so that
+ *
+ * NOTE: the buffer manager that provides the buffers will be destroyed
+ * at the same time.
+ */
+struct pb_manager *
+fenced_bufmgr_create(struct pb_manager *provider,
+ struct pipe_winsys *winsys);
+
+
+#endif /*PB_BUFMGR_H_*/
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * A buffer manager that wraps buffers in fenced buffers.
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.dot.com>
+ */
+
+
+#include "p_debug.h"
+#include "p_util.h"
+
+#include "pb_buffer.h"
+#include "pb_buffer_fenced.h"
+#include "pb_bufmgr.h"
+
+
+struct fenced_pb_manager
+{
+ struct pb_manager base;
+
+ struct pb_manager *provider;
+
+ struct fenced_buffer_list *fenced_list;
+};
+
+
+static INLINE struct fenced_pb_manager *
+fenced_pb_manager(struct pb_manager *mgr)
+{
+ assert(mgr);
+ return (struct fenced_pb_manager *)mgr;
+}
+
+
+static struct pb_buffer *
+fenced_bufmgr_create_buffer(struct pb_manager *mgr,
+ size_t size,
+ const struct pb_desc *desc)
+{
+ struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr);
+ struct pb_buffer *buf;
+ struct pb_buffer *fenced_buf;
+
+ /* check for free buffers before allocating new ones */
+ fenced_buffer_list_check_free(fenced_mgr->fenced_list, 0);
+
+ buf = fenced_mgr->provider->create_buffer(fenced_mgr->provider, size, desc);
+ if(!buf) {
+ /* try harder to get a buffer */
+ fenced_buffer_list_check_free(fenced_mgr->fenced_list, 1);
+
+ buf = fenced_mgr->provider->create_buffer(fenced_mgr->provider, size, desc);
+ if(!buf) {
+ /* give up */
+ return NULL;
+ }
+ }
+
+ fenced_buf = fenced_buffer_create(fenced_mgr->fenced_list, buf);
+ if(!fenced_buf) {
+ assert(buf->base.refcount == 1);
+ pb_destroy(buf);
+ }
+
+ return fenced_buf;
+}
+
+
+static void
+fenced_bufmgr_destroy(struct pb_manager *mgr)
+{
+ struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr);
+
+ fenced_buffer_list_destroy(fenced_mgr->fenced_list);
+
+ fenced_mgr->provider->destroy(fenced_mgr->provider);
+
+ FREE(fenced_mgr);
+}
+
+
+struct pb_manager *
+fenced_bufmgr_create(struct pb_manager *provider,
+ struct pipe_winsys *winsys)
+{
+ struct fenced_pb_manager *fenced_mgr;
+
+ fenced_mgr = (struct fenced_pb_manager *)CALLOC(1, sizeof(*fenced_mgr));
+ if (!fenced_mgr)
+ return NULL;
+
+ fenced_mgr->base.destroy = fenced_bufmgr_destroy;
+ fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer;
+
+ fenced_mgr->provider = provider;
+ fenced_mgr->fenced_list = fenced_buffer_list_create(winsys);
+ if(!fenced_mgr->fenced_list) {
+ FREE(fenced_mgr);
+ return NULL;
+ }
+
+ return &fenced_mgr->base;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 1999 Wittawat Yamwong
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Buffer manager using the old texture memory manager.
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+
+#include "linked_list.h"
+
+#include "p_defines.h"
+#include "p_debug.h"
+#include "p_thread.h"
+#include "p_util.h"
+#include "pb_buffer.h"
+#include "pb_bufmgr.h"
+
+
+/**
+ * Convenience macro (type safe).
+ */
+#define SUPER(__derived) (&(__derived)->base)
+
+
+struct mem_block
+{
+ struct mem_block *next, *prev;
+ struct mem_block *next_free, *prev_free;
+ struct mem_block *heap;
+ int ofs, size;
+ unsigned int free:1;
+ unsigned int reserved:1;
+};
+
+
+#ifdef DEBUG
+/**
+ * For debugging purposes.
+ */
+static void
+mmDumpMemInfo(const struct mem_block *heap)
+{
+ debug_printf("Memory heap %p:\n", (void *)heap);
+ if (heap == 0) {
+ debug_printf(" heap == 0\n");
+ } else {
+ const struct mem_block *p;
+
+ for(p = heap->next; p != heap; p = p->next) {
+ debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size,
+ p->free ? 'F':'.',
+ p->reserved ? 'R':'.');
+ }
+
+ debug_printf("\nFree list:\n");
+
+ for(p = heap->next_free; p != heap; p = p->next_free) {
+ debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size,
+ p->free ? 'F':'.',
+ p->reserved ? 'R':'.');
+ }
+
+ }
+ debug_printf("End of memory blocks\n");
+}
+#endif
+
+
+/**
+ * input: total size in bytes
+ * return: a heap pointer if OK, NULL if error
+ */
+static struct mem_block *
+mmInit(int ofs, int size)
+{
+ struct mem_block *heap, *block;
+
+ if (size <= 0)
+ return NULL;
+
+ heap = CALLOC_STRUCT(mem_block);
+ if (!heap)
+ return NULL;
+
+ block = CALLOC_STRUCT(mem_block);
+ if (!block) {
+ FREE(heap);
+ return NULL;
+ }
+
+ heap->next = block;
+ heap->prev = block;
+ heap->next_free = block;
+ heap->prev_free = block;
+
+ block->heap = heap;
+ block->next = heap;
+ block->prev = heap;
+ block->next_free = heap;
+ block->prev_free = heap;
+
+ block->ofs = ofs;
+ block->size = size;
+ block->free = 1;
+
+ return heap;
+}
+
+
+static struct mem_block *
+SliceBlock(struct mem_block *p,
+ int startofs, int size,
+ int reserved, int alignment)
+{
+ struct mem_block *newblock;
+
+ /* break left [p, newblock, p->next], then p = newblock */
+ if (startofs > p->ofs) {
+ newblock = CALLOC_STRUCT(mem_block);
+ if (!newblock)
+ return NULL;
+ newblock->ofs = startofs;
+ newblock->size = p->size - (startofs - p->ofs);
+ newblock->free = 1;
+ newblock->heap = p->heap;
+
+ newblock->next = p->next;
+ newblock->prev = p;
+ p->next->prev = newblock;
+ p->next = newblock;
+
+ newblock->next_free = p->next_free;
+ newblock->prev_free = p;
+ p->next_free->prev_free = newblock;
+ p->next_free = newblock;
+
+ p->size -= newblock->size;
+ p = newblock;
+ }
+
+ /* break right, also [p, newblock, p->next] */
+ if (size < p->size) {
+ newblock = CALLOC_STRUCT(mem_block);
+ if (!newblock)
+ return NULL;
+ newblock->ofs = startofs + size;
+ newblock->size = p->size - size;
+ newblock->free = 1;
+ newblock->heap = p->heap;
+
+ newblock->next = p->next;
+ newblock->prev = p;
+ p->next->prev = newblock;
+ p->next = newblock;
+
+ newblock->next_free = p->next_free;
+ newblock->prev_free = p;
+ p->next_free->prev_free = newblock;
+ p->next_free = newblock;
+
+ p->size = size;
+ }
+
+ /* p = middle block */
+ p->free = 0;
+
+ /* Remove p from the free list:
+ */
+ p->next_free->prev_free = p->prev_free;
+ p->prev_free->next_free = p->next_free;
+
+ p->next_free = 0;
+ p->prev_free = 0;
+
+ p->reserved = reserved;
+ return p;
+}
+
+
+/**
+ * Allocate 'size' bytes with 2^align2 bytes alignment,
+ * restrict the search to free memory after 'startSearch'
+ * depth and back buffers should be in different 4mb banks
+ * to get better page hits if possible
+ * input: size = size of block
+ * align2 = 2^align2 bytes alignment
+ * startSearch = linear offset from start of heap to begin search
+ * return: pointer to the allocated block, 0 if error
+ */
+static struct mem_block *
+mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch)
+{
+ struct mem_block *p;
+ const int mask = (1 << align2)-1;
+ int startofs = 0;
+ int endofs;
+
+ if (!heap || align2 < 0 || size <= 0)
+ return NULL;
+
+ for (p = heap->next_free; p != heap; p = p->next_free) {
+ assert(p->free);
+
+ startofs = (p->ofs + mask) & ~mask;
+ if ( startofs < startSearch ) {
+ startofs = startSearch;
+ }
+ endofs = startofs+size;
+ if (endofs <= (p->ofs+p->size))
+ break;
+ }
+
+ if (p == heap)
+ return NULL;
+
+ assert(p->free);
+ p = SliceBlock(p,startofs,size,0,mask+1);
+
+ return p;
+}
+
+
+#if 0
+/**
+ * Free block starts at offset
+ * input: pointer to a heap, start offset
+ * return: pointer to a block
+ */
+static struct mem_block *
+mmFindBlock(struct mem_block *heap, int start)
+{
+ struct mem_block *p;
+
+ for (p = heap->next; p != heap; p = p->next) {
+ if (p->ofs == start)
+ return p;
+ }
+
+ return NULL;
+}
+#endif
+
+
+static INLINE int
+Join2Blocks(struct mem_block *p)
+{
+ /* XXX there should be some assertions here */
+
+ /* NOTE: heap->free == 0 */
+
+ if (p->free && p->next->free) {
+ struct mem_block *q = p->next;
+
+ assert(p->ofs + p->size == q->ofs);
+ p->size += q->size;
+
+ p->next = q->next;
+ q->next->prev = p;
+
+ q->next_free->prev_free = q->prev_free;
+ q->prev_free->next_free = q->next_free;
+
+ FREE(q);
+ return 1;
+ }
+ return 0;
+}
+
+
+/**
+ * Free block starts at offset
+ * input: pointer to a block
+ * return: 0 if OK, -1 if error
+ */
+static int
+mmFreeMem(struct mem_block *b)
+{
+ if (!b)
+ return 0;
+
+ if (b->free) {
+ debug_printf("block already free\n");
+ return -1;
+ }
+ if (b->reserved) {
+ debug_printf("block is reserved\n");
+ return -1;
+ }
+
+ b->free = 1;
+ b->next_free = b->heap->next_free;
+ b->prev_free = b->heap;
+ b->next_free->prev_free = b;
+ b->prev_free->next_free = b;
+
+ Join2Blocks(b);
+ if (b->prev != b->heap)
+ Join2Blocks(b->prev);
+
+ return 0;
+}
+
+
+/**
+ * destroy MM
+ */
+static void
+mmDestroy(struct mem_block *heap)
+{
+ struct mem_block *p;
+
+ if (!heap)
+ return;
+
+ for (p = heap->next; p != heap; ) {
+ struct mem_block *next = p->next;
+ FREE(p);
+ p = next;
+ }
+
+ FREE(heap);
+}
+
+
+struct mm_pb_manager
+{
+ struct pb_manager base;
+
+ _glthread_Mutex mutex;
+
+ size_t size;
+ struct mem_block *heap;
+
+ size_t align2;
+
+ struct pb_buffer *buffer;
+ void *map;
+};
+
+
+static INLINE struct mm_pb_manager *
+mm_pb_manager(struct pb_manager *mgr)
+{
+ assert(mgr);
+ return (struct mm_pb_manager *)mgr;
+}
+
+
+struct mm_buffer
+{
+ struct pb_buffer base;
+
+ struct mm_pb_manager *mgr;
+
+ struct mem_block *block;
+};
+
+
+static INLINE struct mm_buffer *
+mm_buffer(struct pb_buffer *buf)
+{
+ assert(buf);
+ return (struct mm_buffer *)buf;
+}
+
+
+static void
+mm_buffer_destroy(struct pb_buffer *buf)
+{
+ struct mm_buffer *mm_buf = mm_buffer(buf);
+ struct mm_pb_manager *mm = mm_buf->mgr;
+
+ assert(buf->base.refcount == 0);
+
+ _glthread_LOCK_MUTEX(mm->mutex);
+ mmFreeMem(mm_buf->block);
+ FREE(buf);
+ _glthread_UNLOCK_MUTEX(mm->mutex);
+}
+
+
+static void *
+mm_buffer_map(struct pb_buffer *buf,
+ unsigned flags)
+{
+ struct mm_buffer *mm_buf = mm_buffer(buf);
+ struct mm_pb_manager *mm = mm_buf->mgr;
+
+ return (unsigned char *) mm->map + mm_buf->block->ofs;
+}
+
+
+static void
+mm_buffer_unmap(struct pb_buffer *buf)
+{
+ /* No-op */
+}
+
+
+static void
+mm_buffer_get_base_buffer(struct pb_buffer *buf,
+ struct pb_buffer **base_buf,
+ unsigned *offset)
+{
+ struct mm_buffer *mm_buf = mm_buffer(buf);
+ struct mm_pb_manager *mm = mm_buf->mgr;
+ pb_get_base_buffer(mm->buffer, base_buf, offset);
+ *offset += mm_buf->block->ofs;
+}
+
+
+static const struct pb_vtbl
+mm_buffer_vtbl = {
+ mm_buffer_destroy,
+ mm_buffer_map,
+ mm_buffer_unmap,
+ mm_buffer_get_base_buffer
+};
+
+
+static struct pb_buffer *
+mm_bufmgr_create_buffer(struct pb_manager *mgr,
+ size_t size,
+ const struct pb_desc *desc)
+{
+ struct mm_pb_manager *mm = mm_pb_manager(mgr);
+ struct mm_buffer *mm_buf;
+
+ /* We don't handle alignments larger then the one initially setup */
+ assert(desc->alignment % (1 << mm->align2) == 0);
+ if(desc->alignment % (1 << mm->align2))
+ return NULL;
+
+ _glthread_LOCK_MUTEX(mm->mutex);
+
+ mm_buf = CALLOC_STRUCT(mm_buffer);
+ if (!mm_buf) {
+ _glthread_UNLOCK_MUTEX(mm->mutex);
+ return NULL;
+ }
+
+ mm_buf->base.base.refcount = 1;
+ mm_buf->base.base.alignment = desc->alignment;
+ mm_buf->base.base.usage = desc->usage;
+ mm_buf->base.base.size = size;
+
+ mm_buf->base.vtbl = &mm_buffer_vtbl;
+
+ mm_buf->mgr = mm;
+
+ mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0);
+ if(!mm_buf->block) {
+ debug_printf("warning: heap full\n");
+#if 0
+ mmDumpMemInfo(mm->heap);
+#endif
+
+ mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0);
+ if(!mm_buf->block) {
+ assert(0);
+ FREE(mm_buf);
+ _glthread_UNLOCK_MUTEX(mm->mutex);
+ return NULL;
+ }
+ }
+
+ /* Some sanity checks */
+ assert(0 <= mm_buf->block->ofs && mm_buf->block->ofs < mm->size);
+ assert(size <= mm_buf->block->size && mm_buf->block->ofs + mm_buf->block->size <= mm->size);
+
+ _glthread_UNLOCK_MUTEX(mm->mutex);
+ return SUPER(mm_buf);
+}
+
+
+static void
+mm_bufmgr_destroy(struct pb_manager *mgr)
+{
+ struct mm_pb_manager *mm = mm_pb_manager(mgr);
+
+ _glthread_LOCK_MUTEX(mm->mutex);
+
+ mmDestroy(mm->heap);
+
+ pb_unmap(mm->buffer);
+ pb_reference(&mm->buffer, NULL);
+
+ _glthread_UNLOCK_MUTEX(mm->mutex);
+
+ FREE(mgr);
+}
+
+
+struct pb_manager *
+mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
+ size_t size, size_t align2)
+{
+ struct mm_pb_manager *mm;
+
+ if(!buffer)
+ return NULL;
+
+ mm = CALLOC_STRUCT(mm_pb_manager);
+ if (!mm)
+ return NULL;
+
+ mm->base.create_buffer = mm_bufmgr_create_buffer;
+ mm->base.destroy = mm_bufmgr_destroy;
+
+ mm->size = size;
+ mm->align2 = align2; /* 64-byte alignment */
+
+ _glthread_INIT_MUTEX(mm->mutex);
+
+ mm->buffer = buffer;
+
+ mm->map = pb_map(mm->buffer,
+ PIPE_BUFFER_USAGE_CPU_READ |
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+ if(!mm->map)
+ goto failure;
+
+ mm->heap = mmInit(0, size);
+ if (!mm->heap)
+ goto failure;
+
+ return SUPER(mm);
+
+failure:
+if(mm->heap)
+ mmDestroy(mm->heap);
+ if(mm->map)
+ pb_unmap(mm->buffer);
+ if(mm)
+ FREE(mm);
+ return NULL;
+}
+
+
+struct pb_manager *
+mm_bufmgr_create(struct pb_manager *provider,
+ size_t size, size_t align2)
+{
+ struct pb_buffer *buffer;
+ struct pb_manager *mgr;
+ struct pb_desc desc;
+
+ assert(provider);
+ assert(provider->create_buffer);
+
+ memset(&desc, 0, sizeof(desc));
+ desc.alignment = 1 << align2;
+
+ buffer = provider->create_buffer(provider, size, &desc);
+ if (!buffer)
+ return NULL;
+
+ mgr = mm_bufmgr_create_from_buffer(buffer, size, align2);
+ if (!mgr) {
+ pb_reference(&buffer, NULL);
+ return NULL;
+ }
+
+ return mgr;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Batch buffer pool management.
+ *
+ * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com>
+ * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
+ */
+
+
+#include "linked_list.h"
+
+#include "p_compiler.h"
+#include "p_debug.h"
+#include "p_thread.h"
+#include "p_defines.h"
+#include "p_util.h"
+
+#include "pb_buffer.h"
+#include "pb_bufmgr.h"
+
+
+/**
+ * Convenience macro (type safe).
+ */
+#define SUPER(__derived) (&(__derived)->base)
+
+
+struct pool_pb_manager
+{
+ struct pb_manager base;
+
+ _glthread_Mutex mutex;
+
+ size_t bufSize;
+ size_t bufAlign;
+
+ size_t numFree;
+ size_t numTot;
+
+ struct list_head free;
+
+ struct pb_buffer *buffer;
+ void *map;
+
+ struct pool_buffer *bufs;
+};
+
+
+static INLINE struct pool_pb_manager *
+pool_pb_manager(struct pb_manager *mgr)
+{
+ assert(mgr);
+ return (struct pool_pb_manager *)mgr;
+}
+
+
+struct pool_buffer
+{
+ struct pb_buffer base;
+
+ struct pool_pb_manager *mgr;
+
+ struct list_head head;
+
+ size_t start;
+};
+
+
+static INLINE struct pool_buffer *
+pool_buffer(struct pb_buffer *buf)
+{
+ assert(buf);
+ return (struct pool_buffer *)buf;
+}
+
+
+
+static void
+pool_buffer_destroy(struct pb_buffer *buf)
+{
+ struct pool_buffer *pool_buf = pool_buffer(buf);
+ struct pool_pb_manager *pool = pool_buf->mgr;
+
+ assert(pool_buf->base.base.refcount == 0);
+
+ _glthread_LOCK_MUTEX(pool->mutex);
+ LIST_ADD(&pool_buf->head, &pool->free);
+ pool->numFree++;
+ _glthread_UNLOCK_MUTEX(pool->mutex);
+}
+
+
+static void *
+pool_buffer_map(struct pb_buffer *buf, unsigned flags)
+{
+ struct pool_buffer *pool_buf = pool_buffer(buf);
+ struct pool_pb_manager *pool = pool_buf->mgr;
+ void *map;
+
+ _glthread_LOCK_MUTEX(pool->mutex);
+ map = (unsigned char *) pool->map + pool_buf->start;
+ _glthread_UNLOCK_MUTEX(pool->mutex);
+ return map;
+}
+
+
+static void
+pool_buffer_unmap(struct pb_buffer *buf)
+{
+ /* No-op */
+}
+
+
+static void
+pool_buffer_get_base_buffer(struct pb_buffer *buf,
+ struct pb_buffer **base_buf,
+ unsigned *offset)
+{
+ struct pool_buffer *pool_buf = pool_buffer(buf);
+ struct pool_pb_manager *pool = pool_buf->mgr;
+ pb_get_base_buffer(pool->buffer, base_buf, offset);
+ *offset += pool_buf->start;
+}
+
+
+static const struct pb_vtbl
+pool_buffer_vtbl = {
+ pool_buffer_destroy,
+ pool_buffer_map,
+ pool_buffer_unmap,
+ pool_buffer_get_base_buffer
+};
+
+
+static struct pb_buffer *
+pool_bufmgr_create_buffer(struct pb_manager *mgr,
+ size_t size,
+ const struct pb_desc *desc)
+{
+ struct pool_pb_manager *pool = pool_pb_manager(mgr);
+ struct pool_buffer *pool_buf;
+ struct list_head *item;
+
+ assert(size == pool->bufSize);
+ assert(pool->bufAlign % desc->alignment == 0);
+
+ _glthread_LOCK_MUTEX(pool->mutex);
+
+ if (pool->numFree == 0) {
+ _glthread_UNLOCK_MUTEX(pool->mutex);
+ debug_printf("warning: out of fixed size buffer objects\n");
+ return NULL;
+ }
+
+ item = pool->free.next;
+
+ if (item == &pool->free) {
+ _glthread_UNLOCK_MUTEX(pool->mutex);
+ debug_printf("error: fixed size buffer pool corruption\n");
+ return NULL;
+ }
+
+ LIST_DEL(item);
+ --pool->numFree;
+
+ _glthread_UNLOCK_MUTEX(pool->mutex);
+
+ pool_buf = LIST_ENTRY(struct pool_buffer, item, head);
+ assert(pool_buf->base.base.refcount == 0);
+ pool_buf->base.base.refcount = 1;
+ pool_buf->base.base.alignment = desc->alignment;
+ pool_buf->base.base.usage = desc->usage;
+
+ return SUPER(pool_buf);
+}
+
+
+static void
+pool_bufmgr_destroy(struct pb_manager *mgr)
+{
+ struct pool_pb_manager *pool = pool_pb_manager(mgr);
+ _glthread_LOCK_MUTEX(pool->mutex);
+
+ FREE(pool->bufs);
+
+ pb_unmap(pool->buffer);
+ pb_reference(&pool->buffer, NULL);
+
+ _glthread_UNLOCK_MUTEX(pool->mutex);
+
+ FREE(mgr);
+}
+
+
+struct pb_manager *
+pool_bufmgr_create(struct pb_manager *provider,
+ size_t numBufs,
+ size_t bufSize,
+ const struct pb_desc *desc)
+{
+ struct pool_pb_manager *pool;
+ struct pool_buffer *pool_buf;
+ size_t i;
+
+ pool = (struct pool_pb_manager *)CALLOC(1, sizeof(*pool));
+ if (!pool)
+ return NULL;
+
+ pool->base.destroy = pool_bufmgr_destroy;
+ pool->base.create_buffer = pool_bufmgr_create_buffer;
+
+ LIST_INITHEAD(&pool->free);
+
+ pool->numTot = numBufs;
+ pool->numFree = numBufs;
+ pool->bufSize = bufSize;
+ pool->bufAlign = desc->alignment;
+
+ _glthread_INIT_MUTEX(pool->mutex);
+
+ pool->buffer = provider->create_buffer(provider, numBufs*bufSize, desc);
+ if (!pool->buffer)
+ goto failure;
+
+ pool->map = pb_map(pool->buffer,
+ PIPE_BUFFER_USAGE_CPU_READ |
+ PIPE_BUFFER_USAGE_CPU_WRITE);
+ if(!pool->map)
+ goto failure;
+
+ pool->bufs = (struct pool_buffer *)CALLOC(numBufs, sizeof(*pool->bufs));
+ if (!pool->bufs)
+ goto failure;
+
+ pool_buf = pool->bufs;
+ for (i = 0; i < numBufs; ++i) {
+ pool_buf->base.base.refcount = 0;
+ pool_buf->base.base.alignment = 0;
+ pool_buf->base.base.usage = 0;
+ pool_buf->base.base.size = bufSize;
+ pool_buf->base.vtbl = &pool_buffer_vtbl;
+ pool_buf->mgr = pool;
+ pool_buf->start = i * bufSize;
+ LIST_ADDTAIL(&pool_buf->head, &pool->free);
+ pool_buf++;
+ }
+
+ return SUPER(pool);
+
+failure:
+ if(pool->bufs)
+ FREE(pool->bufs);
+ if(pool->map)
+ pb_unmap(pool->buffer);
+ if(pool->buffer)
+ pb_reference(&pool->buffer, NULL);
+ if(pool)
+ FREE(pool);
+ return NULL;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Implementation of client buffer (also designated as "user buffers"), which
+ * are just state-tracker owned data masqueraded as buffers.
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_winsys.h"
+#include "pipe/p_util.h"
+
+#include "pb_buffer.h"
+
+
+/**
+ * User buffers are special buffers that initially reference memory
+ * held by the user but which may if necessary copy that memory into
+ * device memory behind the scenes, for submission to hardware.
+ *
+ * These are particularly useful when the referenced data is never
+ * submitted to hardware at all, in the particular case of software
+ * vertex processing.
+ */
+struct pb_user_buffer
+{
+ struct pb_buffer base;
+ void *data;
+};
+
+
+extern const struct pb_vtbl pb_user_buffer_vtbl;
+
+
+static INLINE struct pb_user_buffer *
+pb_user_buffer(struct pb_buffer *buf)
+{
+ assert(buf);
+ assert(buf->vtbl == &pb_user_buffer_vtbl);
+ return (struct pb_user_buffer *)buf;
+}
+
+
+static void
+pb_user_buffer_destroy(struct pb_buffer *buf)
+{
+ assert(buf);
+ FREE(buf);
+}
+
+
+static void *
+pb_user_buffer_map(struct pb_buffer *buf,
+ unsigned flags)
+{
+ return pb_user_buffer(buf)->data;
+}
+
+
+static void
+pb_user_buffer_unmap(struct pb_buffer *buf)
+{
+ /* No-op */
+}
+
+
+static void
+pb_user_buffer_get_base_buffer(struct pb_buffer *buf,
+ struct pb_buffer **base_buf,
+ unsigned *offset)
+{
+ *base_buf = buf;
+ *offset = 0;
+}
+
+
+const struct pb_vtbl
+pb_user_buffer_vtbl = {
+ pb_user_buffer_destroy,
+ pb_user_buffer_map,
+ pb_user_buffer_unmap,
+ pb_user_buffer_get_base_buffer
+};
+
+
+static struct pipe_buffer *
+pb_winsys_user_buffer_create(struct pipe_winsys *winsys,
+ void *data,
+ unsigned bytes)
+{
+ struct pb_user_buffer *buf = CALLOC_STRUCT(pb_user_buffer);
+
+ if(!buf)
+ return NULL;
+
+ buf->base.base.refcount = 1;
+ buf->base.base.size = bytes;
+ buf->base.base.alignment = 0;
+ buf->base.base.usage = 0;
+
+ buf->base.vtbl = &pb_user_buffer_vtbl;
+ buf->data = data;
+
+ return &buf->base.base;
+}
+
+
+static void *
+pb_winsys_buffer_map(struct pipe_winsys *winsys,
+ struct pipe_buffer *buf,
+ unsigned flags)
+{
+ (void)winsys;
+ return pb_map(pb_buffer(buf), flags);
+}
+
+
+static void
+pb_winsys_buffer_unmap(struct pipe_winsys *winsys,
+ struct pipe_buffer *buf)
+{
+ (void)winsys;
+ pb_unmap(pb_buffer(buf));
+}
+
+
+static void
+pb_winsys_buffer_destroy(struct pipe_winsys *winsys,
+ struct pipe_buffer *buf)
+{
+ (void)winsys;
+ pb_destroy(pb_buffer(buf));
+}
+
+
+void
+pb_init_winsys(struct pipe_winsys *winsys)
+{
+ winsys->user_buffer_create = pb_winsys_user_buffer_create;
+ winsys->buffer_map = pb_winsys_buffer_map;
+ winsys->buffer_unmap = pb_winsys_buffer_unmap;
+ winsys->buffer_destroy = pb_winsys_buffer_destroy;
+}
--- /dev/null
+default:
+ cd ../.. ; make
+
--- /dev/null
+default:
+ cd ../../.. ; make
+
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * TGSI interpretor/executor.
+ *
+ * Flow control information:
+ *
+ * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
+ * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
+ * care since a condition may be true for some quad components but false
+ * for other components.
+ *
+ * We basically execute all statements (even if they're in the part of
+ * an IF/ELSE clause that's "not taken") and use a special mask to
+ * control writing to destination registers. This is the ExecMask.
+ * See store_dest().
+ *
+ * The ExecMask is computed from three other masks (CondMask, LoopMask and
+ * ContMask) which are controlled by the flow control instructions (namely:
+ * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
+ *
+ *
+ * Authors:
+ * Michal Krol
+ * Brian Paul
+ */
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_util.h"
+#include "tgsi_exec.h"
+
+#define TILE_TOP_LEFT 0
+#define TILE_TOP_RIGHT 1
+#define TILE_BOTTOM_LEFT 2
+#define TILE_BOTTOM_RIGHT 3
+
+/*
+ * Shorthand locations of various utility registers (_I = Index, _C = Channel)
+ */
+#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
+#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
+#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
+#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
+#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
+#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
+#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
+#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
+#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
+#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
+#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
+#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
+#define TEMP_128_I TGSI_EXEC_TEMP_128_I
+#define TEMP_128_C TGSI_EXEC_TEMP_128_C
+#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
+#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
+#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
+#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
+#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
+#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
+#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
+#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
+#define TEMP_R0 TGSI_EXEC_TEMP_R0
+
+#define FOR_EACH_CHANNEL(CHAN)\
+ for (CHAN = 0; CHAN < 4; CHAN++)
+
+#define IS_CHANNEL_ENABLED(INST, CHAN)\
+ ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define IS_CHANNEL_ENABLED2(INST, CHAN)\
+ ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
+ FOR_EACH_CHANNEL( CHAN )\
+ if (IS_CHANNEL_ENABLED( INST, CHAN ))
+
+#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
+ FOR_EACH_CHANNEL( CHAN )\
+ if (IS_CHANNEL_ENABLED2( INST, CHAN ))
+
+
+/** The execution mask depends on the conditional mask and the loop mask */
+#define UPDATE_EXEC_MASK(MACH) \
+ MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
+
+
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
+
+
+static void
+tgsi_exec_prepare( struct tgsi_exec_machine *mach )
+{
+ struct tgsi_exec_labels *labels = &mach->Labels;
+ struct tgsi_parse_context parse;
+ struct tgsi_full_instruction *instructions;
+ struct tgsi_full_declaration *declarations;
+ uint maxInstructions = 10, numInstructions = 0;
+ uint maxDeclarations = 10, numDeclarations = 0;
+ uint k;
+ uint instno = 0;
+
+ mach->ImmLimit = 0;
+ labels->count = 0;
+
+ declarations = (struct tgsi_full_declaration *)
+ MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
+
+ instructions = (struct tgsi_full_instruction *)
+ MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
+
+ k = tgsi_parse_init( &parse, mach->Tokens );
+ if (k != TGSI_PARSE_OK) {
+ debug_printf("Problem parsing!\n");
+ return;
+ }
+
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+ uint pointer = parse.Position;
+ uint i;
+
+ tgsi_parse_token( &parse );
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ /* save expanded declaration */
+ if (numDeclarations == maxDeclarations) {
+ declarations = REALLOC(declarations,
+ maxDeclarations
+ * sizeof(struct tgsi_full_declaration),
+ (maxDeclarations + 10)
+ * sizeof(struct tgsi_full_declaration));
+ maxDeclarations += 10;
+ }
+ memcpy(declarations + numDeclarations,
+ &parse.FullToken.FullDeclaration,
+ sizeof(declarations[0]));
+ numDeclarations++;
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
+ assert( size % 4 == 0 );
+ assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
+
+ for( i = 0; i < size; i++ ) {
+ mach->Imms[mach->ImmLimit + i / 4][i % 4] = parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+ }
+ mach->ImmLimit += size / 4;
+ }
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ assert( labels->count < 128 );
+
+ labels->labels[labels->count][0] = instno;
+ labels->labels[labels->count][1] = pointer;
+ labels->count++;
+
+ /* save expanded instruction */
+ if (numInstructions == maxInstructions) {
+ instructions = REALLOC(instructions,
+ maxInstructions
+ * sizeof(struct tgsi_full_instruction),
+ (maxInstructions + 10)
+ * sizeof(struct tgsi_full_instruction));
+ maxInstructions += 10;
+ }
+ memcpy(instructions + numInstructions,
+ &parse.FullToken.FullInstruction,
+ sizeof(instructions[0]));
+ numInstructions++;
+ break;
+
+ default:
+ assert( 0 );
+ }
+ }
+ tgsi_parse_free (&parse);
+
+ if (mach->Declarations) {
+ FREE( mach->Declarations );
+ }
+ mach->Declarations = declarations;
+ mach->NumDeclarations = numDeclarations;
+
+ if (mach->Instructions) {
+ FREE( mach->Instructions );
+ }
+ mach->Instructions = instructions;
+ mach->NumInstructions = numInstructions;
+}
+
+
+/**
+ * Initialize machine state by expanding tokens to full instructions,
+ * allocating temporary storage, setting up constants, etc.
+ * After this, we can call tgsi_exec_machine_run() many times.
+ */
+void
+tgsi_exec_machine_init(
+ struct tgsi_exec_machine *mach,
+ const struct tgsi_token *tokens,
+ uint numSamplers,
+ struct tgsi_sampler *samplers)
+{
+ uint i, k;
+ struct tgsi_parse_context parse;
+
+#if 0
+ tgsi_dump(tokens, 0);
+#endif
+
+ mach->Tokens = tokens;
+
+ mach->Samplers = samplers;
+
+ k = tgsi_parse_init (&parse, mach->Tokens);
+ if (k != TGSI_PARSE_OK) {
+ debug_printf( "Problem parsing!\n" );
+ return;
+ }
+
+ mach->Processor = parse.FullHeader.Processor.Processor;
+ tgsi_parse_free (&parse);
+
+ mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
+ mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
+
+ /* Setup constants. */
+ for( i = 0; i < 4; i++ ) {
+ mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000;
+ mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF;
+ mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000;
+ mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF;
+ mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f;
+ mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f;
+ mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f;
+ mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f;
+ }
+
+ tgsi_exec_prepare( mach );
+}
+
+
+void
+tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
+{
+ if (mach->Instructions) {
+ FREE(mach->Instructions);
+ mach->Instructions = NULL;
+ mach->NumInstructions = 0;
+ }
+ if (mach->Declarations) {
+ FREE(mach->Declarations);
+ mach->Declarations = NULL;
+ mach->NumDeclarations = 0;
+ }
+}
+
+
+static void
+micro_abs(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] = (float) fabs( (double) src->f[0] );
+ dst->f[1] = (float) fabs( (double) src->f[1] );
+ dst->f[2] = (float) fabs( (double) src->f[2] );
+ dst->f[3] = (float) fabs( (double) src->f[3] );
+}
+
+static void
+micro_add(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->f[0] = src0->f[0] + src1->f[0];
+ dst->f[1] = src0->f[1] + src1->f[1];
+ dst->f[2] = src0->f[2] + src1->f[2];
+ dst->f[3] = src0->f[3] + src1->f[3];
+}
+
+static void
+micro_iadd(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->i[0] = src0->i[0] + src1->i[0];
+ dst->i[1] = src0->i[1] + src1->i[1];
+ dst->i[2] = src0->i[2] + src1->i[2];
+ dst->i[3] = src0->i[3] + src1->i[3];
+}
+
+static void
+micro_and(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->u[0] = src0->u[0] & src1->u[0];
+ dst->u[1] = src0->u[1] & src1->u[1];
+ dst->u[2] = src0->u[2] & src1->u[2];
+ dst->u[3] = src0->u[3] & src1->u[3];
+}
+
+static void
+micro_ceil(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] = (float) ceil( (double) src->f[0] );
+ dst->f[1] = (float) ceil( (double) src->f[1] );
+ dst->f[2] = (float) ceil( (double) src->f[2] );
+ dst->f[3] = (float) ceil( (double) src->f[3] );
+}
+
+static void
+micro_cos(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] = (float) cos( (double) src->f[0] );
+ dst->f[1] = (float) cos( (double) src->f[1] );
+ dst->f[2] = (float) cos( (double) src->f[2] );
+ dst->f[3] = (float) cos( (double) src->f[3] );
+}
+
+static void
+micro_ddx(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] =
+ dst->f[1] =
+ dst->f[2] =
+ dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
+}
+
+static void
+micro_ddy(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] =
+ dst->f[1] =
+ dst->f[2] =
+ dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
+}
+
+static void
+micro_div(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->f[0] = src0->f[0] / src1->f[0];
+ dst->f[1] = src0->f[1] / src1->f[1];
+ dst->f[2] = src0->f[2] / src1->f[2];
+ dst->f[3] = src0->f[3] / src1->f[3];
+}
+
+static void
+micro_udiv(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->u[0] = src0->u[0] / src1->u[0];
+ dst->u[1] = src0->u[1] / src1->u[1];
+ dst->u[2] = src0->u[2] / src1->u[2];
+ dst->u[3] = src0->u[3] / src1->u[3];
+}
+
+static void
+micro_eq(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1,
+ const union tgsi_exec_channel *src2,
+ const union tgsi_exec_channel *src3 )
+{
+ dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0];
+ dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1];
+ dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2];
+ dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3];
+}
+
+static void
+micro_ieq(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1,
+ const union tgsi_exec_channel *src2,
+ const union tgsi_exec_channel *src3 )
+{
+ dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0];
+ dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1];
+ dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2];
+ dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3];
+}
+
+static void
+micro_exp2(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src)
+{
+ dst->f[0] = (float) pow( 2.0, (double) src->f[0] );
+ dst->f[1] = (float) pow( 2.0, (double) src->f[1] );
+ dst->f[2] = (float) pow( 2.0, (double) src->f[2] );
+ dst->f[3] = (float) pow( 2.0, (double) src->f[3] );
+}
+
+static void
+micro_f2it(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->i[0] = (int) src->f[0];
+ dst->i[1] = (int) src->f[1];
+ dst->i[2] = (int) src->f[2];
+ dst->i[3] = (int) src->f[3];
+}
+
+static void
+micro_f2ut(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->u[0] = (uint) src->f[0];
+ dst->u[1] = (uint) src->f[1];
+ dst->u[2] = (uint) src->f[2];
+ dst->u[3] = (uint) src->f[3];
+}
+
+static void
+micro_flr(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] = (float) floor( (double) src->f[0] );
+ dst->f[1] = (float) floor( (double) src->f[1] );
+ dst->f[2] = (float) floor( (double) src->f[2] );
+ dst->f[3] = (float) floor( (double) src->f[3] );
+}
+
+static void
+micro_frc(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] = src->f[0] - (float) floor( (double) src->f[0] );
+ dst->f[1] = src->f[1] - (float) floor( (double) src->f[1] );
+ dst->f[2] = src->f[2] - (float) floor( (double) src->f[2] );
+ dst->f[3] = src->f[3] - (float) floor( (double) src->f[3] );
+}
+
+static void
+micro_ge(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1,
+ const union tgsi_exec_channel *src2,
+ const union tgsi_exec_channel *src3 )
+{
+ dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0];
+ dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1];
+ dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2];
+ dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3];
+}
+
+static void
+micro_i2f(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] = (float) src->i[0];
+ dst->f[1] = (float) src->i[1];
+ dst->f[2] = (float) src->i[2];
+ dst->f[3] = (float) src->i[3];
+}
+
+static void
+micro_lg2(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] = (float) log( (double) src->f[0] ) * 1.442695f;
+ dst->f[1] = (float) log( (double) src->f[1] ) * 1.442695f;
+ dst->f[2] = (float) log( (double) src->f[2] ) * 1.442695f;
+ dst->f[3] = (float) log( (double) src->f[3] ) * 1.442695f;
+}
+
+static void
+micro_lt(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1,
+ const union tgsi_exec_channel *src2,
+ const union tgsi_exec_channel *src3 )
+{
+ dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
+ dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
+ dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
+ dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
+}
+
+static void
+micro_ilt(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1,
+ const union tgsi_exec_channel *src2,
+ const union tgsi_exec_channel *src3 )
+{
+ dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0];
+ dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1];
+ dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2];
+ dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3];
+}
+
+static void
+micro_ult(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1,
+ const union tgsi_exec_channel *src2,
+ const union tgsi_exec_channel *src3 )
+{
+ dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0];
+ dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1];
+ dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2];
+ dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3];
+}
+
+static void
+micro_max(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
+ dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
+ dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
+ dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
+}
+
+static void
+micro_imax(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
+ dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
+ dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
+ dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
+}
+
+static void
+micro_umax(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
+ dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
+ dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
+ dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
+}
+
+static void
+micro_min(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
+ dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
+ dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
+ dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
+}
+
+static void
+micro_imin(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
+ dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
+ dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
+ dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
+}
+
+static void
+micro_umin(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
+ dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
+ dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
+ dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
+}
+
+static void
+micro_umod(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->u[0] = src0->u[0] % src1->u[0];
+ dst->u[1] = src0->u[1] % src1->u[1];
+ dst->u[2] = src0->u[2] % src1->u[2];
+ dst->u[3] = src0->u[3] % src1->u[3];
+}
+
+static void
+micro_mul(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->f[0] = src0->f[0] * src1->f[0];
+ dst->f[1] = src0->f[1] * src1->f[1];
+ dst->f[2] = src0->f[2] * src1->f[2];
+ dst->f[3] = src0->f[3] * src1->f[3];
+}
+
+static void
+micro_imul(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->i[0] = src0->i[0] * src1->i[0];
+ dst->i[1] = src0->i[1] * src1->i[1];
+ dst->i[2] = src0->i[2] * src1->i[2];
+ dst->i[3] = src0->i[3] * src1->i[3];
+}
+
+static void
+micro_imul64(
+ union tgsi_exec_channel *dst0,
+ union tgsi_exec_channel *dst1,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst1->i[0] = src0->i[0] * src1->i[0];
+ dst1->i[1] = src0->i[1] * src1->i[1];
+ dst1->i[2] = src0->i[2] * src1->i[2];
+ dst1->i[3] = src0->i[3] * src1->i[3];
+ dst0->i[0] = 0;
+ dst0->i[1] = 0;
+ dst0->i[2] = 0;
+ dst0->i[3] = 0;
+}
+
+static void
+micro_umul64(
+ union tgsi_exec_channel *dst0,
+ union tgsi_exec_channel *dst1,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst1->u[0] = src0->u[0] * src1->u[0];
+ dst1->u[1] = src0->u[1] * src1->u[1];
+ dst1->u[2] = src0->u[2] * src1->u[2];
+ dst1->u[3] = src0->u[3] * src1->u[3];
+ dst0->u[0] = 0;
+ dst0->u[1] = 0;
+ dst0->u[2] = 0;
+ dst0->u[3] = 0;
+}
+
+static void
+micro_movc(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1,
+ const union tgsi_exec_channel *src2 )
+{
+ dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
+ dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
+ dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
+ dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
+}
+
+static void
+micro_neg(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] = -src->f[0];
+ dst->f[1] = -src->f[1];
+ dst->f[2] = -src->f[2];
+ dst->f[3] = -src->f[3];
+}
+
+static void
+micro_ineg(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->i[0] = -src->i[0];
+ dst->i[1] = -src->i[1];
+ dst->i[2] = -src->i[2];
+ dst->i[3] = -src->i[3];
+}
+
+static void
+micro_not(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->u[0] = ~src->u[0];
+ dst->u[1] = ~src->u[1];
+ dst->u[2] = ~src->u[2];
+ dst->u[3] = ~src->u[3];
+}
+
+static void
+micro_or(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->u[0] = src0->u[0] | src1->u[0];
+ dst->u[1] = src0->u[1] | src1->u[1];
+ dst->u[2] = src0->u[2] | src1->u[2];
+ dst->u[3] = src0->u[3] | src1->u[3];
+}
+
+static void
+micro_pow(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->f[0] = (float) pow( (double) src0->f[0], (double) src1->f[0] );
+ dst->f[1] = (float) pow( (double) src0->f[1], (double) src1->f[1] );
+ dst->f[2] = (float) pow( (double) src0->f[2], (double) src1->f[2] );
+ dst->f[3] = (float) pow( (double) src0->f[3], (double) src1->f[3] );
+}
+
+static void
+micro_rnd(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] = (float) floor( (double) (src->f[0] + 0.5f) );
+ dst->f[1] = (float) floor( (double) (src->f[1] + 0.5f) );
+ dst->f[2] = (float) floor( (double) (src->f[2] + 0.5f) );
+ dst->f[3] = (float) floor( (double) (src->f[3] + 0.5f) );
+}
+
+static void
+micro_shl(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->i[0] = src0->i[0] << src1->i[0];
+ dst->i[1] = src0->i[1] << src1->i[1];
+ dst->i[2] = src0->i[2] << src1->i[2];
+ dst->i[3] = src0->i[3] << src1->i[3];
+}
+
+static void
+micro_ishr(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->i[0] = src0->i[0] >> src1->i[0];
+ dst->i[1] = src0->i[1] >> src1->i[1];
+ dst->i[2] = src0->i[2] >> src1->i[2];
+ dst->i[3] = src0->i[3] >> src1->i[3];
+}
+
+static void
+micro_trunc(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0 )
+{
+ dst->f[0] = (float) (int) src0->f[0];
+ dst->f[1] = (float) (int) src0->f[1];
+ dst->f[2] = (float) (int) src0->f[2];
+ dst->f[3] = (float) (int) src0->f[3];
+}
+
+static void
+micro_ushr(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->u[0] = src0->u[0] >> src1->u[0];
+ dst->u[1] = src0->u[1] >> src1->u[1];
+ dst->u[2] = src0->u[2] >> src1->u[2];
+ dst->u[3] = src0->u[3] >> src1->u[3];
+}
+
+static void
+micro_sin(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] = (float) sin( (double) src->f[0] );
+ dst->f[1] = (float) sin( (double) src->f[1] );
+ dst->f[2] = (float) sin( (double) src->f[2] );
+ dst->f[3] = (float) sin( (double) src->f[3] );
+}
+
+static void
+micro_sqrt( union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] = (float) sqrt( (double) src->f[0] );
+ dst->f[1] = (float) sqrt( (double) src->f[1] );
+ dst->f[2] = (float) sqrt( (double) src->f[2] );
+ dst->f[3] = (float) sqrt( (double) src->f[3] );
+}
+
+static void
+micro_sub(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->f[0] = src0->f[0] - src1->f[0];
+ dst->f[1] = src0->f[1] - src1->f[1];
+ dst->f[2] = src0->f[2] - src1->f[2];
+ dst->f[3] = src0->f[3] - src1->f[3];
+}
+
+static void
+micro_u2f(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src )
+{
+ dst->f[0] = (float) src->u[0];
+ dst->f[1] = (float) src->u[1];
+ dst->f[2] = (float) src->u[2];
+ dst->f[3] = (float) src->u[3];
+}
+
+static void
+micro_xor(
+ union tgsi_exec_channel *dst,
+ const union tgsi_exec_channel *src0,
+ const union tgsi_exec_channel *src1 )
+{
+ dst->u[0] = src0->u[0] ^ src1->u[0];
+ dst->u[1] = src0->u[1] ^ src1->u[1];
+ dst->u[2] = src0->u[2] ^ src1->u[2];
+ dst->u[3] = src0->u[3] ^ src1->u[3];
+}
+
+static void
+fetch_src_file_channel(
+ const struct tgsi_exec_machine *mach,
+ const uint file,
+ const uint swizzle,
+ const union tgsi_exec_channel *index,
+ union tgsi_exec_channel *chan )
+{
+ switch( swizzle ) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ switch( file ) {
+ case TGSI_FILE_CONSTANT:
+ chan->f[0] = mach->Consts[index->i[0]][swizzle];
+ chan->f[1] = mach->Consts[index->i[1]][swizzle];
+ chan->f[2] = mach->Consts[index->i[2]][swizzle];
+ chan->f[3] = mach->Consts[index->i[3]][swizzle];
+ break;
+
+ case TGSI_FILE_INPUT:
+ chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
+ break;
+
+ case TGSI_FILE_TEMPORARY:
+ chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
+ break;
+
+ case TGSI_FILE_IMMEDIATE:
+ assert( index->i[0] < (int) mach->ImmLimit );
+ chan->f[0] = mach->Imms[index->i[0]][swizzle];
+ assert( index->i[1] < (int) mach->ImmLimit );
+ chan->f[1] = mach->Imms[index->i[1]][swizzle];
+ assert( index->i[2] < (int) mach->ImmLimit );
+ chan->f[2] = mach->Imms[index->i[2]][swizzle];
+ assert( index->i[3] < (int) mach->ImmLimit );
+ chan->f[3] = mach->Imms[index->i[3]][swizzle];
+ break;
+
+ case TGSI_FILE_ADDRESS:
+ chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
+ break;
+
+ case TGSI_FILE_OUTPUT:
+ /* vertex/fragment output vars can be read too */
+ chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
+ break;
+
+ default:
+ assert( 0 );
+ }
+ break;
+
+ case TGSI_EXTSWIZZLE_ZERO:
+ *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
+ break;
+
+ case TGSI_EXTSWIZZLE_ONE:
+ *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
+ break;
+
+ default:
+ assert( 0 );
+ }
+}
+
+static void
+fetch_source(
+ const struct tgsi_exec_machine *mach,
+ union tgsi_exec_channel *chan,
+ const struct tgsi_full_src_register *reg,
+ const uint chan_index )
+{
+ union tgsi_exec_channel index;
+ uint swizzle;
+
+ index.i[0] =
+ index.i[1] =
+ index.i[2] =
+ index.i[3] = reg->SrcRegister.Index;
+
+ if (reg->SrcRegister.Indirect) {
+ union tgsi_exec_channel index2;
+ union tgsi_exec_channel indir_index;
+
+ index2.i[0] =
+ index2.i[1] =
+ index2.i[2] =
+ index2.i[3] = reg->SrcRegisterInd.Index;
+
+ swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X );
+ fetch_src_file_channel(
+ mach,
+ reg->SrcRegisterInd.File,
+ swizzle,
+ &index2,
+ &indir_index );
+
+ index.i[0] += indir_index.i[0];
+ index.i[1] += indir_index.i[1];
+ index.i[2] += indir_index.i[2];
+ index.i[3] += indir_index.i[3];
+ }
+
+ if( reg->SrcRegister.Dimension ) {
+ switch( reg->SrcRegister.File ) {
+ case TGSI_FILE_INPUT:
+ index.i[0] *= 17;
+ index.i[1] *= 17;
+ index.i[2] *= 17;
+ index.i[3] *= 17;
+ break;
+ case TGSI_FILE_CONSTANT:
+ index.i[0] *= 4096;
+ index.i[1] *= 4096;
+ index.i[2] *= 4096;
+ index.i[3] *= 4096;
+ break;
+ default:
+ assert( 0 );
+ }
+
+ index.i[0] += reg->SrcRegisterDim.Index;
+ index.i[1] += reg->SrcRegisterDim.Index;
+ index.i[2] += reg->SrcRegisterDim.Index;
+ index.i[3] += reg->SrcRegisterDim.Index;
+
+ if (reg->SrcRegisterDim.Indirect) {
+ union tgsi_exec_channel index2;
+ union tgsi_exec_channel indir_index;
+
+ index2.i[0] =
+ index2.i[1] =
+ index2.i[2] =
+ index2.i[3] = reg->SrcRegisterDimInd.Index;
+
+ swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X );
+ fetch_src_file_channel(
+ mach,
+ reg->SrcRegisterDimInd.File,
+ swizzle,
+ &index2,
+ &indir_index );
+
+ index.i[0] += indir_index.i[0];
+ index.i[1] += indir_index.i[1];
+ index.i[2] += indir_index.i[2];
+ index.i[3] += indir_index.i[3];
+ }
+ }
+
+ swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+ fetch_src_file_channel(
+ mach,
+ reg->SrcRegister.File,
+ swizzle,
+ &index,
+ chan );
+
+ switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
+ case TGSI_UTIL_SIGN_CLEAR:
+ micro_abs( chan, chan );
+ break;
+
+ case TGSI_UTIL_SIGN_SET:
+ micro_abs( chan, chan );
+ micro_neg( chan, chan );
+ break;
+
+ case TGSI_UTIL_SIGN_TOGGLE:
+ micro_neg( chan, chan );
+ break;
+
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+ }
+
+ if (reg->SrcRegisterExtMod.Complement) {
+ micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
+ }
+}
+
+static void
+store_dest(
+ struct tgsi_exec_machine *mach,
+ const union tgsi_exec_channel *chan,
+ const struct tgsi_full_dst_register *reg,
+ const struct tgsi_full_instruction *inst,
+ uint chan_index )
+{
+ union tgsi_exec_channel *dst;
+
+ switch( reg->DstRegister.File ) {
+ case TGSI_FILE_NULL:
+ return;
+
+ case TGSI_FILE_OUTPUT:
+ dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
+ + reg->DstRegister.Index].xyzw[chan_index];
+ break;
+
+ case TGSI_FILE_TEMPORARY:
+ dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
+ break;
+
+ case TGSI_FILE_ADDRESS:
+ dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
+ break;
+
+ default:
+ assert( 0 );
+ return;
+ }
+
+ switch (inst->Instruction.Saturate)
+ {
+ case TGSI_SAT_NONE:
+ if (mach->ExecMask & 0x1)
+ dst->i[0] = chan->i[0];
+ if (mach->ExecMask & 0x2)
+ dst->i[1] = chan->i[1];
+ if (mach->ExecMask & 0x4)
+ dst->i[2] = chan->i[2];
+ if (mach->ExecMask & 0x8)
+ dst->i[3] = chan->i[3];
+ break;
+
+ case TGSI_SAT_ZERO_ONE:
+ /* XXX need to obey ExecMask here */
+ micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+ micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
+ break;
+
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ assert( 0 );
+ break;
+
+ default:
+ assert( 0 );
+ }
+}
+
+#define FETCH(VAL,INDEX,CHAN)\
+ fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
+
+#define STORE(VAL,INDEX,CHAN)\
+ store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
+
+
+/**
+ * Execute ARB-style KIL which is predicated by a src register.
+ * Kill fragment if any of the four values is less than zero.
+ */
+static void
+exec_kilp(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ uint uniquemask;
+ uint chan_index;
+ uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
+ union tgsi_exec_channel r[1];
+
+ /* This mask stores component bits that were already tested. Note that
+ * we test if the value is less than zero, so 1.0 and 0.0 need not to be
+ * tested. */
+ uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+
+ for (chan_index = 0; chan_index < 4; chan_index++)
+ {
+ uint swizzle;
+ uint i;
+
+ /* unswizzle channel */
+ swizzle = tgsi_util_get_full_src_register_extswizzle (
+ &inst->FullSrcRegisters[0],
+ chan_index);
+
+ /* check if the component has not been already tested */
+ if (uniquemask & (1 << swizzle))
+ continue;
+ uniquemask |= 1 << swizzle;
+
+ FETCH(&r[0], 0, chan_index);
+ for (i = 0; i < 4; i++)
+ if (r[0].f[i] < 0.0f)
+ kilmask |= 1 << i;
+ }
+
+ mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
+}
+
+
+/*
+ * Fetch a texel using STR texture coordinates.
+ */
+static void
+fetch_texel( struct tgsi_sampler *sampler,
+ const union tgsi_exec_channel *s,
+ const union tgsi_exec_channel *t,
+ const union tgsi_exec_channel *p,
+ float lodbias, /* XXX should be float[4] */
+ union tgsi_exec_channel *r,
+ union tgsi_exec_channel *g,
+ union tgsi_exec_channel *b,
+ union tgsi_exec_channel *a )
+{
+ uint j;
+ float rgba[NUM_CHANNELS][QUAD_SIZE];
+
+ sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
+
+ for (j = 0; j < 4; j++) {
+ r->f[j] = rgba[0][j];
+ g->f[j] = rgba[1][j];
+ b->f[j] = rgba[2][j];
+ a->f[j] = rgba[3][j];
+ }
+}
+
+
+static void
+exec_tex(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ boolean biasLod)
+{
+ const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ union tgsi_exec_channel r[8];
+ uint chan_index;
+ float lodBias;
+
+ /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
+
+ switch (inst->InstructionExtTexture.Texture) {
+ case TGSI_TEXTURE_1D:
+
+ FETCH(&r[0], 0, CHAN_X);
+
+ switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
+ case TGSI_EXTSWIZZLE_W:
+ FETCH(&r[1], 0, CHAN_W);
+ micro_div( &r[0], &r[0], &r[1] );
+ break;
+
+ case TGSI_EXTSWIZZLE_ONE:
+ break;
+
+ default:
+ assert (0);
+ }
+
+ if (biasLod) {
+ FETCH(&r[1], 0, CHAN_W);
+ lodBias = r[2].f[0];
+ }
+ else
+ lodBias = 0.0;
+
+ fetch_texel(&mach->Samplers[unit],
+ &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */
+ &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
+ break;
+
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
+ case TGSI_EXTSWIZZLE_W:
+ FETCH(&r[3], 0, CHAN_W);
+ micro_div( &r[0], &r[0], &r[3] );
+ micro_div( &r[1], &r[1], &r[3] );
+ micro_div( &r[2], &r[2], &r[3] );
+ break;
+
+ case TGSI_EXTSWIZZLE_ONE:
+ break;
+
+ default:
+ assert (0);
+ }
+
+ if (biasLod) {
+ FETCH(&r[3], 0, CHAN_W);
+ lodBias = r[3].f[0];
+ }
+ else
+ lodBias = 0.0;
+
+ fetch_texel(&mach->Samplers[unit],
+ &r[0], &r[1], &r[2], lodBias, /* inputs */
+ &r[0], &r[1], &r[2], &r[3]); /* outputs */
+ break;
+
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
+ case TGSI_EXTSWIZZLE_W:
+ FETCH(&r[3], 0, CHAN_W);
+ micro_div( &r[0], &r[0], &r[3] );
+ micro_div( &r[1], &r[1], &r[3] );
+ micro_div( &r[2], &r[2], &r[3] );
+ break;
+
+ case TGSI_EXTSWIZZLE_ONE:
+ break;
+
+ default:
+ assert (0);
+ }
+
+ if (biasLod) {
+ FETCH(&r[3], 0, CHAN_W);
+ lodBias = r[3].f[0];
+ }
+ else
+ lodBias = 0.0;
+
+ fetch_texel(&mach->Samplers[unit],
+ &r[0], &r[1], &r[2], lodBias,
+ &r[0], &r[1], &r[2], &r[3]);
+ break;
+
+ default:
+ assert (0);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[chan_index], 0, chan_index );
+ }
+}
+
+
+/**
+ * Evaluate a constant-valued coefficient at the position of the
+ * current quad.
+ */
+static void
+eval_constant_coef(
+ struct tgsi_exec_machine *mach,
+ unsigned attrib,
+ unsigned chan )
+{
+ unsigned i;
+
+ for( i = 0; i < QUAD_SIZE; i++ ) {
+ mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
+ }
+}
+
+/**
+ * Evaluate a linear-valued coefficient at the position of the
+ * current quad.
+ */
+static void
+eval_linear_coef(
+ struct tgsi_exec_machine *mach,
+ unsigned attrib,
+ unsigned chan )
+{
+ const float x = mach->QuadPos.xyzw[0].f[0];
+ const float y = mach->QuadPos.xyzw[1].f[0];
+ const float dadx = mach->InterpCoefs[attrib].dadx[chan];
+ const float dady = mach->InterpCoefs[attrib].dady[chan];
+ const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
+ mach->Inputs[attrib].xyzw[chan].f[0] = a0;
+ mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
+ mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
+ mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
+}
+
+/**
+ * Evaluate a perspective-valued coefficient at the position of the
+ * current quad.
+ */
+static void
+eval_perspective_coef(
+ struct tgsi_exec_machine *mach,
+ unsigned attrib,
+ unsigned chan )
+{
+ const float x = mach->QuadPos.xyzw[0].f[0];
+ const float y = mach->QuadPos.xyzw[1].f[0];
+ const float dadx = mach->InterpCoefs[attrib].dadx[chan];
+ const float dady = mach->InterpCoefs[attrib].dady[chan];
+ const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
+ const float *w = mach->QuadPos.xyzw[3].f;
+ /* divide by W here */
+ mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
+ mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
+ mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
+ mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
+}
+
+
+typedef void (* eval_coef_func)(
+ struct tgsi_exec_machine *mach,
+ unsigned attrib,
+ unsigned chan );
+
+static void
+exec_declaration(
+ struct tgsi_exec_machine *mach,
+ const struct tgsi_full_declaration *decl )
+{
+ if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
+ if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+ unsigned first, last, mask;
+ eval_coef_func eval;
+
+ assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
+
+ first = decl->u.DeclarationRange.First;
+ last = decl->u.DeclarationRange.Last;
+ mask = decl->Declaration.UsageMask;
+
+ switch( decl->Interpolation.Interpolate ) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ eval = eval_constant_coef;
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ eval = eval_linear_coef;
+ break;
+
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ eval = eval_perspective_coef;
+ break;
+
+ default:
+ assert( 0 );
+ }
+
+ if( mask == TGSI_WRITEMASK_XYZW ) {
+ unsigned i, j;
+
+ for( i = first; i <= last; i++ ) {
+ for( j = 0; j < NUM_CHANNELS; j++ ) {
+ eval( mach, i, j );
+ }
+ }
+ }
+ else {
+ unsigned i, j;
+
+ for( j = 0; j < NUM_CHANNELS; j++ ) {
+ if( mask & (1 << j) ) {
+ for( i = first; i <= last; i++ ) {
+ eval( mach, i, j );
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+static void
+exec_instruction(
+ struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst,
+ int *pc )
+{
+ uint chan_index;
+ union tgsi_exec_channel r[8];
+
+ (*pc)++;
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ARL:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ micro_f2it( &r[0], &r[0] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_MOV:
+ /* TGSI_OPCODE_SWZ */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_LIT:
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+ }
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ FETCH( &r[0], 0, CHAN_X );
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+ STORE( &r[0], 0, CHAN_Y );
+ }
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ FETCH( &r[1], 0, CHAN_Y );
+ micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+
+ FETCH( &r[2], 0, CHAN_W );
+ micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
+ micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
+ micro_pow( &r[1], &r[1], &r[2] );
+ micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+ STORE( &r[0], 0, CHAN_Z );
+ }
+ }
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_RCP:
+ /* TGSI_OPCODE_RECIP */
+ FETCH( &r[0], 0, CHAN_X );
+ micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_RSQ:
+ /* TGSI_OPCODE_RECIPSQRT */
+ FETCH( &r[0], 0, CHAN_X );
+ micro_sqrt( &r[0], &r[0] );
+ micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] );
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_EXP:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_LOG:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_MUL:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
+ {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+
+ micro_mul( &r[0], &r[0], &r[1] );
+
+ STORE(&r[0], 0, chan_index);
+ }
+ break;
+
+ case TGSI_OPCODE_ADD:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ micro_add( &r[0], &r[0], &r[1] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DP3:
+ /* TGSI_OPCODE_DOT3 */
+ FETCH( &r[0], 0, CHAN_X );
+ FETCH( &r[1], 1, CHAN_X );
+ micro_mul( &r[0], &r[0], &r[1] );
+
+ FETCH( &r[1], 0, CHAN_Y );
+ FETCH( &r[2], 1, CHAN_Y );
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
+
+ FETCH( &r[1], 0, CHAN_Z );
+ FETCH( &r[2], 1, CHAN_Z );
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DP4:
+ /* TGSI_OPCODE_DOT4 */
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 1, CHAN_X);
+
+ micro_mul( &r[0], &r[0], &r[1] );
+
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 1, CHAN_Y);
+
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
+
+ FETCH(&r[1], 0, CHAN_Z);
+ FETCH(&r[2], 1, CHAN_Z);
+
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
+
+ FETCH(&r[1], 0, CHAN_W);
+ FETCH(&r[2], 1, CHAN_W);
+
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DST:
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+ }
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ FETCH( &r[0], 0, CHAN_Y );
+ FETCH( &r[1], 1, CHAN_Y);
+ micro_mul( &r[0], &r[0], &r[1] );
+ STORE( &r[0], 0, CHAN_Y );
+ }
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ FETCH( &r[0], 0, CHAN_Z );
+ STORE( &r[0], 0, CHAN_Z );
+ }
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+ FETCH( &r[0], 1, CHAN_W );
+ STORE( &r[0], 0, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_MIN:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+
+ /* XXX use micro_min()?? */
+ micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
+
+ STORE(&r[0], 0, chan_index);
+ }
+ break;
+
+ case TGSI_OPCODE_MAX:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+
+ /* XXX use micro_max()?? */
+ micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
+
+ STORE(&r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SLT:
+ /* TGSI_OPCODE_SETLT */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SGE:
+ /* TGSI_OPCODE_SETGE */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_MAD:
+ /* TGSI_OPCODE_MADD */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ micro_mul( &r[0], &r[0], &r[1] );
+ FETCH( &r[1], 2, chan_index );
+ micro_add( &r[0], &r[0], &r[1] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SUB:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+
+ micro_sub( &r[0], &r[0], &r[1] );
+
+ STORE(&r[0], 0, chan_index);
+ }
+ break;
+
+ case TGSI_OPCODE_LERP:
+ /* TGSI_OPCODE_LRP */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+ FETCH(&r[2], 2, chan_index);
+
+ micro_sub( &r[1], &r[1], &r[2] );
+ micro_mul( &r[0], &r[0], &r[1] );
+ micro_add( &r[0], &r[0], &r[2] );
+
+ STORE(&r[0], 0, chan_index);
+ }
+ break;
+
+ case TGSI_OPCODE_CND:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_CND0:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_DOT2ADD:
+ /* TGSI_OPCODE_DP2A */
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_INDEX:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_NEGATE:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_FRAC:
+ /* TGSI_OPCODE_FRC */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ micro_frc( &r[0], &r[0] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_CLAMP:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_FLOOR:
+ /* TGSI_OPCODE_FLR */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ micro_flr( &r[0], &r[0] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_ROUND:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ micro_rnd( &r[0], &r[0] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_EXPBASE2:
+ /* TGSI_OPCODE_EX2 */
+ FETCH(&r[0], 0, CHAN_X);
+
+ micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_LOGBASE2:
+ /* TGSI_OPCODE_LG2 */
+ FETCH( &r[0], 0, CHAN_X );
+ micro_lg2( &r[0], &r[0] );
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_POWER:
+ /* TGSI_OPCODE_POW */
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 1, CHAN_X);
+
+ micro_pow( &r[0], &r[0], &r[1] );
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_CROSSPRODUCT:
+ /* TGSI_OPCODE_XPD */
+ FETCH(&r[0], 0, CHAN_Y);
+ FETCH(&r[1], 1, CHAN_Z);
+
+ micro_mul( &r[2], &r[0], &r[1] );
+
+ FETCH(&r[3], 0, CHAN_Z);
+ FETCH(&r[4], 1, CHAN_Y);
+
+ micro_mul( &r[5], &r[3], &r[4] );
+ micro_sub( &r[2], &r[2], &r[5] );
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ STORE( &r[2], 0, CHAN_X );
+ }
+
+ FETCH(&r[2], 1, CHAN_X);
+
+ micro_mul( &r[3], &r[3], &r[2] );
+
+ FETCH(&r[5], 0, CHAN_X);
+
+ micro_mul( &r[1], &r[1], &r[5] );
+ micro_sub( &r[3], &r[3], &r[1] );
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+ STORE( &r[3], 0, CHAN_Y );
+ }
+
+ micro_mul( &r[5], &r[5], &r[4] );
+ micro_mul( &r[0], &r[0], &r[2] );
+ micro_sub( &r[5], &r[5], &r[0] );
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+ STORE( &r[5], 0, CHAN_Z );
+ }
+
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_MULTIPLYMATRIX:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_ABS:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&r[0], 0, chan_index);
+
+ micro_abs( &r[0], &r[0] );
+
+ STORE(&r[0], 0, chan_index);
+ }
+ break;
+
+ case TGSI_OPCODE_RCC:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_DPH:
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 1, CHAN_X);
+
+ micro_mul( &r[0], &r[0], &r[1] );
+
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 1, CHAN_Y);
+
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
+
+ FETCH(&r[1], 0, CHAN_Z);
+ FETCH(&r[2], 1, CHAN_Z);
+
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
+
+ FETCH(&r[1], 1, CHAN_W);
+
+ micro_add( &r[0], &r[0], &r[1] );
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_COS:
+ FETCH(&r[0], 0, CHAN_X);
+
+ micro_cos( &r[0], &r[0] );
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DDX:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ micro_ddx( &r[0], &r[0] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DDY:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ micro_ddy( &r[0], &r[0] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_KILP:
+ exec_kilp (mach, inst);
+ break;
+
+ case TGSI_OPCODE_KIL:
+ /* for enabled ExecMask bits, set the killed bit */
+ mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
+ break;
+
+ case TGSI_OPCODE_PK2H:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_PK2US:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_PK4B:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_PK4UB:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_RFL:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_SEQ:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ micro_eq( &r[0], &r[0], &r[1],
+ &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
+ &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SFL:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_SGT:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SIN:
+ FETCH( &r[0], 0, CHAN_X );
+ micro_sin( &r[0], &r[0] );
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SLE:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SNE:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_STR:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_TEX:
+ /* simple texture lookup */
+ /* src[0] = texcoord */
+ /* src[1] = sampler unit */
+ exec_tex(mach, inst, FALSE);
+ break;
+
+ case TGSI_OPCODE_TXB:
+ /* Texture lookup with lod bias */
+ /* src[0] = texcoord (src[0].w = LOD bias) */
+ /* src[1] = sampler unit */
+ exec_tex(mach, inst, TRUE);
+ break;
+
+ case TGSI_OPCODE_TXD:
+ /* Texture lookup with explict partial derivatives */
+ /* src[0] = texcoord */
+ /* src[1] = d[strq]/dx */
+ /* src[2] = d[strq]/dy */
+ /* src[3] = sampler unit */
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_TXL:
+ /* Texture lookup with explit LOD */
+ /* src[0] = texcoord (src[0].w = LOD) */
+ /* src[1] = sampler unit */
+ exec_tex(mach, inst, TRUE);
+ break;
+
+ case TGSI_OPCODE_UP2H:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_UP2US:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_UP4B:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_UP4UB:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_X2D:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_ARA:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_ARR:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_BRA:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_CAL:
+ /* skip the call if no execution channels are enabled */
+ if (mach->ExecMask) {
+ /* do the call */
+
+ /* push the Cond, Loop, Cont stacks */
+ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+ mach->CondStack[mach->CondStackTop++] = mach->CondMask;
+ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
+ assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+
+ assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
+ mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
+
+ /* note that PC was already incremented above */
+ mach->CallStack[mach->CallStackTop++] = *pc;
+ *pc = inst->InstructionExtLabel.Label;
+ }
+ break;
+
+ case TGSI_OPCODE_RET:
+ mach->FuncMask &= ~mach->ExecMask;
+ UPDATE_EXEC_MASK(mach);
+
+ if (mach->ExecMask == 0x0) {
+ /* really return now (otherwise, keep executing */
+
+ if (mach->CallStackTop == 0) {
+ /* returning from main() */
+ *pc = -1;
+ return;
+ }
+ *pc = mach->CallStack[--mach->CallStackTop];
+
+ /* pop the Cond, Loop, Cont stacks */
+ assert(mach->CondStackTop > 0);
+ mach->CondMask = mach->CondStack[--mach->CondStackTop];
+ assert(mach->LoopStackTop > 0);
+ mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+ assert(mach->ContStackTop > 0);
+ mach->ContMask = mach->ContStack[--mach->ContStackTop];
+ assert(mach->FuncStackTop > 0);
+ mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
+
+ UPDATE_EXEC_MASK(mach);
+ }
+ break;
+
+ case TGSI_OPCODE_SSG:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_CMP:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&r[0], 0, chan_index);
+ FETCH(&r[1], 1, chan_index);
+ FETCH(&r[2], 2, chan_index);
+
+ micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
+
+ STORE(&r[0], 0, chan_index);
+ }
+ break;
+
+ case TGSI_OPCODE_SCS:
+ if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+ FETCH( &r[0], 0, CHAN_X );
+ }
+ if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
+ micro_cos( &r[1], &r[0] );
+ STORE( &r[1], 0, CHAN_X );
+ }
+ if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+ micro_sin( &r[1], &r[0] );
+ STORE( &r[1], 0, CHAN_Y );
+ }
+ if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+ STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
+ }
+ if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_NRM:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_DIV:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_DP2:
+ FETCH( &r[0], 0, CHAN_X );
+ FETCH( &r[1], 1, CHAN_X );
+ micro_mul( &r[0], &r[0], &r[1] );
+
+ FETCH( &r[1], 0, CHAN_Y );
+ FETCH( &r[2], 1, CHAN_Y );
+ micro_mul( &r[1], &r[1], &r[2] );
+ micro_add( &r[0], &r[0], &r[1] );
+
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_IF:
+ /* push CondMask */
+ assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+ mach->CondStack[mach->CondStackTop++] = mach->CondMask;
+ FETCH( &r[0], 0, CHAN_X );
+ /* update CondMask */
+ if( ! r[0].u[0] ) {
+ mach->CondMask &= ~0x1;
+ }
+ if( ! r[0].u[1] ) {
+ mach->CondMask &= ~0x2;
+ }
+ if( ! r[0].u[2] ) {
+ mach->CondMask &= ~0x4;
+ }
+ if( ! r[0].u[3] ) {
+ mach->CondMask &= ~0x8;
+ }
+ UPDATE_EXEC_MASK(mach);
+ /* Todo: If CondMask==0, jump to ELSE */
+ break;
+
+ case TGSI_OPCODE_ELSE:
+ /* invert CondMask wrt previous mask */
+ {
+ uint prevMask;
+ assert(mach->CondStackTop > 0);
+ prevMask = mach->CondStack[mach->CondStackTop - 1];
+ mach->CondMask = ~mach->CondMask & prevMask;
+ UPDATE_EXEC_MASK(mach);
+ /* Todo: If CondMask==0, jump to ENDIF */
+ }
+ break;
+
+ case TGSI_OPCODE_ENDIF:
+ /* pop CondMask */
+ assert(mach->CondStackTop > 0);
+ mach->CondMask = mach->CondStack[--mach->CondStackTop];
+ UPDATE_EXEC_MASK(mach);
+ break;
+
+ case TGSI_OPCODE_END:
+ /* halt execution */
+ *pc = -1;
+ break;
+
+ case TGSI_OPCODE_REP:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_ENDREP:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_PUSHA:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_POPA:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_CEIL:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ micro_ceil( &r[0], &r[0] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_I2F:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ micro_i2f( &r[0], &r[0] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_NOT:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ micro_not( &r[0], &r[0] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_TRUNC:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ micro_trunc( &r[0], &r[0] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SHL:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ micro_shl( &r[0], &r[0], &r[1] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SHR:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ micro_ishr( &r[0], &r[0], &r[1] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_AND:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ micro_and( &r[0], &r[0], &r[1] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_OR:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ micro_or( &r[0], &r[0], &r[1] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_MOD:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_XOR:
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ FETCH( &r[1], 1, chan_index );
+ micro_xor( &r[0], &r[0], &r[1] );
+ STORE( &r[0], 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SAD:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_TXF:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_TXQ:
+ assert (0);
+ break;
+
+ case TGSI_OPCODE_EMIT:
+ mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
+ mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
+ break;
+
+ case TGSI_OPCODE_ENDPRIM:
+ mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
+ mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
+ break;
+
+ case TGSI_OPCODE_LOOP:
+ /* fall-through (for now) */
+ case TGSI_OPCODE_BGNLOOP2:
+ /* push LoopMask and ContMasks */
+ assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
+ assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+ mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+ break;
+
+ case TGSI_OPCODE_ENDLOOP:
+ /* fall-through (for now at least) */
+ case TGSI_OPCODE_ENDLOOP2:
+ /* Restore ContMask, but don't pop */
+ assert(mach->ContStackTop > 0);
+ mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
+ if (mach->LoopMask) {
+ /* repeat loop: jump to instruction just past BGNLOOP */
+ *pc = inst->InstructionExtLabel.Label + 1;
+ }
+ else {
+ /* exit loop: pop LoopMask */
+ assert(mach->LoopStackTop > 0);
+ mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+ /* pop ContMask */
+ assert(mach->ContStackTop > 0);
+ mach->ContMask = mach->ContStack[--mach->ContStackTop];
+ }
+ UPDATE_EXEC_MASK(mach);
+ break;
+
+ case TGSI_OPCODE_BRK:
+ /* turn off loop channels for each enabled exec channel */
+ mach->LoopMask &= ~mach->ExecMask;
+ /* Todo: if mach->LoopMask == 0, jump to end of loop */
+ UPDATE_EXEC_MASK(mach);
+ break;
+
+ case TGSI_OPCODE_CONT:
+ /* turn off cont channels for each enabled exec channel */
+ mach->ContMask &= ~mach->ExecMask;
+ /* Todo: if mach->LoopMask == 0, jump to end of loop */
+ UPDATE_EXEC_MASK(mach);
+ break;
+
+ case TGSI_OPCODE_BGNSUB:
+ /* no-op */
+ break;
+
+ case TGSI_OPCODE_ENDSUB:
+ /* no-op */
+ break;
+
+ case TGSI_OPCODE_NOISE1:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_NOISE2:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_NOISE3:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_NOISE4:
+ assert( 0 );
+ break;
+
+ case TGSI_OPCODE_NOP:
+ break;
+
+ default:
+ assert( 0 );
+ }
+}
+
+
+/**
+ * Run TGSI interpreter.
+ * \return bitmask of "alive" quad components
+ */
+uint
+tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
+{
+ uint i;
+ int pc = 0;
+
+ mach->CondMask = 0xf;
+ mach->LoopMask = 0xf;
+ mach->ContMask = 0xf;
+ mach->FuncMask = 0xf;
+ mach->ExecMask = 0xf;
+
+ mach->CondStackTop = 0; /* temporarily subvert this assertion */
+ assert(mach->CondStackTop == 0);
+ assert(mach->LoopStackTop == 0);
+ assert(mach->ContStackTop == 0);
+ assert(mach->CallStackTop == 0);
+
+ mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
+ mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
+
+ if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
+ mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
+ mach->Primitives[0] = 0;
+ }
+
+
+ /* execute declarations (interpolants) */
+ for (i = 0; i < mach->NumDeclarations; i++) {
+ exec_declaration( mach, mach->Declarations+i );
+ }
+
+ /* execute instructions, until pc is set to -1 */
+ while (pc != -1) {
+ assert(pc < mach->NumInstructions);
+ exec_instruction( mach, mach->Instructions + pc, &pc );
+ }
+
+#if 0
+ /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
+ if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
+ /*
+ * Scale back depth component.
+ */
+ for (i = 0; i < 4; i++)
+ mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
+ }
+#endif
+
+ return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+}
+
+
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#if !defined TGSI_EXEC_H
+#define TGSI_EXEC_H
+
+#include "pipe/p_compiler.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+#define NUM_CHANNELS 4 /* R,G,B,A */
+#define QUAD_SIZE 4 /* 4 pixel/quad */
+
+/**
+ * Registers may be treated as float, signed int or unsigned int.
+ */
+union tgsi_exec_channel
+{
+ float f[QUAD_SIZE];
+ int i[QUAD_SIZE];
+ unsigned u[QUAD_SIZE];
+};
+
+/**
+ * A vector[RGBA] of channels[4 pixels]
+ */
+struct tgsi_exec_vector
+{
+ union tgsi_exec_channel xyzw[NUM_CHANNELS];
+};
+
+/**
+ * For fragment programs, information for computing fragment input
+ * values from plane equation of the triangle/line.
+ */
+struct tgsi_interp_coef
+{
+ float a0[NUM_CHANNELS]; /* in an xyzw layout */
+ float dadx[NUM_CHANNELS];
+ float dady[NUM_CHANNELS];
+};
+
+
+struct softpipe_tile_cache; /**< Opaque to TGSI */
+
+/**
+ * Information for sampling textures, which must be implemented
+ * by code outside the TGSI executor.
+ */
+struct tgsi_sampler
+{
+ const struct pipe_sampler_state *state;
+ struct pipe_texture *texture;
+ /** Get samples for four fragments in a quad */
+ void (*get_samples)(struct tgsi_sampler *sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE]);
+ void *pipe; /*XXX temporary*/
+ struct softpipe_tile_cache *cache;
+};
+
+/**
+ * For branching/calling subroutines.
+ */
+struct tgsi_exec_labels
+{
+ unsigned labels[128][2];
+ unsigned count;
+};
+
+/*
+ * Locations of various utility registers (_I = Index, _C = Channel)
+ */
+#define TGSI_EXEC_TEMP_00000000_I 32
+#define TGSI_EXEC_TEMP_00000000_C 0
+
+#define TGSI_EXEC_TEMP_7FFFFFFF_I 32
+#define TGSI_EXEC_TEMP_7FFFFFFF_C 1
+
+#define TGSI_EXEC_TEMP_80000000_I 32
+#define TGSI_EXEC_TEMP_80000000_C 2
+
+#define TGSI_EXEC_TEMP_FFFFFFFF_I 32
+#define TGSI_EXEC_TEMP_FFFFFFFF_C 3
+
+#define TGSI_EXEC_TEMP_ONE_I 33
+#define TGSI_EXEC_TEMP_ONE_C 0
+
+#define TGSI_EXEC_TEMP_TWO_I 33
+#define TGSI_EXEC_TEMP_TWO_C 1
+
+#define TGSI_EXEC_TEMP_128_I 33
+#define TGSI_EXEC_TEMP_128_C 2
+
+#define TGSI_EXEC_TEMP_MINUS_128_I 33
+#define TGSI_EXEC_TEMP_MINUS_128_C 3
+
+#define TGSI_EXEC_TEMP_KILMASK_I 34
+#define TGSI_EXEC_TEMP_KILMASK_C 0
+
+#define TGSI_EXEC_TEMP_OUTPUT_I 34
+#define TGSI_EXEC_TEMP_OUTPUT_C 1
+
+#define TGSI_EXEC_TEMP_PRIMITIVE_I 34
+#define TGSI_EXEC_TEMP_PRIMITIVE_C 2
+
+#define TGSI_EXEC_TEMP_R0 35
+
+#define TGSI_EXEC_NUM_TEMPS (32 + 4)
+#define TGSI_EXEC_NUM_ADDRS 1
+#define TGSI_EXEC_NUM_IMMEDIATES 256
+
+#define TGSI_EXEC_MAX_COND_NESTING 10
+#define TGSI_EXEC_MAX_LOOP_NESTING 10
+#define TGSI_EXEC_MAX_CALL_NESTING 10
+
+/**
+ * Run-time virtual machine state for executing TGSI shader.
+ */
+struct tgsi_exec_machine
+{
+ /*
+ * 32 program temporaries
+ * 4 internal temporaries
+ * 1 address
+ * 1 temporary of padding to align to 16 bytes
+ */
+ struct tgsi_exec_vector _Temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_ADDRS + 1];
+
+ /*
+ * This will point to _Temps after aligning to 16B boundary.
+ */
+ struct tgsi_exec_vector *Temps;
+ struct tgsi_exec_vector *Addrs;
+
+ struct tgsi_sampler *Samplers;
+
+ float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
+ unsigned ImmLimit;
+ float (*Consts)[4];
+ struct tgsi_exec_vector *Inputs;
+ struct tgsi_exec_vector *Outputs;
+ const struct tgsi_token *Tokens;
+ unsigned Processor;
+
+ /* GEOMETRY processor only. */
+ unsigned *Primitives;
+
+ /* FRAGMENT processor only. */
+ const struct tgsi_interp_coef *InterpCoefs;
+ struct tgsi_exec_vector QuadPos;
+
+ /* Conditional execution masks */
+ uint CondMask; /**< For IF/ELSE/ENDIF */
+ uint LoopMask; /**< For BGNLOOP/ENDLOOP */
+ uint ContMask; /**< For loop CONT statements */
+ uint FuncMask; /**< For function calls */
+ uint ExecMask; /**< = CondMask & LoopMask */
+
+ /** Condition mask stack (for nested conditionals) */
+ uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
+ int CondStackTop;
+
+ /** Loop mask stack (for nested loops) */
+ uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
+ int LoopStackTop;
+
+ /** Loop continue mask stack (see comments in tgsi_exec.c) */
+ uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
+ int ContStackTop;
+
+ /** Function execution mask stack (for executing subroutine code) */
+ uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
+ int FuncStackTop;
+
+ /** Function call stack for saving/restoring the program counter */
+ uint CallStack[TGSI_EXEC_MAX_CALL_NESTING];
+ int CallStackTop;
+
+ struct tgsi_full_instruction *Instructions;
+ uint NumInstructions;
+
+ struct tgsi_full_declaration *Declarations;
+ uint NumDeclarations;
+
+ struct tgsi_exec_labels Labels;
+};
+
+
+void
+tgsi_exec_machine_init(
+ struct tgsi_exec_machine *mach,
+ const struct tgsi_token *tokens,
+ unsigned numSamplers,
+ struct tgsi_sampler *samplers);
+
+uint
+tgsi_exec_machine_run(
+ struct tgsi_exec_machine *mach );
+
+
+void
+tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach);
+
+
+#if defined __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* TGSI_EXEC_H */
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_util.h"
+#include "tgsi_exec.h"
+#include "tgsi_sse2.h"
+
+#include "x86/rtasm/x86sse.h"
+
+#if defined(__i386__) || defined(__386__)
+
+#define DUMP_SSE 0
+
+#if DUMP_SSE
+
+static void
+_print_reg(
+ struct x86_reg reg )
+{
+ if (reg.mod != mod_REG)
+ debug_printf( "[" );
+
+ switch( reg.file ) {
+ case file_REG32:
+ switch( reg.idx ) {
+ case reg_AX:
+ debug_printf( "EAX" );
+ break;
+ case reg_CX:
+ debug_printf( "ECX" );
+ break;
+ case reg_DX:
+ debug_printf( "EDX" );
+ break;
+ case reg_BX:
+ debug_printf( "EBX" );
+ break;
+ case reg_SP:
+ debug_printf( "ESP" );
+ break;
+ case reg_BP:
+ debug_printf( "EBP" );
+ break;
+ case reg_SI:
+ debug_printf( "ESI" );
+ break;
+ case reg_DI:
+ debug_printf( "EDI" );
+ break;
+ }
+ break;
+ case file_MMX:
+ assert( 0 );
+ break;
+ case file_XMM:
+ debug_printf( "XMM%u", reg.idx );
+ break;
+ case file_x87:
+ assert( 0 );
+ break;
+ }
+
+ if (reg.mod == mod_DISP8 ||
+ reg.mod == mod_DISP32)
+ debug_printf("+%d", reg.disp);
+
+ if (reg.mod != mod_REG)
+ debug_printf( "]" );
+}
+
+static void
+_fill(
+ const char *op )
+{
+ unsigned count = 10 - strlen( op );
+
+ while( count-- ) {
+ debug_printf( " " );
+ }
+}
+
+#define DUMP_START() debug_printf( "\nsse-dump start ----------------" )
+#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" )
+#define DUMP( OP ) debug_printf( "\n%s", OP )
+#define DUMP_I( OP, I ) do {\
+ debug_printf( "\n%s", OP );\
+ _fill( OP );\
+ debug_printf( "%u", I ); } while( 0 )
+#define DUMP_R( OP, R0 ) do {\
+ debug_printf( "\n%s", OP );\
+ _fill( OP );\
+ _print_reg( R0 ); } while( 0 )
+#define DUMP_RR( OP, R0, R1 ) do {\
+ debug_printf( "\n%s", OP );\
+ _fill( OP );\
+ _print_reg( R0 );\
+ debug_printf( ", " );\
+ _print_reg( R1 ); } while( 0 )
+#define DUMP_RRI( OP, R0, R1, I ) do {\
+ debug_printf( "\n%s", OP );\
+ _fill( OP );\
+ _print_reg( R0 );\
+ debug_printf( ", " );\
+ _print_reg( R1 );\
+ debug_printf( ", " );\
+ debug_printf( "%u", I ); } while( 0 )
+
+#else
+
+#define DUMP_START()
+#define DUMP_END()
+#define DUMP( OP )
+#define DUMP_I( OP, I )
+#define DUMP_R( OP, R0 )
+#define DUMP_RR( OP, R0, R1 )
+#define DUMP_RRI( OP, R0, R1, I )
+
+#endif
+
+#define FOR_EACH_CHANNEL( CHAN )\
+ for( CHAN = 0; CHAN < 4; CHAN++ )
+
+#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+ ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+ if( IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
+
+#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
+ FOR_EACH_CHANNEL( CHAN )\
+ IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
+
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
+#define TEMP_R0 TGSI_EXEC_TEMP_R0
+
+/**
+ * X86 utility functions.
+ */
+
+static struct x86_reg
+make_xmm(
+ unsigned xmm )
+{
+ return x86_make_reg(
+ file_XMM,
+ (enum x86_reg_name) xmm );
+}
+
+/**
+ * X86 register mapping helpers.
+ */
+
+static struct x86_reg
+get_const_base( void )
+{
+ return x86_make_reg(
+ file_REG32,
+ reg_CX );
+}
+
+static struct x86_reg
+get_input_base( void )
+{
+ return x86_make_reg(
+ file_REG32,
+ reg_AX );
+}
+
+static struct x86_reg
+get_output_base( void )
+{
+ return x86_make_reg(
+ file_REG32,
+ reg_DX );
+}
+
+static struct x86_reg
+get_temp_base( void )
+{
+#ifdef WIN32
+ return x86_make_reg(
+ file_REG32,
+ reg_BX );
+#else
+ return x86_make_reg(
+ file_REG32,
+ reg_SI );
+#endif
+}
+
+static struct x86_reg
+get_coef_base( void )
+{
+ return get_output_base();
+}
+
+/**
+ * Data access helpers.
+ */
+
+static struct x86_reg
+get_argument(
+ unsigned index )
+{
+ return x86_make_disp(
+ x86_make_reg( file_REG32, reg_SP ),
+ (index + 1) * 4 );
+}
+
+static struct x86_reg
+get_const(
+ unsigned vec,
+ unsigned chan )
+{
+ return x86_make_disp(
+ get_const_base(),
+ (vec * 4 + chan) * 4 );
+}
+
+static struct x86_reg
+get_input(
+ unsigned vec,
+ unsigned chan )
+{
+ return x86_make_disp(
+ get_input_base(),
+ (vec * 4 + chan) * 16 );
+}
+
+static struct x86_reg
+get_output(
+ unsigned vec,
+ unsigned chan )
+{
+ return x86_make_disp(
+ get_output_base(),
+ (vec * 4 + chan) * 16 );
+}
+
+static struct x86_reg
+get_temp(
+ unsigned vec,
+ unsigned chan )
+{
+ return x86_make_disp(
+ get_temp_base(),
+ (vec * 4 + chan) * 16 );
+}
+
+static struct x86_reg
+get_coef(
+ unsigned vec,
+ unsigned chan,
+ unsigned member )
+{
+ return x86_make_disp(
+ get_coef_base(),
+ ((vec * 3 + member) * 4 + chan) * 4 );
+}
+
+/**
+ * X86 rtasm wrappers.
+ */
+
+static void
+emit_addps(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "ADDPS", dst, src );
+ sse_addps( func, dst, src );
+}
+
+static void
+emit_andnps(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "ANDNPS", dst, src );
+ sse_andnps( func, dst, src );
+}
+
+static void
+emit_andps(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "ANDPS", dst, src );
+ sse_andps( func, dst, src );
+}
+
+static void
+emit_call(
+ struct x86_function *func,
+ void (* addr)() )
+{
+ struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+
+ DUMP_I( "CALL", addr );
+ x86_mov_reg_imm( func, ecx, (unsigned long) addr );
+ x86_call( func, ecx );
+}
+
+static void
+emit_cmpps(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src,
+ enum sse_cc cc )
+{
+ DUMP_RRI( "CMPPS", dst, src, cc );
+ sse_cmpps( func, dst, src, cc );
+}
+
+static void
+emit_cvttps2dq(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "CVTTPS2DQ", dst, src );
+ sse2_cvttps2dq( func, dst, src );
+}
+
+static void
+emit_maxps(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "MAXPS", dst, src );
+ sse_maxps( func, dst, src );
+}
+
+static void
+emit_minps(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "MINPS", dst, src );
+ sse_minps( func, dst, src );
+}
+
+static void
+emit_mov(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "MOV", dst, src );
+ x86_mov( func, dst, src );
+}
+
+static void
+emit_movaps(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "MOVAPS", dst, src );
+ sse_movaps( func, dst, src );
+}
+
+static void
+emit_movss(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "MOVSS", dst, src );
+ sse_movss( func, dst, src );
+}
+
+static void
+emit_movups(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "MOVUPS", dst, src );
+ sse_movups( func, dst, src );
+}
+
+static void
+emit_mulps(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "MULPS", dst, src );
+ sse_mulps( func, dst, src );
+}
+
+static void
+emit_or(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "OR", dst, src );
+ x86_or( func, dst, src );
+}
+
+static void
+emit_orps(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "ORPS", dst, src );
+ sse_orps( func, dst, src );
+}
+
+static void
+emit_pmovmskb(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "PMOVMSKB", dst, src );
+ sse_pmovmskb( func, dst, src );
+}
+
+static void
+emit_pop(
+ struct x86_function *func,
+ struct x86_reg dst )
+{
+ DUMP_R( "POP", dst );
+ x86_pop( func, dst );
+}
+
+static void
+emit_push(
+ struct x86_function *func,
+ struct x86_reg dst )
+{
+ DUMP_R( "PUSH", dst );
+ x86_push( func, dst );
+}
+
+static void
+emit_rcpps(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "RCPPS", dst, src );
+ sse2_rcpps( func, dst, src );
+}
+
+#ifdef WIN32
+static void
+emit_retw(
+ struct x86_function *func,
+ unsigned size )
+{
+ DUMP_I( "RET", size );
+ x86_retw( func, size );
+}
+#else
+static void
+emit_ret(
+ struct x86_function *func )
+{
+ DUMP( "RET" );
+ x86_ret( func );
+}
+#endif
+
+static void
+emit_rsqrtps(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "RSQRTPS", dst, src );
+ sse_rsqrtps( func, dst, src );
+}
+
+static void
+emit_shufps(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src,
+ unsigned char shuf )
+{
+ DUMP_RRI( "SHUFPS", dst, src, shuf );
+ sse_shufps( func, dst, src, shuf );
+}
+
+static void
+emit_subps(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "SUBPS", dst, src );
+ sse_subps( func, dst, src );
+}
+
+static void
+emit_xorps(
+ struct x86_function *func,
+ struct x86_reg dst,
+ struct x86_reg src )
+{
+ DUMP_RR( "XORPS", dst, src );
+ sse_xorps( func, dst, src );
+}
+
+/**
+ * Data fetch helpers.
+ */
+
+static void
+emit_const(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
+{
+ emit_movss(
+ func,
+ make_xmm( xmm ),
+ get_const( vec, chan ) );
+ emit_shufps(
+ func,
+ make_xmm( xmm ),
+ make_xmm( xmm ),
+ SHUF( 0, 0, 0, 0 ) );
+}
+
+static void
+emit_inputf(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
+{
+ emit_movups(
+ func,
+ make_xmm( xmm ),
+ get_input( vec, chan ) );
+}
+
+static void
+emit_output(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
+{
+ emit_movups(
+ func,
+ get_output( vec, chan ),
+ make_xmm( xmm ) );
+}
+
+static void
+emit_tempf(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
+{
+ emit_movaps(
+ func,
+ make_xmm( xmm ),
+ get_temp( vec, chan ) );
+}
+
+static void
+emit_coef(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan,
+ unsigned member )
+{
+ emit_movss(
+ func,
+ make_xmm( xmm ),
+ get_coef( vec, chan, member ) );
+ emit_shufps(
+ func,
+ make_xmm( xmm ),
+ make_xmm( xmm ),
+ SHUF( 0, 0, 0, 0 ) );
+}
+
+/**
+ * Data store helpers.
+ */
+
+static void
+emit_inputs(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
+{
+ emit_movups(
+ func,
+ get_input( vec, chan ),
+ make_xmm( xmm ) );
+}
+
+static void
+emit_temps(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
+{
+ emit_movaps(
+ func,
+ get_temp( vec, chan ),
+ make_xmm( xmm ) );
+}
+
+static void
+emit_addrs(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
+{
+ emit_temps(
+ func,
+ xmm,
+ vec + TGSI_EXEC_NUM_TEMPS,
+ chan );
+}
+
+/**
+ * Coefficent fetch helpers.
+ */
+
+static void
+emit_coef_a0(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
+{
+ emit_coef(
+ func,
+ xmm,
+ vec,
+ chan,
+ 0 );
+}
+
+static void
+emit_coef_dadx(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
+{
+ emit_coef(
+ func,
+ xmm,
+ vec,
+ chan,
+ 1 );
+}
+
+static void
+emit_coef_dady(
+ struct x86_function *func,
+ unsigned xmm,
+ unsigned vec,
+ unsigned chan )
+{
+ emit_coef(
+ func,
+ xmm,
+ vec,
+ chan,
+ 2 );
+}
+
+/**
+ * Function call helpers.
+ */
+
+static void
+emit_push_gp(
+ struct x86_function *func )
+{
+ emit_push(
+ func,
+ get_const_base() );
+ emit_push(
+ func,
+ get_input_base() );
+ emit_push(
+ func,
+ get_output_base() );
+
+ /* It is important on non-win32 platforms that temp base is pushed last.
+ */
+ emit_push(
+ func,
+ get_temp_base() );
+}
+
+static void
+emit_pop_gp(
+ struct x86_function *func )
+{
+ /* Restore GP registers in a reverse order.
+ */
+ emit_pop(
+ func,
+ get_temp_base() );
+ emit_pop(
+ func,
+ get_output_base() );
+ emit_pop(
+ func,
+ get_input_base() );
+ emit_pop(
+ func,
+ get_const_base() );
+}
+
+static void
+emit_func_call_dst(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ void (*code)() )
+{
+ emit_movaps(
+ func,
+ get_temp( TEMP_R0, 0 ),
+ make_xmm( xmm_dst ) );
+
+ emit_push_gp(
+ func );
+
+#ifdef WIN32
+ emit_push(
+ func,
+ get_temp( TEMP_R0, 0 ) );
+#endif
+
+ emit_call(
+ func,
+ code );
+
+ emit_pop_gp(
+ func );
+
+ emit_movaps(
+ func,
+ make_xmm( xmm_dst ),
+ get_temp( TEMP_R0, 0 ) );
+}
+
+static void
+emit_func_call_dst_src(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src,
+ void (*code)() )
+{
+ emit_movaps(
+ func,
+ get_temp( TEMP_R0, 1 ),
+ make_xmm( xmm_src ) );
+
+ emit_func_call_dst(
+ func,
+ xmm_dst,
+ code );
+}
+
+/**
+ * Low-level instruction translators.
+ */
+
+static void
+emit_abs(
+ struct x86_function *func,
+ unsigned xmm )
+{
+ emit_andps(
+ func,
+ make_xmm( xmm ),
+ get_temp(
+ TGSI_EXEC_TEMP_7FFFFFFF_I,
+ TGSI_EXEC_TEMP_7FFFFFFF_C ) );
+}
+
+static void
+emit_add(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src )
+{
+ emit_addps(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
+}
+
+static void XSTDCALL
+cos4f(
+ float *store )
+{
+#ifdef WIN32
+ store[0] = (float) cos( (double) store[0] );
+ store[1] = (float) cos( (double) store[1] );
+ store[2] = (float) cos( (double) store[2] );
+ store[3] = (float) cos( (double) store[3] );
+#else
+ const unsigned X = TEMP_R0 * 16;
+ store[X + 0] = cosf( store[X + 0] );
+ store[X + 1] = cosf( store[X + 1] );
+ store[X + 2] = cosf( store[X + 2] );
+ store[X + 3] = cosf( store[X + 3] );
+#endif
+}
+
+static void
+emit_cos(
+ struct x86_function *func,
+ unsigned xmm_dst )
+{
+ emit_func_call_dst(
+ func,
+ xmm_dst,
+ cos4f );
+}
+
+static void XSTDCALL
+ex24f(
+ float *store )
+{
+#ifdef WIN32
+ store[0] = (float) pow( 2.0, (double) store[0] );
+ store[1] = (float) pow( 2.0, (double) store[1] );
+ store[2] = (float) pow( 2.0, (double) store[2] );
+ store[3] = (float) pow( 2.0, (double) store[3] );
+#else
+ const unsigned X = TEMP_R0 * 16;
+ store[X + 0] = powf( 2.0f, store[X + 0] );
+ store[X + 1] = powf( 2.0f, store[X + 1] );
+ store[X + 2] = powf( 2.0f, store[X + 2] );
+ store[X + 3] = powf( 2.0f, store[X + 3] );
+#endif
+}
+
+static void
+emit_ex2(
+ struct x86_function *func,
+ unsigned xmm_dst )
+{
+ emit_func_call_dst(
+ func,
+ xmm_dst,
+ ex24f );
+}
+
+static void
+emit_f2it(
+ struct x86_function *func,
+ unsigned xmm )
+{
+ emit_cvttps2dq(
+ func,
+ make_xmm( xmm ),
+ make_xmm( xmm ) );
+}
+
+static void XSTDCALL
+flr4f(
+ float *store )
+{
+#ifdef WIN32
+ const unsigned X = 0;
+#else
+ const unsigned X = TEMP_R0 * 16;
+#endif
+ store[X + 0] = (float) floor( (double) store[X + 0] );
+ store[X + 1] = (float) floor( (double) store[X + 1] );
+ store[X + 2] = (float) floor( (double) store[X + 2] );
+ store[X + 3] = (float) floor( (double) store[X + 3] );
+}
+
+static void
+emit_flr(
+ struct x86_function *func,
+ unsigned xmm_dst )
+{
+ emit_func_call_dst(
+ func,
+ xmm_dst,
+ flr4f );
+}
+
+static void XSTDCALL
+frc4f(
+ float *store )
+{
+#ifdef WIN32
+ const unsigned X = 0;
+#else
+ const unsigned X = TEMP_R0 * 16;
+#endif
+ store[X + 0] -= (float) floor( (double) store[X + 0] );
+ store[X + 1] -= (float) floor( (double) store[X + 1] );
+ store[X + 2] -= (float) floor( (double) store[X + 2] );
+ store[X + 3] -= (float) floor( (double) store[X + 3] );
+}
+
+static void
+emit_frc(
+ struct x86_function *func,
+ unsigned xmm_dst )
+{
+ emit_func_call_dst(
+ func,
+ xmm_dst,
+ frc4f );
+}
+
+static void XSTDCALL
+lg24f(
+ float *store )
+{
+#ifdef WIN32
+ const unsigned X = 0;
+#else
+ const unsigned X = TEMP_R0 * 16;
+#endif
+ store[X + 0] = LOG2( store[X + 0] );
+ store[X + 1] = LOG2( store[X + 1] );
+ store[X + 2] = LOG2( store[X + 2] );
+ store[X + 3] = LOG2( store[X + 3] );
+}
+
+static void
+emit_lg2(
+ struct x86_function *func,
+ unsigned xmm_dst )
+{
+ emit_func_call_dst(
+ func,
+ xmm_dst,
+ lg24f );
+}
+
+static void
+emit_MOV(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src )
+{
+ emit_movups(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
+}
+
+static void
+emit_mul (struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src)
+{
+ emit_mulps(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
+}
+
+static void
+emit_neg(
+ struct x86_function *func,
+ unsigned xmm )
+{
+ emit_xorps(
+ func,
+ make_xmm( xmm ),
+ get_temp(
+ TGSI_EXEC_TEMP_80000000_I,
+ TGSI_EXEC_TEMP_80000000_C ) );
+}
+
+static void XSTDCALL
+pow4f(
+ float *store )
+{
+#ifdef WIN32
+ store[0] = (float) pow( (double) store[0], (double) store[4] );
+ store[1] = (float) pow( (double) store[1], (double) store[5] );
+ store[2] = (float) pow( (double) store[2], (double) store[6] );
+ store[3] = (float) pow( (double) store[3], (double) store[7] );
+#else
+ const unsigned X = TEMP_R0 * 16;
+ store[X + 0] = powf( store[X + 0], store[X + 4] );
+ store[X + 1] = powf( store[X + 1], store[X + 5] );
+ store[X + 2] = powf( store[X + 2], store[X + 6] );
+ store[X + 3] = powf( store[X + 3], store[X + 7] );
+#endif
+}
+
+static void
+emit_pow(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src )
+{
+ emit_func_call_dst_src(
+ func,
+ xmm_dst,
+ xmm_src,
+ pow4f );
+}
+
+static void
+emit_rcp (
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src )
+{
+ emit_rcpps(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
+}
+
+static void
+emit_rsqrt(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src )
+{
+ emit_rsqrtps(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
+}
+
+static void
+emit_setsign(
+ struct x86_function *func,
+ unsigned xmm )
+{
+ emit_orps(
+ func,
+ make_xmm( xmm ),
+ get_temp(
+ TGSI_EXEC_TEMP_80000000_I,
+ TGSI_EXEC_TEMP_80000000_C ) );
+}
+
+static void XSTDCALL
+sin4f(
+ float *store )
+{
+#ifdef WIN32
+ store[0] = (float) sin( (double) store[0] );
+ store[1] = (float) sin( (double) store[1] );
+ store[2] = (float) sin( (double) store[2] );
+ store[3] = (float) sin( (double) store[3] );
+#else
+ const unsigned X = TEMP_R0 * 16;
+ store[X + 0] = sinf( store[X + 0] );
+ store[X + 1] = sinf( store[X + 1] );
+ store[X + 2] = sinf( store[X + 2] );
+ store[X + 3] = sinf( store[X + 3] );
+#endif
+}
+
+static void
+emit_sin (struct x86_function *func,
+ unsigned xmm_dst)
+{
+ emit_func_call_dst(
+ func,
+ xmm_dst,
+ sin4f );
+}
+
+static void
+emit_sub(
+ struct x86_function *func,
+ unsigned xmm_dst,
+ unsigned xmm_src )
+{
+ emit_subps(
+ func,
+ make_xmm( xmm_dst ),
+ make_xmm( xmm_src ) );
+}
+
+/**
+ * Register fetch.
+ */
+
+static void
+emit_fetch(
+ struct x86_function *func,
+ unsigned xmm,
+ const struct tgsi_full_src_register *reg,
+ const unsigned chan_index )
+{
+ unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+
+ switch( swizzle ) {
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ switch( reg->SrcRegister.File ) {
+ case TGSI_FILE_CONSTANT:
+ emit_const(
+ func,
+ xmm,
+ reg->SrcRegister.Index,
+ swizzle );
+ break;
+
+ case TGSI_FILE_INPUT:
+ emit_inputf(
+ func,
+ xmm,
+ reg->SrcRegister.Index,
+ swizzle );
+ break;
+
+ case TGSI_FILE_TEMPORARY:
+ emit_tempf(
+ func,
+ xmm,
+ reg->SrcRegister.Index,
+ swizzle );
+ break;
+
+ default:
+ assert( 0 );
+ }
+ break;
+
+ case TGSI_EXTSWIZZLE_ZERO:
+ emit_tempf(
+ func,
+ xmm,
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C );
+ break;
+
+ case TGSI_EXTSWIZZLE_ONE:
+ emit_tempf(
+ func,
+ xmm,
+ TGSI_EXEC_TEMP_ONE_I,
+ TGSI_EXEC_TEMP_ONE_C );
+ break;
+
+ default:
+ assert( 0 );
+ }
+
+ switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
+ case TGSI_UTIL_SIGN_CLEAR:
+ emit_abs( func, xmm );
+ break;
+
+ case TGSI_UTIL_SIGN_SET:
+ emit_setsign( func, xmm );
+ break;
+
+ case TGSI_UTIL_SIGN_TOGGLE:
+ emit_neg( func, xmm );
+ break;
+
+ case TGSI_UTIL_SIGN_KEEP:
+ break;
+ }
+}
+
+#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
+ emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
+
+/**
+ * Register store.
+ */
+
+static void
+emit_store(
+ struct x86_function *func,
+ unsigned xmm,
+ const struct tgsi_full_dst_register *reg,
+ const struct tgsi_full_instruction *inst,
+ unsigned chan_index )
+{
+ switch( reg->DstRegister.File ) {
+ case TGSI_FILE_OUTPUT:
+ emit_output(
+ func,
+ xmm,
+ reg->DstRegister.Index,
+ chan_index );
+ break;
+
+ case TGSI_FILE_TEMPORARY:
+ emit_temps(
+ func,
+ xmm,
+ reg->DstRegister.Index,
+ chan_index );
+ break;
+
+ case TGSI_FILE_ADDRESS:
+ emit_addrs(
+ func,
+ xmm,
+ reg->DstRegister.Index,
+ chan_index );
+ break;
+
+ default:
+ assert( 0 );
+ }
+
+ switch( inst->Instruction.Saturate ) {
+ case TGSI_SAT_NONE:
+ break;
+
+ case TGSI_SAT_ZERO_ONE:
+// assert( 0 );
+ break;
+
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ assert( 0 );
+ break;
+ }
+}
+
+#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
+ emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
+
+/**
+ * High-level instruction translators.
+ */
+
+static void
+emit_kil(
+ struct x86_function *func,
+ const struct tgsi_full_src_register *reg )
+{
+ unsigned uniquemask;
+ unsigned registers[4];
+ unsigned nextregister = 0;
+ unsigned firstchan = ~0;
+ unsigned chan_index;
+
+ /* This mask stores component bits that were already tested. Note that
+ * we test if the value is less than zero, so 1.0 and 0.0 need not to be
+ * tested. */
+ uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+
+ FOR_EACH_CHANNEL( chan_index ) {
+ unsigned swizzle;
+
+ /* unswizzle channel */
+ swizzle = tgsi_util_get_full_src_register_extswizzle(
+ reg,
+ chan_index );
+
+ /* check if the component has not been already tested */
+ if( !(uniquemask & (1 << swizzle)) ) {
+ uniquemask |= 1 << swizzle;
+
+ /* allocate register */
+ registers[chan_index] = nextregister;
+ emit_fetch(
+ func,
+ nextregister,
+ reg,
+ chan_index );
+ nextregister++;
+
+ /* mark the first channel used */
+ if( firstchan == ~0 ) {
+ firstchan = chan_index;
+ }
+ }
+ }
+
+ emit_push(
+ func,
+ x86_make_reg( file_REG32, reg_AX ) );
+ emit_push(
+ func,
+ x86_make_reg( file_REG32, reg_DX ) );
+
+ FOR_EACH_CHANNEL( chan_index ) {
+ if( uniquemask & (1 << chan_index) ) {
+ emit_cmpps(
+ func,
+ make_xmm( registers[chan_index] ),
+ get_temp(
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C ),
+ cc_LessThan );
+
+ if( chan_index == firstchan ) {
+ emit_pmovmskb(
+ func,
+ x86_make_reg( file_REG32, reg_AX ),
+ make_xmm( registers[chan_index] ) );
+ }
+ else {
+ emit_pmovmskb(
+ func,
+ x86_make_reg( file_REG32, reg_DX ),
+ make_xmm( registers[chan_index] ) );
+ emit_or(
+ func,
+ x86_make_reg( file_REG32, reg_AX ),
+ x86_make_reg( file_REG32, reg_DX ) );
+ }
+ }
+ }
+
+ emit_or(
+ func,
+ get_temp(
+ TGSI_EXEC_TEMP_KILMASK_I,
+ TGSI_EXEC_TEMP_KILMASK_C ),
+ x86_make_reg( file_REG32, reg_AX ) );
+
+ emit_pop(
+ func,
+ x86_make_reg( file_REG32, reg_DX ) );
+ emit_pop(
+ func,
+ x86_make_reg( file_REG32, reg_AX ) );
+}
+
+static void
+emit_setcc(
+ struct x86_function *func,
+ struct tgsi_full_instruction *inst,
+ enum sse_cc cc )
+{
+ unsigned chan_index;
+
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ emit_cmpps(
+ func,
+ make_xmm( 0 ),
+ make_xmm( 1 ),
+ cc );
+ emit_andps(
+ func,
+ make_xmm( 0 ),
+ get_temp(
+ TGSI_EXEC_TEMP_ONE_I,
+ TGSI_EXEC_TEMP_ONE_C ) );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+}
+
+static void
+emit_cmp(
+ struct x86_function *func,
+ struct tgsi_full_instruction *inst )
+{
+ unsigned chan_index;
+
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ FETCH( func, *inst, 2, 2, chan_index );
+ emit_cmpps(
+ func,
+ make_xmm( 0 ),
+ get_temp(
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C ),
+ cc_LessThan );
+ emit_andps(
+ func,
+ make_xmm( 1 ),
+ make_xmm( 0 ) );
+ emit_andnps(
+ func,
+ make_xmm( 0 ),
+ make_xmm( 2 ) );
+ emit_orps(
+ func,
+ make_xmm( 0 ),
+ make_xmm( 1 ) );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+}
+
+static int
+emit_instruction(
+ struct x86_function *func,
+ struct tgsi_full_instruction *inst )
+{
+ unsigned chan_index;
+
+ switch( inst->Instruction.Opcode ) {
+ case TGSI_OPCODE_ARL:
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ emit_f2it( func, 0 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_MOV:
+ /* TGSI_OPCODE_SWZ */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_LIT:
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+ emit_tempf(
+ func,
+ 0,
+ TGSI_EXEC_TEMP_ONE_I,
+ TGSI_EXEC_TEMP_ONE_C);
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
+ STORE( func, *inst, 0, 0, CHAN_X );
+ }
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+ STORE( func, *inst, 0, 0, CHAN_W );
+ }
+ }
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_maxps(
+ func,
+ make_xmm( 0 ),
+ get_temp(
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C ) );
+ STORE( func, *inst, 0, 0, CHAN_Y );
+ }
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+ FETCH( func, *inst, 1, 0, CHAN_Y );
+ emit_maxps(
+ func,
+ make_xmm( 1 ),
+ get_temp(
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C ) );
+ FETCH( func, *inst, 2, 0, CHAN_W );
+ emit_minps(
+ func,
+ make_xmm( 2 ),
+ get_temp(
+ TGSI_EXEC_TEMP_128_I,
+ TGSI_EXEC_TEMP_128_C ) );
+ emit_maxps(
+ func,
+ make_xmm( 2 ),
+ get_temp(
+ TGSI_EXEC_TEMP_MINUS_128_I,
+ TGSI_EXEC_TEMP_MINUS_128_C ) );
+ emit_pow( func, 1, 2 );
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_xorps(
+ func,
+ make_xmm( 2 ),
+ make_xmm( 2 ) );
+ emit_cmpps(
+ func,
+ make_xmm( 2 ),
+ make_xmm( 0 ),
+ cc_LessThanEqual );
+ emit_andps(
+ func,
+ make_xmm( 2 ),
+ make_xmm( 1 ) );
+ STORE( func, *inst, 2, 0, CHAN_Z );
+ }
+ }
+ break;
+
+ case TGSI_OPCODE_RCP:
+ /* TGSI_OPCODE_RECIP */
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_rcp( func, 0, 0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_RSQ:
+ /* TGSI_OPCODE_RECIPSQRT */
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_rsqrt( func, 0, 0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_EXP:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_LOG:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_MUL:
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ emit_mul( func, 0, 1 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_ADD:
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ emit_add( func, 0, 1 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DP3:
+ /* TGSI_OPCODE_DOT3 */
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ FETCH( func, *inst, 1, 1, CHAN_X );
+ emit_mul( func, 0, 1 );
+ FETCH( func, *inst, 1, 0, CHAN_Y );
+ FETCH( func, *inst, 2, 1, CHAN_Y );
+ emit_mul( func, 1, 2 );
+ emit_add( func, 0, 1 );
+ FETCH( func, *inst, 1, 0, CHAN_Z );
+ FETCH( func, *inst, 2, 1, CHAN_Z );
+ emit_mul( func, 1, 2 );
+ emit_add( func, 0, 1 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DP4:
+ /* TGSI_OPCODE_DOT4 */
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ FETCH( func, *inst, 1, 1, CHAN_X );
+ emit_mul( func, 0, 1 );
+ FETCH( func, *inst, 1, 0, CHAN_Y );
+ FETCH( func, *inst, 2, 1, CHAN_Y );
+ emit_mul( func, 1, 2 );
+ emit_add( func, 0, 1 );
+ FETCH( func, *inst, 1, 0, CHAN_Z );
+ FETCH( func, *inst, 2, 1, CHAN_Z );
+ emit_mul(func, 1, 2 );
+ emit_add(func, 0, 1 );
+ FETCH( func, *inst, 1, 0, CHAN_W );
+ FETCH( func, *inst, 2, 1, CHAN_W );
+ emit_mul( func, 1, 2 );
+ emit_add( func, 0, 1 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DST:
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+ emit_tempf(
+ func,
+ 0,
+ TGSI_EXEC_TEMP_ONE_I,
+ TGSI_EXEC_TEMP_ONE_C );
+ STORE( func, *inst, 0, 0, CHAN_X );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+ FETCH( func, *inst, 0, 0, CHAN_Y );
+ FETCH( func, *inst, 1, 1, CHAN_Y );
+ emit_mul( func, 0, 1 );
+ STORE( func, *inst, 0, 0, CHAN_Y );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+ FETCH( func, *inst, 0, 0, CHAN_Z );
+ STORE( func, *inst, 0, 0, CHAN_Z );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+ FETCH( func, *inst, 0, 1, CHAN_W );
+ STORE( func, *inst, 0, 0, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_MIN:
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ emit_minps(
+ func,
+ make_xmm( 0 ),
+ make_xmm( 1 ) );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_MAX:
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ emit_maxps(
+ func,
+ make_xmm( 0 ),
+ make_xmm( 1 ) );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SLT:
+ /* TGSI_OPCODE_SETLT */
+ emit_setcc( func, inst, cc_LessThan );
+ break;
+
+ case TGSI_OPCODE_SGE:
+ /* TGSI_OPCODE_SETGE */
+ emit_setcc( func, inst, cc_NotLessThan );
+ break;
+
+ case TGSI_OPCODE_MAD:
+ /* TGSI_OPCODE_MADD */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ FETCH( func, *inst, 2, 2, chan_index );
+ emit_mul( func, 0, 1 );
+ emit_add( func, 0, 2 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SUB:
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ emit_sub( func, 0, 1 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_LERP:
+ /* TGSI_OPCODE_LRP */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ FETCH( func, *inst, 1, 1, chan_index );
+ FETCH( func, *inst, 2, 2, chan_index );
+ emit_sub( func, 1, 2 );
+ emit_mul( func, 0, 1 );
+ emit_add( func, 0, 2 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_CND:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_CND0:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_DOT2ADD:
+ /* TGSI_OPCODE_DP2A */
+ return 0;
+ break;
+
+ case TGSI_OPCODE_INDEX:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_NEGATE:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_FRAC:
+ /* TGSI_OPCODE_FRC */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ emit_frc( func, 0 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_CLAMP:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_FLOOR:
+ /* TGSI_OPCODE_FLR */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ emit_flr( func, 0 );
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_ROUND:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_EXPBASE2:
+ /* TGSI_OPCODE_EX2 */
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_ex2( func, 0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_LOGBASE2:
+ /* TGSI_OPCODE_LG2 */
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_lg2( func, 0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_POWER:
+ /* TGSI_OPCODE_POW */
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ FETCH( func, *inst, 1, 1, CHAN_X );
+ emit_pow( func, 0, 1 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_CROSSPRODUCT:
+ /* TGSI_OPCODE_XPD */
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+ FETCH( func, *inst, 1, 1, CHAN_Z );
+ FETCH( func, *inst, 3, 0, CHAN_Z );
+ }
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+ FETCH( func, *inst, 0, 0, CHAN_Y );
+ FETCH( func, *inst, 4, 1, CHAN_Y );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+ emit_MOV( func, 2, 0 );
+ emit_mul( func, 2, 1 );
+ emit_MOV( func, 5, 3 );
+ emit_mul( func, 5, 4 );
+ emit_sub( func, 2, 5 );
+ STORE( func, *inst, 2, 0, CHAN_X );
+ }
+ if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
+ IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+ FETCH( func, *inst, 2, 1, CHAN_X );
+ FETCH( func, *inst, 5, 0, CHAN_X );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+ emit_mul( func, 3, 2 );
+ emit_mul( func, 1, 5 );
+ emit_sub( func, 3, 1 );
+ STORE( func, *inst, 3, 0, CHAN_Y );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+ emit_mul( func, 5, 4 );
+ emit_mul( func, 0, 2 );
+ emit_sub( func, 5, 0 );
+ STORE( func, *inst, 5, 0, CHAN_Z );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+ FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C );
+ STORE( func, *inst, 0, 0, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_MULTIPLYMATRIX:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ABS:
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( func, *inst, 0, 0, chan_index );
+ emit_abs( func, 0) ;
+
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_RCC:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_DPH:
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ FETCH( func, *inst, 1, 1, CHAN_X );
+ emit_mul( func, 0, 1 );
+ FETCH( func, *inst, 1, 0, CHAN_Y );
+ FETCH( func, *inst, 2, 1, CHAN_Y );
+ emit_mul( func, 1, 2 );
+ emit_add( func, 0, 1 );
+ FETCH( func, *inst, 1, 0, CHAN_Z );
+ FETCH( func, *inst, 2, 1, CHAN_Z );
+ emit_mul( func, 1, 2 );
+ emit_add( func, 0, 1 );
+ FETCH( func, *inst, 1, 1, CHAN_W );
+ emit_add( func, 0, 1 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_COS:
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_cos( func, 0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_DDX:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_DDY:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_KIL:
+ emit_kil( func, &inst->FullSrcRegisters[0] );
+ break;
+
+ case TGSI_OPCODE_PK2H:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_PK2US:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_PK4B:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_PK4UB:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_RFL:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_SEQ:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_SFL:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_SGT:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_SIN:
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_sin( func, 0 );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_SLE:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_SNE:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_STR:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_TEX:
+ emit_tempf(
+ func,
+ 0,
+ TGSI_EXEC_TEMP_ONE_I,
+ TGSI_EXEC_TEMP_ONE_C );
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( func, *inst, 0, 0, chan_index );
+ }
+ break;
+
+ case TGSI_OPCODE_TXD:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_UP2H:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_UP2US:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_UP4B:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_UP4UB:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_X2D:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ARA:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ARR:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_BRA:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_CAL:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_RET:
+ case TGSI_OPCODE_END:
+#ifdef WIN32
+ emit_retw( func, 16 );
+#else
+ emit_ret( func );
+#endif
+ break;
+
+ case TGSI_OPCODE_SSG:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_CMP:
+ emit_cmp (func, inst);
+ break;
+
+ case TGSI_OPCODE_SCS:
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
+ FETCH( func, *inst, 0, 0, CHAN_X );
+ emit_cos( func, 0 );
+ STORE( func, *inst, 0, 0, CHAN_X );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
+ FETCH( func, *inst, 0, 0, CHAN_Y );
+ emit_sin( func, 0 );
+ STORE( func, *inst, 0, 0, CHAN_Y );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
+ FETCH( func, *inst, 0, TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C );
+ STORE( func, *inst, 0, 0, CHAN_Z );
+ }
+ IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
+ FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C );
+ STORE( func, *inst, 0, 0, CHAN_W );
+ }
+ break;
+
+ case TGSI_OPCODE_TXB:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_NRM:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_DIV:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_DP2:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_TXL:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_BRK:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_IF:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_LOOP:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_REP:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ELSE:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ENDIF:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ENDLOOP:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ENDREP:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_PUSHA:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_POPA:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_CEIL:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_I2F:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_NOT:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_TRUNC:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_SHL:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_SHR:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_AND:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_OR:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_MOD:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_XOR:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_SAD:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_TXF:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_TXQ:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_CONT:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_EMIT:
+ return 0;
+ break;
+
+ case TGSI_OPCODE_ENDPRIM:
+ return 0;
+ break;
+
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+static void
+emit_declaration(
+ struct x86_function *func,
+ struct tgsi_full_declaration *decl )
+{
+ if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+ unsigned first, last, mask;
+ unsigned i, j;
+
+ assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
+
+ first = decl->u.DeclarationRange.First;
+ last = decl->u.DeclarationRange.Last;
+ mask = decl->Declaration.UsageMask;
+
+ /* Do not touch WPOS.xy */
+ if( first == 0 ) {
+ mask &= ~TGSI_WRITEMASK_XY;
+ if( mask == TGSI_WRITEMASK_NONE ) {
+ first++;
+ }
+ }
+
+ for( i = first; i <= last; i++ ) {
+ for( j = 0; j < NUM_CHANNELS; j++ ) {
+ if( mask & (1 << j) ) {
+ switch( decl->Interpolation.Interpolate ) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ emit_coef_a0( func, 0, i, j );
+ emit_inputs( func, 0, i, j );
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ emit_inputf( func, 0, 0, TGSI_SWIZZLE_X );
+ emit_coef_dadx( func, 1, i, j );
+ emit_inputf( func, 2, 0, TGSI_SWIZZLE_Y );
+ emit_coef_dady( func, 3, i, j );
+ emit_mul( func, 0, 1 ); /* x * dadx */
+ emit_coef_a0( func, 4, i, j );
+ emit_mul( func, 2, 3 ); /* y * dady */
+ emit_add( func, 0, 4 ); /* x * dadx + a0 */
+ emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */
+ emit_inputs( func, 0, i, j );
+ break;
+
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ emit_inputf( func, 0, 0, TGSI_SWIZZLE_X );
+ emit_coef_dadx( func, 1, i, j );
+ emit_inputf( func, 2, 0, TGSI_SWIZZLE_Y );
+ emit_coef_dady( func, 3, i, j );
+ emit_mul( func, 0, 1 ); /* x * dadx */
+ emit_inputf( func, 4, 0, TGSI_SWIZZLE_W );
+ emit_coef_a0( func, 5, i, j );
+ emit_rcp( func, 4, 4 ); /* 1.0 / w */
+ emit_mul( func, 2, 3 ); /* y * dady */
+ emit_add( func, 0, 5 ); /* x * dadx + a0 */
+ emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */
+ emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */
+ emit_inputs( func, 0, i, j );
+ break;
+
+ default:
+ assert( 0 );
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+unsigned
+tgsi_emit_sse2(
+ struct tgsi_token *tokens,
+ struct x86_function *func )
+{
+ struct tgsi_parse_context parse;
+ unsigned ok = 1;
+
+ DUMP_START();
+
+ func->csr = func->store;
+
+ emit_mov(
+ func,
+ get_input_base(),
+ get_argument( 0 ) );
+ emit_mov(
+ func,
+ get_output_base(),
+ get_argument( 1 ) );
+ emit_mov(
+ func,
+ get_const_base(),
+ get_argument( 2 ) );
+ emit_mov(
+ func,
+ get_temp_base(),
+ get_argument( 3 ) );
+
+ tgsi_parse_init( &parse, tokens );
+
+ while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ ok = emit_instruction(
+ func,
+ &parse.FullToken.FullInstruction );
+
+ if (!ok) {
+ debug_printf("failed to translate tgsi opcode %d\n",
+ parse.FullToken.FullInstruction.Instruction.Opcode );
+ }
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ /* XXX implement this */
+ ok = 0;
+ debug_printf("failed to emit immediate value\n");
+ break;
+
+ default:
+ assert( 0 );
+ ok = 0;
+ break;
+ }
+ }
+
+ tgsi_parse_free( &parse );
+
+ DUMP_END();
+
+ return ok;
+}
+
+/**
+ * Fragment shaders are responsible for interpolating shader inputs. Because on
+ * x86 we have only 4 GP registers, and here we have 5 shader arguments (input,
+ * output, const, temp and coef), the code is split into two phases --
+ * DECLARATION and INSTRUCTION phase.
+ * GP register holding the output argument is aliased with the coeff argument,
+ * as outputs are not needed in the DECLARATION phase.
+ */
+unsigned
+tgsi_emit_sse2_fs(
+ struct tgsi_token *tokens,
+ struct x86_function *func )
+{
+ struct tgsi_parse_context parse;
+ boolean instruction_phase = FALSE;
+
+ DUMP_START();
+
+ func->csr = func->store;
+
+ /* DECLARATION phase, do not load output argument. */
+ emit_mov(
+ func,
+ get_input_base(),
+ get_argument( 0 ) );
+ emit_mov(
+ func,
+ get_const_base(),
+ get_argument( 2 ) );
+ emit_mov(
+ func,
+ get_temp_base(),
+ get_argument( 3 ) );
+ emit_mov(
+ func,
+ get_coef_base(),
+ get_argument( 4 ) );
+
+ tgsi_parse_init( &parse, tokens );
+
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ emit_declaration(
+ func,
+ &parse.FullToken.FullDeclaration );
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ if( !instruction_phase ) {
+ /* INSTRUCTION phase, overwrite coeff with output. */
+ instruction_phase = TRUE;
+ emit_mov(
+ func,
+ get_output_base(),
+ get_argument( 1 ) );
+ }
+ emit_instruction(
+ func,
+ &parse.FullToken.FullInstruction );
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ /* XXX implement this */
+ assert(0);
+ break;
+
+ default:
+ assert( 0 );
+ }
+ }
+
+ tgsi_parse_free( &parse );
+
+ DUMP_END();
+
+ return 1;
+}
+
+#endif /* i386 */
--- /dev/null
+#if !defined TGSI_SSE2_H
+#define TGSI_SSE2_H
+
+#if defined __cplusplus
+extern "C" {
+#endif // defined __cplusplus
+
+struct tgsi_token;
+struct x86_function;
+
+unsigned
+tgsi_emit_sse2(
+ struct tgsi_token *tokens,
+ struct x86_function *function );
+
+unsigned
+tgsi_emit_sse2_fs(
+ struct tgsi_token *tokens,
+ struct x86_function *function );
+
+#if defined __cplusplus
+} // extern "C"
+#endif // defined __cplusplus
+
+#endif // !defined TGSI_SSE2_H
+
--- /dev/null
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi_build.h"
+#include "tgsi_parse.h"
+
+/*
+ * version
+ */
+
+struct tgsi_version
+tgsi_build_version( void )
+{
+ struct tgsi_version version;
+
+ version.MajorVersion = 1;
+ version.MinorVersion = 1;
+ version.Padding = 0;
+
+ return version;
+}
+
+/*
+ * header
+ */
+
+struct tgsi_header
+tgsi_build_header( void )
+{
+ struct tgsi_header header;
+
+ header.HeaderSize = 1;
+ header.BodySize = 0;
+
+ return header;
+}
+
+static void
+header_headersize_grow( struct tgsi_header *header )
+{
+ assert( header->HeaderSize < 0xFF );
+ assert( header->BodySize == 0 );
+
+ header->HeaderSize++;
+}
+
+static void
+header_bodysize_grow( struct tgsi_header *header )
+{
+ assert( header->BodySize < 0xFFFFFF );
+
+ header->BodySize++;
+}
+
+struct tgsi_processor
+tgsi_default_processor( void )
+{
+ struct tgsi_processor processor;
+
+ processor.Processor = TGSI_PROCESSOR_FRAGMENT;
+ processor.Padding = 0;
+
+ return processor;
+}
+
+struct tgsi_processor
+tgsi_build_processor(
+ unsigned type,
+ struct tgsi_header *header )
+{
+ struct tgsi_processor processor;
+
+ processor = tgsi_default_processor();
+ processor.Processor = type;
+
+ header_headersize_grow( header );
+
+ return processor;
+}
+
+/*
+ * declaration
+ */
+
+struct tgsi_declaration
+tgsi_default_declaration( void )
+{
+ struct tgsi_declaration declaration;
+
+ declaration.Type = TGSI_TOKEN_TYPE_DECLARATION;
+ declaration.Size = 1;
+ declaration.File = TGSI_FILE_NULL;
+ declaration.Declare = TGSI_DECLARE_RANGE;
+ declaration.UsageMask = TGSI_WRITEMASK_XYZW;
+ declaration.Interpolate = 0;
+ declaration.Semantic = 0;
+ declaration.Padding = 0;
+ declaration.Extended = 0;
+
+ return declaration;
+}
+
+struct tgsi_declaration
+tgsi_build_declaration(
+ unsigned file,
+ unsigned declare,
+ unsigned usage_mask,
+ unsigned interpolate,
+ unsigned semantic,
+ struct tgsi_header *header )
+{
+ struct tgsi_declaration declaration;
+
+ assert( file <= TGSI_FILE_IMMEDIATE );
+ assert( declare <= TGSI_DECLARE_MASK );
+
+ declaration = tgsi_default_declaration();
+ declaration.File = file;
+ declaration.Declare = declare;
+ declaration.UsageMask = usage_mask;
+ declaration.Interpolate = interpolate;
+ declaration.Semantic = semantic;
+
+ header_bodysize_grow( header );
+
+ return declaration;
+}
+
+static void
+declaration_grow(
+ struct tgsi_declaration *declaration,
+ struct tgsi_header *header )
+{
+ assert( declaration->Size < 0xFF );
+
+ declaration->Size++;
+
+ header_bodysize_grow( header );
+}
+
+struct tgsi_full_declaration
+tgsi_default_full_declaration( void )
+{
+ struct tgsi_full_declaration full_declaration;
+
+ full_declaration.Declaration = tgsi_default_declaration();
+ full_declaration.Interpolation = tgsi_default_declaration_interpolation();
+ full_declaration.Semantic = tgsi_default_declaration_semantic();
+
+ return full_declaration;
+}
+
+unsigned
+tgsi_build_full_declaration(
+ const struct tgsi_full_declaration *full_decl,
+ struct tgsi_token *tokens,
+ struct tgsi_header *header,
+ unsigned maxsize )
+{
+ unsigned size = 0;
+ struct tgsi_declaration *declaration;
+
+ if( maxsize <= size )
+ return 0;
+ declaration = (struct tgsi_declaration *) &tokens[size];
+ size++;
+
+ *declaration = tgsi_build_declaration(
+ full_decl->Declaration.File,
+ full_decl->Declaration.Declare,
+ full_decl->Declaration.UsageMask,
+ full_decl->Declaration.Interpolate,
+ full_decl->Declaration.Semantic,
+ header );
+
+ switch( full_decl->Declaration.Declare ) {
+ case TGSI_DECLARE_RANGE:
+ {
+ struct tgsi_declaration_range *dr;
+
+ if( maxsize <= size )
+ return 0;
+ dr = (struct tgsi_declaration_range *) &tokens[size];
+ size++;
+
+ *dr = tgsi_build_declaration_range(
+ full_decl->u.DeclarationRange.First,
+ full_decl->u.DeclarationRange.Last,
+ declaration,
+ header );
+ break;
+ }
+
+ case TGSI_DECLARE_MASK:
+ {
+ struct tgsi_declaration_mask *dm;
+
+ if( maxsize <= size )
+ return 0;
+ dm = (struct tgsi_declaration_mask *) &tokens[size];
+ size++;
+
+ *dm = tgsi_build_declaration_mask(
+ full_decl->u.DeclarationMask.Mask,
+ declaration,
+ header );
+ break;
+ }
+
+ default:
+ assert( 0 );
+ }
+
+ if( full_decl->Declaration.Interpolate ) {
+ struct tgsi_declaration_interpolation *di;
+
+ if( maxsize <= size )
+ return 0;
+ di = (struct tgsi_declaration_interpolation *) &tokens[size];
+ size++;
+
+ *di = tgsi_build_declaration_interpolation(
+ full_decl->Interpolation.Interpolate,
+ declaration,
+ header );
+ }
+
+ if( full_decl->Declaration.Semantic ) {
+ struct tgsi_declaration_semantic *ds;
+
+ if( maxsize <= size )
+ return 0;
+ ds = (struct tgsi_declaration_semantic *) &tokens[size];
+ size++;
+
+ *ds = tgsi_build_declaration_semantic(
+ full_decl->Semantic.SemanticName,
+ full_decl->Semantic.SemanticIndex,
+ declaration,
+ header );
+ }
+
+ return size;
+}
+
+struct tgsi_declaration_range
+tgsi_build_declaration_range(
+ unsigned first,
+ unsigned last,
+ struct tgsi_declaration *declaration,
+ struct tgsi_header *header )
+{
+ struct tgsi_declaration_range declaration_range;
+
+ assert( last >= first );
+ assert( last <= 0xFFFF );
+
+ declaration_range.First = first;
+ declaration_range.Last = last;
+
+ declaration_grow( declaration, header );
+
+ return declaration_range;
+}
+
+struct tgsi_declaration_mask
+tgsi_build_declaration_mask(
+ unsigned mask,
+ struct tgsi_declaration *declaration,
+ struct tgsi_header *header )
+{
+ struct tgsi_declaration_mask declaration_mask;
+
+ declaration_mask.Mask = mask;
+
+ declaration_grow( declaration, header );
+
+ return declaration_mask;
+}
+
+struct tgsi_declaration_interpolation
+tgsi_default_declaration_interpolation( void )
+{
+ struct tgsi_declaration_interpolation di;
+
+ di.Interpolate = TGSI_INTERPOLATE_CONSTANT;
+ di.Padding = 0;
+
+ return di;
+}
+
+struct tgsi_declaration_interpolation
+tgsi_build_declaration_interpolation(
+ unsigned interpolate,
+ struct tgsi_declaration *declaration,
+ struct tgsi_header *header )
+{
+ struct tgsi_declaration_interpolation di;
+
+ assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE );
+
+ di = tgsi_default_declaration_interpolation();
+ di.Interpolate = interpolate;
+
+ declaration_grow( declaration, header );
+
+ return di;
+}
+
+struct tgsi_declaration_semantic
+tgsi_default_declaration_semantic( void )
+{
+ struct tgsi_declaration_semantic ds;
+
+ ds.SemanticName = TGSI_SEMANTIC_POSITION;
+ ds.SemanticIndex = 0;
+ ds.Padding = 0;
+
+ return ds;
+}
+
+struct tgsi_declaration_semantic
+tgsi_build_declaration_semantic(
+ unsigned semantic_name,
+ unsigned semantic_index,
+ struct tgsi_declaration *declaration,
+ struct tgsi_header *header )
+{
+ struct tgsi_declaration_semantic ds;
+
+ assert( semantic_name <= TGSI_SEMANTIC_COUNT );
+ assert( semantic_index <= 0xFFFF );
+
+ ds = tgsi_default_declaration_semantic();
+ ds.SemanticName = semantic_name;
+ ds.SemanticIndex = semantic_index;
+
+ declaration_grow( declaration, header );
+
+ return ds;
+}
+
+/*
+ * immediate
+ */
+
+struct tgsi_immediate
+tgsi_default_immediate( void )
+{
+ struct tgsi_immediate immediate;
+
+ immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
+ immediate.Size = 1;
+ immediate.DataType = TGSI_IMM_FLOAT32;
+ immediate.Padding = 0;
+ immediate.Extended = 0;
+
+ return immediate;
+}
+
+struct tgsi_immediate
+tgsi_build_immediate(
+ struct tgsi_header *header )
+{
+ struct tgsi_immediate immediate;
+
+ immediate = tgsi_default_immediate();
+
+ header_bodysize_grow( header );
+
+ return immediate;
+}
+
+struct tgsi_full_immediate
+tgsi_default_full_immediate( void )
+{
+ struct tgsi_full_immediate fullimm;
+
+ fullimm.Immediate = tgsi_default_immediate();
+ fullimm.u.Pointer = (void *) 0;
+
+ return fullimm;
+}
+
+static void
+immediate_grow(
+ struct tgsi_immediate *immediate,
+ struct tgsi_header *header )
+{
+ assert( immediate->Size < 0xFF );
+
+ immediate->Size++;
+
+ header_bodysize_grow( header );
+}
+
+struct tgsi_immediate_float32
+tgsi_build_immediate_float32(
+ float value,
+ struct tgsi_immediate *immediate,
+ struct tgsi_header *header )
+{
+ struct tgsi_immediate_float32 immediate_float32;
+
+ immediate_float32.Float = value;
+
+ immediate_grow( immediate, header );
+
+ return immediate_float32;
+}
+
+unsigned
+tgsi_build_full_immediate(
+ const struct tgsi_full_immediate *full_imm,
+ struct tgsi_token *tokens,
+ struct tgsi_header *header,
+ unsigned maxsize )
+{
+ unsigned size = 0, i;
+ struct tgsi_immediate *immediate;
+
+ if( maxsize <= size )
+ return 0;
+ immediate = (struct tgsi_immediate *) &tokens[size];
+ size++;
+
+ *immediate = tgsi_build_immediate( header );
+
+ for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) {
+ struct tgsi_immediate_float32 *if32;
+
+ if( maxsize <= size )
+ return 0;
+ if32 = (struct tgsi_immediate_float32 *) &tokens[size];
+ size++;
+
+ *if32 = tgsi_build_immediate_float32(
+ full_imm->u.ImmediateFloat32[i].Float,
+ immediate,
+ header );
+ }
+
+ return size;
+}
+
+/*
+ * instruction
+ */
+
+struct tgsi_instruction
+tgsi_default_instruction( void )
+{
+ struct tgsi_instruction instruction;
+
+ instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
+ instruction.Size = 1;
+ instruction.Opcode = TGSI_OPCODE_MOV;
+ instruction.Saturate = TGSI_SAT_NONE;
+ instruction.NumDstRegs = 1;
+ instruction.NumSrcRegs = 1;
+ instruction.Padding = 0;
+ instruction.Extended = 0;
+
+ return instruction;
+}
+
+struct tgsi_instruction
+tgsi_build_instruction(
+ unsigned opcode,
+ unsigned saturate,
+ unsigned num_dst_regs,
+ unsigned num_src_regs,
+ struct tgsi_header *header )
+{
+ struct tgsi_instruction instruction;
+
+ assert (opcode <= TGSI_OPCODE_LAST);
+ assert (saturate <= TGSI_SAT_MINUS_PLUS_ONE);
+ assert (num_dst_regs <= 3);
+ assert (num_src_regs <= 15);
+
+ instruction = tgsi_default_instruction();
+ instruction.Opcode = opcode;
+ instruction.Saturate = saturate;
+ instruction.NumDstRegs = num_dst_regs;
+ instruction.NumSrcRegs = num_src_regs;
+
+ header_bodysize_grow( header );
+
+ return instruction;
+}
+
+static void
+instruction_grow(
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header )
+{
+ assert (instruction->Size < 0xFF);
+
+ instruction->Size++;
+
+ header_bodysize_grow( header );
+}
+
+struct tgsi_full_instruction
+tgsi_default_full_instruction( void )
+{
+ struct tgsi_full_instruction full_instruction;
+ unsigned i;
+
+ full_instruction.Instruction = tgsi_default_instruction();
+ full_instruction.InstructionExtNv = tgsi_default_instruction_ext_nv();
+ full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label();
+ full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture();
+ for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) {
+ full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register();
+ }
+ for( i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) {
+ full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register();
+ }
+
+ return full_instruction;
+}
+
+unsigned
+tgsi_build_full_instruction(
+ const struct tgsi_full_instruction *full_inst,
+ struct tgsi_token *tokens,
+ struct tgsi_header *header,
+ unsigned maxsize )
+{
+ unsigned size = 0;
+ unsigned i;
+ struct tgsi_instruction *instruction;
+ struct tgsi_token *prev_token;
+
+ if( maxsize <= size )
+ return 0;
+ instruction = (struct tgsi_instruction *) &tokens[size];
+ size++;
+
+ *instruction = tgsi_build_instruction(
+ full_inst->Instruction.Opcode,
+ full_inst->Instruction.Saturate,
+ full_inst->Instruction.NumDstRegs,
+ full_inst->Instruction.NumSrcRegs,
+ header );
+ prev_token = (struct tgsi_token *) instruction;
+
+ if( tgsi_compare_instruction_ext_nv(
+ full_inst->InstructionExtNv,
+ tgsi_default_instruction_ext_nv() ) ) {
+ struct tgsi_instruction_ext_nv *instruction_ext_nv;
+
+ if( maxsize <= size )
+ return 0;
+ instruction_ext_nv =
+ (struct tgsi_instruction_ext_nv *) &tokens[size];
+ size++;
+
+ *instruction_ext_nv = tgsi_build_instruction_ext_nv(
+ full_inst->InstructionExtNv.Precision,
+ full_inst->InstructionExtNv.CondDstIndex,
+ full_inst->InstructionExtNv.CondFlowIndex,
+ full_inst->InstructionExtNv.CondMask,
+ full_inst->InstructionExtNv.CondSwizzleX,
+ full_inst->InstructionExtNv.CondSwizzleY,
+ full_inst->InstructionExtNv.CondSwizzleZ,
+ full_inst->InstructionExtNv.CondSwizzleW,
+ full_inst->InstructionExtNv.CondDstUpdate,
+ full_inst->InstructionExtNv.CondFlowEnable,
+ prev_token,
+ instruction,
+ header );
+ prev_token = (struct tgsi_token *) instruction_ext_nv;
+ }
+
+ if( tgsi_compare_instruction_ext_label(
+ full_inst->InstructionExtLabel,
+ tgsi_default_instruction_ext_label() ) ) {
+ struct tgsi_instruction_ext_label *instruction_ext_label;
+
+ if( maxsize <= size )
+ return 0;
+ instruction_ext_label =
+ (struct tgsi_instruction_ext_label *) &tokens[size];
+ size++;
+
+ *instruction_ext_label = tgsi_build_instruction_ext_label(
+ full_inst->InstructionExtLabel.Label,
+ prev_token,
+ instruction,
+ header );
+ prev_token = (struct tgsi_token *) instruction_ext_label;
+ }
+
+ if( tgsi_compare_instruction_ext_texture(
+ full_inst->InstructionExtTexture,
+ tgsi_default_instruction_ext_texture() ) ) {
+ struct tgsi_instruction_ext_texture *instruction_ext_texture;
+
+ if( maxsize <= size )
+ return 0;
+ instruction_ext_texture =
+ (struct tgsi_instruction_ext_texture *) &tokens[size];
+ size++;
+
+ *instruction_ext_texture = tgsi_build_instruction_ext_texture(
+ full_inst->InstructionExtTexture.Texture,
+ prev_token,
+ instruction,
+ header );
+ prev_token = (struct tgsi_token *) instruction_ext_texture;
+ }
+
+ for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) {
+ const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i];
+ struct tgsi_dst_register *dst_register;
+ struct tgsi_token *prev_token;
+
+ if( maxsize <= size )
+ return 0;
+ dst_register = (struct tgsi_dst_register *) &tokens[size];
+ size++;
+
+ *dst_register = tgsi_build_dst_register(
+ reg->DstRegister.File,
+ reg->DstRegister.WriteMask,
+ reg->DstRegister.Index,
+ instruction,
+ header );
+ prev_token = (struct tgsi_token *) dst_register;
+
+ if( tgsi_compare_dst_register_ext_concode(
+ reg->DstRegisterExtConcode,
+ tgsi_default_dst_register_ext_concode() ) ) {
+ struct tgsi_dst_register_ext_concode *dst_register_ext_concode;
+
+ if( maxsize <= size )
+ return 0;
+ dst_register_ext_concode =
+ (struct tgsi_dst_register_ext_concode *) &tokens[size];
+ size++;
+
+ *dst_register_ext_concode = tgsi_build_dst_register_ext_concode(
+ reg->DstRegisterExtConcode.CondMask,
+ reg->DstRegisterExtConcode.CondSwizzleX,
+ reg->DstRegisterExtConcode.CondSwizzleY,
+ reg->DstRegisterExtConcode.CondSwizzleZ,
+ reg->DstRegisterExtConcode.CondSwizzleW,
+ reg->DstRegisterExtConcode.CondSrcIndex,
+ prev_token,
+ instruction,
+ header );
+ prev_token = (struct tgsi_token *) dst_register_ext_concode;
+ }
+
+ if( tgsi_compare_dst_register_ext_modulate(
+ reg->DstRegisterExtModulate,
+ tgsi_default_dst_register_ext_modulate() ) ) {
+ struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate;
+
+ if( maxsize <= size )
+ return 0;
+ dst_register_ext_modulate =
+ (struct tgsi_dst_register_ext_modulate *) &tokens[size];
+ size++;
+
+ *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate(
+ reg->DstRegisterExtModulate.Modulate,
+ prev_token,
+ instruction,
+ header );
+ prev_token = (struct tgsi_token *) dst_register_ext_modulate;
+ }
+ }
+
+ for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) {
+ const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i];
+ struct tgsi_src_register *src_register;
+ struct tgsi_token *prev_token;
+
+ if( maxsize <= size )
+ return 0;
+ src_register = (struct tgsi_src_register *) &tokens[size];
+ size++;
+
+ *src_register = tgsi_build_src_register(
+ reg->SrcRegister.File,
+ reg->SrcRegister.SwizzleX,
+ reg->SrcRegister.SwizzleY,
+ reg->SrcRegister.SwizzleZ,
+ reg->SrcRegister.SwizzleW,
+ reg->SrcRegister.Negate,
+ reg->SrcRegister.Indirect,
+ reg->SrcRegister.Dimension,
+ reg->SrcRegister.Index,
+ instruction,
+ header );
+ prev_token = (struct tgsi_token *) src_register;
+
+ if( tgsi_compare_src_register_ext_swz(
+ reg->SrcRegisterExtSwz,
+ tgsi_default_src_register_ext_swz() ) ) {
+ struct tgsi_src_register_ext_swz *src_register_ext_swz;
+
+ if( maxsize <= size )
+ return 0;
+ src_register_ext_swz =
+ (struct tgsi_src_register_ext_swz *) &tokens[size];
+ size++;
+
+ *src_register_ext_swz = tgsi_build_src_register_ext_swz(
+ reg->SrcRegisterExtSwz.ExtSwizzleX,
+ reg->SrcRegisterExtSwz.ExtSwizzleY,
+ reg->SrcRegisterExtSwz.ExtSwizzleZ,
+ reg->SrcRegisterExtSwz.ExtSwizzleW,
+ reg->SrcRegisterExtSwz.NegateX,
+ reg->SrcRegisterExtSwz.NegateY,
+ reg->SrcRegisterExtSwz.NegateZ,
+ reg->SrcRegisterExtSwz.NegateW,
+ reg->SrcRegisterExtSwz.ExtDivide,
+ prev_token,
+ instruction,
+ header );
+ prev_token = (struct tgsi_token *) src_register_ext_swz;
+ }
+
+ if( tgsi_compare_src_register_ext_mod(
+ reg->SrcRegisterExtMod,
+ tgsi_default_src_register_ext_mod() ) ) {
+ struct tgsi_src_register_ext_mod *src_register_ext_mod;
+
+ if( maxsize <= size )
+ return 0;
+ src_register_ext_mod =
+ (struct tgsi_src_register_ext_mod *) &tokens[size];
+ size++;
+
+ *src_register_ext_mod = tgsi_build_src_register_ext_mod(
+ reg->SrcRegisterExtMod.Complement,
+ reg->SrcRegisterExtMod.Bias,
+ reg->SrcRegisterExtMod.Scale2X,
+ reg->SrcRegisterExtMod.Absolute,
+ reg->SrcRegisterExtMod.Negate,
+ prev_token,
+ instruction,
+ header );
+ prev_token = (struct tgsi_token *) src_register_ext_mod;
+ }
+
+ if( reg->SrcRegister.Indirect ) {
+ struct tgsi_src_register *ind;
+
+ if( maxsize <= size )
+ return 0;
+ ind = (struct tgsi_src_register *) &tokens[size];
+ size++;
+
+ *ind = tgsi_build_src_register(
+ reg->SrcRegisterInd.File,
+ reg->SrcRegisterInd.SwizzleX,
+ reg->SrcRegisterInd.SwizzleY,
+ reg->SrcRegisterInd.SwizzleZ,
+ reg->SrcRegisterInd.SwizzleW,
+ reg->SrcRegisterInd.Negate,
+ reg->SrcRegisterInd.Indirect,
+ reg->SrcRegisterInd.Dimension,
+ reg->SrcRegisterInd.Index,
+ instruction,
+ header );
+ }
+
+ if( reg->SrcRegister.Dimension ) {
+ struct tgsi_dimension *dim;
+
+ assert( !reg->SrcRegisterDim.Dimension );
+
+ if( maxsize <= size )
+ return 0;
+ dim = (struct tgsi_dimension *) &tokens[size];
+ size++;
+
+ *dim = tgsi_build_dimension(
+ reg->SrcRegisterDim.Indirect,
+ reg->SrcRegisterDim.Index,
+ instruction,
+ header );
+
+ if( reg->SrcRegisterDim.Indirect ) {
+ struct tgsi_src_register *ind;
+
+ if( maxsize <= size )
+ return 0;
+ ind = (struct tgsi_src_register *) &tokens[size];
+ size++;
+
+ *ind = tgsi_build_src_register(
+ reg->SrcRegisterDimInd.File,
+ reg->SrcRegisterDimInd.SwizzleX,
+ reg->SrcRegisterDimInd.SwizzleY,
+ reg->SrcRegisterDimInd.SwizzleZ,
+ reg->SrcRegisterDimInd.SwizzleW,
+ reg->SrcRegisterDimInd.Negate,
+ reg->SrcRegisterDimInd.Indirect,
+ reg->SrcRegisterDimInd.Dimension,
+ reg->SrcRegisterDimInd.Index,
+ instruction,
+ header );
+ }
+ }
+ }
+
+ return size;
+}
+
+struct tgsi_instruction_ext_nv
+tgsi_default_instruction_ext_nv( void )
+{
+ struct tgsi_instruction_ext_nv instruction_ext_nv;
+
+ instruction_ext_nv.Type = TGSI_INSTRUCTION_EXT_TYPE_NV;
+ instruction_ext_nv.Precision = TGSI_PRECISION_DEFAULT;
+ instruction_ext_nv.CondDstIndex = 0;
+ instruction_ext_nv.CondFlowIndex = 0;
+ instruction_ext_nv.CondMask = TGSI_CC_TR;
+ instruction_ext_nv.CondSwizzleX = TGSI_SWIZZLE_X;
+ instruction_ext_nv.CondSwizzleY = TGSI_SWIZZLE_Y;
+ instruction_ext_nv.CondSwizzleZ = TGSI_SWIZZLE_Z;
+ instruction_ext_nv.CondSwizzleW = TGSI_SWIZZLE_W;
+ instruction_ext_nv.CondDstUpdate = 0;
+ instruction_ext_nv.CondFlowEnable = 0;
+ instruction_ext_nv.Padding = 0;
+ instruction_ext_nv.Extended = 0;
+
+ return instruction_ext_nv;
+}
+
+union token_u32
+{
+ unsigned u32;
+};
+
+unsigned
+tgsi_compare_instruction_ext_nv(
+ struct tgsi_instruction_ext_nv a,
+ struct tgsi_instruction_ext_nv b )
+{
+ a.Padding = b.Padding = 0;
+ a.Extended = b.Extended = 0;
+ return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_instruction_ext_nv
+tgsi_build_instruction_ext_nv(
+ unsigned precision,
+ unsigned cond_dst_index,
+ unsigned cond_flow_index,
+ unsigned cond_mask,
+ unsigned cond_swizzle_x,
+ unsigned cond_swizzle_y,
+ unsigned cond_swizzle_z,
+ unsigned cond_swizzle_w,
+ unsigned cond_dst_update,
+ unsigned cond_flow_update,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header )
+{
+ struct tgsi_instruction_ext_nv instruction_ext_nv;
+
+ instruction_ext_nv = tgsi_default_instruction_ext_nv();
+ instruction_ext_nv.Precision = precision;
+ instruction_ext_nv.CondDstIndex = cond_dst_index;
+ instruction_ext_nv.CondFlowIndex = cond_flow_index;
+ instruction_ext_nv.CondMask = cond_mask;
+ instruction_ext_nv.CondSwizzleX = cond_swizzle_x;
+ instruction_ext_nv.CondSwizzleY = cond_swizzle_y;
+ instruction_ext_nv.CondSwizzleZ = cond_swizzle_z;
+ instruction_ext_nv.CondSwizzleW = cond_swizzle_w;
+ instruction_ext_nv.CondDstUpdate = cond_dst_update;
+ instruction_ext_nv.CondFlowEnable = cond_flow_update;
+
+ prev_token->Extended = 1;
+ instruction_grow( instruction, header );
+
+ return instruction_ext_nv;
+}
+
+struct tgsi_instruction_ext_label
+tgsi_default_instruction_ext_label( void )
+{
+ struct tgsi_instruction_ext_label instruction_ext_label;
+
+ instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL;
+ instruction_ext_label.Label = 0;
+ instruction_ext_label.Padding = 0;
+ instruction_ext_label.Extended = 0;
+
+ return instruction_ext_label;
+}
+
+unsigned
+tgsi_compare_instruction_ext_label(
+ struct tgsi_instruction_ext_label a,
+ struct tgsi_instruction_ext_label b )
+{
+ a.Padding = b.Padding = 0;
+ a.Extended = b.Extended = 0;
+ return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_instruction_ext_label
+tgsi_build_instruction_ext_label(
+ unsigned label,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header )
+{
+ struct tgsi_instruction_ext_label instruction_ext_label;
+
+ instruction_ext_label = tgsi_default_instruction_ext_label();
+ instruction_ext_label.Label = label;
+
+ prev_token->Extended = 1;
+ instruction_grow( instruction, header );
+
+ return instruction_ext_label;
+}
+
+struct tgsi_instruction_ext_texture
+tgsi_default_instruction_ext_texture( void )
+{
+ struct tgsi_instruction_ext_texture instruction_ext_texture;
+
+ instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE;
+ instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN;
+ instruction_ext_texture.Padding = 0;
+ instruction_ext_texture.Extended = 0;
+
+ return instruction_ext_texture;
+}
+
+unsigned
+tgsi_compare_instruction_ext_texture(
+ struct tgsi_instruction_ext_texture a,
+ struct tgsi_instruction_ext_texture b )
+{
+ a.Padding = b.Padding = 0;
+ a.Extended = b.Extended = 0;
+ return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_instruction_ext_texture
+tgsi_build_instruction_ext_texture(
+ unsigned texture,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header )
+{
+ struct tgsi_instruction_ext_texture instruction_ext_texture;
+
+ instruction_ext_texture = tgsi_default_instruction_ext_texture();
+ instruction_ext_texture.Texture = texture;
+
+ prev_token->Extended = 1;
+ instruction_grow( instruction, header );
+
+ return instruction_ext_texture;
+}
+
+struct tgsi_src_register
+tgsi_default_src_register( void )
+{
+ struct tgsi_src_register src_register;
+
+ src_register.File = TGSI_FILE_NULL;
+ src_register.SwizzleX = TGSI_SWIZZLE_X;
+ src_register.SwizzleY = TGSI_SWIZZLE_Y;
+ src_register.SwizzleZ = TGSI_SWIZZLE_Z;
+ src_register.SwizzleW = TGSI_SWIZZLE_W;
+ src_register.Negate = 0;
+ src_register.Indirect = 0;
+ src_register.Dimension = 0;
+ src_register.Index = 0;
+ src_register.Extended = 0;
+
+ return src_register;
+}
+
+struct tgsi_src_register
+tgsi_build_src_register(
+ unsigned file,
+ unsigned swizzle_x,
+ unsigned swizzle_y,
+ unsigned swizzle_z,
+ unsigned swizzle_w,
+ unsigned negate,
+ unsigned indirect,
+ unsigned dimension,
+ int index,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header )
+{
+ struct tgsi_src_register src_register;
+
+ assert( file <= TGSI_FILE_IMMEDIATE );
+ assert( swizzle_x <= TGSI_SWIZZLE_W );
+ assert( swizzle_y <= TGSI_SWIZZLE_W );
+ assert( swizzle_z <= TGSI_SWIZZLE_W );
+ assert( swizzle_w <= TGSI_SWIZZLE_W );
+ assert( negate <= 1 );
+ assert( index >= -0x8000 && index <= 0x7FFF );
+
+ src_register = tgsi_default_src_register();
+ src_register.File = file;
+ src_register.SwizzleX = swizzle_x;
+ src_register.SwizzleY = swizzle_y;
+ src_register.SwizzleZ = swizzle_z;
+ src_register.SwizzleW = swizzle_w;
+ src_register.Negate = negate;
+ src_register.Indirect = indirect;
+ src_register.Dimension = dimension;
+ src_register.Index = index;
+
+ instruction_grow( instruction, header );
+
+ return src_register;
+}
+
+struct tgsi_full_src_register
+tgsi_default_full_src_register( void )
+{
+ struct tgsi_full_src_register full_src_register;
+
+ full_src_register.SrcRegister = tgsi_default_src_register();
+ full_src_register.SrcRegisterExtSwz = tgsi_default_src_register_ext_swz();
+ full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod();
+ full_src_register.SrcRegisterInd = tgsi_default_src_register();
+ full_src_register.SrcRegisterDim = tgsi_default_dimension();
+ full_src_register.SrcRegisterDimInd = tgsi_default_src_register();
+
+ return full_src_register;
+}
+
+struct tgsi_src_register_ext_swz
+tgsi_default_src_register_ext_swz( void )
+{
+ struct tgsi_src_register_ext_swz src_register_ext_swz;
+
+ src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ;
+ src_register_ext_swz.ExtSwizzleX = TGSI_EXTSWIZZLE_X;
+ src_register_ext_swz.ExtSwizzleY = TGSI_EXTSWIZZLE_Y;
+ src_register_ext_swz.ExtSwizzleZ = TGSI_EXTSWIZZLE_Z;
+ src_register_ext_swz.ExtSwizzleW = TGSI_EXTSWIZZLE_W;
+ src_register_ext_swz.NegateX = 0;
+ src_register_ext_swz.NegateY = 0;
+ src_register_ext_swz.NegateZ = 0;
+ src_register_ext_swz.NegateW = 0;
+ src_register_ext_swz.ExtDivide = TGSI_EXTSWIZZLE_ONE;
+ src_register_ext_swz.Padding = 0;
+ src_register_ext_swz.Extended = 0;
+
+ return src_register_ext_swz;
+}
+
+unsigned
+tgsi_compare_src_register_ext_swz(
+ struct tgsi_src_register_ext_swz a,
+ struct tgsi_src_register_ext_swz b )
+{
+ a.Padding = b.Padding = 0;
+ a.Extended = b.Extended = 0;
+ return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_src_register_ext_swz
+tgsi_build_src_register_ext_swz(
+ unsigned ext_swizzle_x,
+ unsigned ext_swizzle_y,
+ unsigned ext_swizzle_z,
+ unsigned ext_swizzle_w,
+ unsigned negate_x,
+ unsigned negate_y,
+ unsigned negate_z,
+ unsigned negate_w,
+ unsigned ext_divide,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header )
+{
+ struct tgsi_src_register_ext_swz src_register_ext_swz;
+
+ assert( ext_swizzle_x <= TGSI_EXTSWIZZLE_ONE );
+ assert( ext_swizzle_y <= TGSI_EXTSWIZZLE_ONE );
+ assert( ext_swizzle_z <= TGSI_EXTSWIZZLE_ONE );
+ assert( ext_swizzle_w <= TGSI_EXTSWIZZLE_ONE );
+ assert( negate_x <= 1 );
+ assert( negate_y <= 1 );
+ assert( negate_z <= 1 );
+ assert( negate_w <= 1 );
+ assert( ext_divide <= TGSI_EXTSWIZZLE_ONE );
+
+ src_register_ext_swz = tgsi_default_src_register_ext_swz();
+ src_register_ext_swz.ExtSwizzleX = ext_swizzle_x;
+ src_register_ext_swz.ExtSwizzleY = ext_swizzle_y;
+ src_register_ext_swz.ExtSwizzleZ = ext_swizzle_z;
+ src_register_ext_swz.ExtSwizzleW = ext_swizzle_w;
+ src_register_ext_swz.NegateX = negate_x;
+ src_register_ext_swz.NegateY = negate_y;
+ src_register_ext_swz.NegateZ = negate_z;
+ src_register_ext_swz.NegateW = negate_w;
+ src_register_ext_swz.ExtDivide = ext_divide;
+
+ prev_token->Extended = 1;
+ instruction_grow( instruction, header );
+
+ return src_register_ext_swz;
+}
+
+struct tgsi_src_register_ext_mod
+tgsi_default_src_register_ext_mod( void )
+{
+ struct tgsi_src_register_ext_mod src_register_ext_mod;
+
+ src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD;
+ src_register_ext_mod.Complement = 0;
+ src_register_ext_mod.Bias = 0;
+ src_register_ext_mod.Scale2X = 0;
+ src_register_ext_mod.Absolute = 0;
+ src_register_ext_mod.Negate = 0;
+ src_register_ext_mod.Padding = 0;
+ src_register_ext_mod.Extended = 0;
+
+ return src_register_ext_mod;
+}
+
+unsigned
+tgsi_compare_src_register_ext_mod(
+ struct tgsi_src_register_ext_mod a,
+ struct tgsi_src_register_ext_mod b )
+{
+ a.Padding = b.Padding = 0;
+ a.Extended = b.Extended = 0;
+ return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_src_register_ext_mod
+tgsi_build_src_register_ext_mod(
+ unsigned complement,
+ unsigned bias,
+ unsigned scale_2x,
+ unsigned absolute,
+ unsigned negate,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header )
+{
+ struct tgsi_src_register_ext_mod src_register_ext_mod;
+
+ assert( complement <= 1 );
+ assert( bias <= 1 );
+ assert( scale_2x <= 1 );
+ assert( absolute <= 1 );
+ assert( negate <= 1 );
+
+ src_register_ext_mod = tgsi_default_src_register_ext_mod();
+ src_register_ext_mod.Complement = complement;
+ src_register_ext_mod.Bias = bias;
+ src_register_ext_mod.Scale2X = scale_2x;
+ src_register_ext_mod.Absolute = absolute;
+ src_register_ext_mod.Negate = negate;
+
+ prev_token->Extended = 1;
+ instruction_grow( instruction, header );
+
+ return src_register_ext_mod;
+}
+
+struct tgsi_dimension
+tgsi_default_dimension( void )
+{
+ struct tgsi_dimension dimension;
+
+ dimension.Indirect = 0;
+ dimension.Dimension = 0;
+ dimension.Padding = 0;
+ dimension.Index = 0;
+ dimension.Extended = 0;
+
+ return dimension;
+}
+
+struct tgsi_dimension
+tgsi_build_dimension(
+ unsigned indirect,
+ unsigned index,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header )
+{
+ struct tgsi_dimension dimension;
+
+ dimension = tgsi_default_dimension();
+ dimension.Indirect = indirect;
+ dimension.Index = index;
+
+ instruction_grow( instruction, header );
+
+ return dimension;
+}
+
+struct tgsi_dst_register
+tgsi_default_dst_register( void )
+{
+ struct tgsi_dst_register dst_register;
+
+ dst_register.File = TGSI_FILE_NULL;
+ dst_register.WriteMask = TGSI_WRITEMASK_XYZW;
+ dst_register.Indirect = 0;
+ dst_register.Dimension = 0;
+ dst_register.Index = 0;
+ dst_register.Padding = 0;
+ dst_register.Extended = 0;
+
+ return dst_register;
+}
+
+struct tgsi_dst_register
+tgsi_build_dst_register(
+ unsigned file,
+ unsigned mask,
+ int index,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header )
+{
+ struct tgsi_dst_register dst_register;
+
+ assert( file <= TGSI_FILE_IMMEDIATE );
+ assert( mask <= TGSI_WRITEMASK_XYZW );
+ assert( index >= -32768 && index <= 32767 );
+
+ dst_register = tgsi_default_dst_register();
+ dst_register.File = file;
+ dst_register.WriteMask = mask;
+ dst_register.Index = index;
+
+ instruction_grow( instruction, header );
+
+ return dst_register;
+}
+
+struct tgsi_full_dst_register
+tgsi_default_full_dst_register( void )
+{
+ struct tgsi_full_dst_register full_dst_register;
+
+ full_dst_register.DstRegister = tgsi_default_dst_register();
+ full_dst_register.DstRegisterExtConcode =
+ tgsi_default_dst_register_ext_concode();
+ full_dst_register.DstRegisterExtModulate =
+ tgsi_default_dst_register_ext_modulate();
+
+ return full_dst_register;
+}
+
+struct tgsi_dst_register_ext_concode
+tgsi_default_dst_register_ext_concode( void )
+{
+ struct tgsi_dst_register_ext_concode dst_register_ext_concode;
+
+ dst_register_ext_concode.Type = TGSI_DST_REGISTER_EXT_TYPE_CONDCODE;
+ dst_register_ext_concode.CondMask = TGSI_CC_TR;
+ dst_register_ext_concode.CondSwizzleX = TGSI_SWIZZLE_X;
+ dst_register_ext_concode.CondSwizzleY = TGSI_SWIZZLE_Y;
+ dst_register_ext_concode.CondSwizzleZ = TGSI_SWIZZLE_Z;
+ dst_register_ext_concode.CondSwizzleW = TGSI_SWIZZLE_W;
+ dst_register_ext_concode.CondSrcIndex = 0;
+ dst_register_ext_concode.Padding = 0;
+ dst_register_ext_concode.Extended = 0;
+
+ return dst_register_ext_concode;
+}
+
+unsigned
+tgsi_compare_dst_register_ext_concode(
+ struct tgsi_dst_register_ext_concode a,
+ struct tgsi_dst_register_ext_concode b )
+{
+ a.Padding = b.Padding = 0;
+ a.Extended = b.Extended = 0;
+ return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_dst_register_ext_concode
+tgsi_build_dst_register_ext_concode(
+ unsigned cc,
+ unsigned swizzle_x,
+ unsigned swizzle_y,
+ unsigned swizzle_z,
+ unsigned swizzle_w,
+ int index,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header )
+{
+ struct tgsi_dst_register_ext_concode dst_register_ext_concode;
+
+ assert( cc <= TGSI_CC_FL );
+ assert( swizzle_x <= TGSI_SWIZZLE_W );
+ assert( swizzle_y <= TGSI_SWIZZLE_W );
+ assert( swizzle_z <= TGSI_SWIZZLE_W );
+ assert( swizzle_w <= TGSI_SWIZZLE_W );
+ assert( index >= -32768 && index <= 32767 );
+
+ dst_register_ext_concode = tgsi_default_dst_register_ext_concode();
+ dst_register_ext_concode.CondMask = cc;
+ dst_register_ext_concode.CondSwizzleX = swizzle_x;
+ dst_register_ext_concode.CondSwizzleY = swizzle_y;
+ dst_register_ext_concode.CondSwizzleZ = swizzle_z;
+ dst_register_ext_concode.CondSwizzleW = swizzle_w;
+ dst_register_ext_concode.CondSrcIndex = index;
+
+ prev_token->Extended = 1;
+ instruction_grow( instruction, header );
+
+ return dst_register_ext_concode;
+}
+
+struct tgsi_dst_register_ext_modulate
+tgsi_default_dst_register_ext_modulate( void )
+{
+ struct tgsi_dst_register_ext_modulate dst_register_ext_modulate;
+
+ dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE;
+ dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X;
+ dst_register_ext_modulate.Padding = 0;
+ dst_register_ext_modulate.Extended = 0;
+
+ return dst_register_ext_modulate;
+}
+
+unsigned
+tgsi_compare_dst_register_ext_modulate(
+ struct tgsi_dst_register_ext_modulate a,
+ struct tgsi_dst_register_ext_modulate b )
+{
+ a.Padding = b.Padding = 0;
+ a.Extended = b.Extended = 0;
+ return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32;
+}
+
+struct tgsi_dst_register_ext_modulate
+tgsi_build_dst_register_ext_modulate(
+ unsigned modulate,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header )
+{
+ struct tgsi_dst_register_ext_modulate dst_register_ext_modulate;
+
+ assert( modulate <= TGSI_MODULATE_EIGHTH );
+
+ dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate();
+ dst_register_ext_modulate.Modulate = modulate;
+
+ prev_token->Extended = 1;
+ instruction_grow( instruction, header );
+
+ return dst_register_ext_modulate;
+}
+
--- /dev/null
+#if !defined TGSI_BUILD_H
+#define TGSI_BUILD_H
+
+#if defined __cplusplus
+extern "C" {
+#endif // defined __cplusplus
+
+/*
+ * version
+ */
+
+struct tgsi_version
+tgsi_build_version( void );
+
+/*
+ * header
+ */
+
+struct tgsi_header
+tgsi_build_header( void );
+
+struct tgsi_processor
+tgsi_default_processor( void );
+
+struct tgsi_processor
+tgsi_build_processor(
+ unsigned processor,
+ struct tgsi_header *header );
+
+/*
+ * declaration
+ */
+
+struct tgsi_declaration
+tgsi_default_declaration( void );
+
+struct tgsi_declaration
+tgsi_build_declaration(
+ unsigned file,
+ unsigned declare,
+ unsigned usage_mask,
+ unsigned interpolate,
+ unsigned semantic,
+ struct tgsi_header *header );
+
+struct tgsi_full_declaration
+tgsi_default_full_declaration( void );
+
+unsigned
+tgsi_build_full_declaration(
+ const struct tgsi_full_declaration *full_decl,
+ struct tgsi_token *tokens,
+ struct tgsi_header *header,
+ unsigned maxsize );
+
+struct tgsi_declaration_range
+tgsi_build_declaration_range(
+ unsigned first,
+ unsigned last,
+ struct tgsi_declaration *declaration,
+ struct tgsi_header *header );
+
+struct tgsi_declaration_mask
+tgsi_build_declaration_mask(
+ unsigned mask,
+ struct tgsi_declaration *declaration,
+ struct tgsi_header *header );
+
+struct tgsi_declaration_interpolation
+tgsi_default_declaration_interpolation( void );
+
+struct tgsi_declaration_interpolation
+tgsi_build_declaration_interpolation(
+ unsigned interpolate,
+ struct tgsi_declaration *declaration,
+ struct tgsi_header *header );
+
+struct tgsi_declaration_semantic
+tgsi_default_declaration_semantic( void );
+
+struct tgsi_declaration_semantic
+tgsi_build_declaration_semantic(
+ unsigned semantic_name,
+ unsigned semantic_index,
+ struct tgsi_declaration *declaration,
+ struct tgsi_header *header );
+
+/*
+ * immediate
+ */
+
+struct tgsi_immediate
+tgsi_default_immediate( void );
+
+struct tgsi_immediate
+tgsi_build_immediate(
+ struct tgsi_header *header );
+
+struct tgsi_full_immediate
+tgsi_default_full_immediate( void );
+
+struct tgsi_immediate_float32
+tgsi_build_immediate_float32(
+ float value,
+ struct tgsi_immediate *immediate,
+ struct tgsi_header *header );
+
+unsigned
+tgsi_build_full_immediate(
+ const struct tgsi_full_immediate *full_imm,
+ struct tgsi_token *tokens,
+ struct tgsi_header *header,
+ unsigned maxsize );
+
+/*
+ * instruction
+ */
+
+struct tgsi_instruction
+tgsi_default_instruction( void );
+
+struct tgsi_instruction
+tgsi_build_instruction(
+ unsigned opcode,
+ unsigned saturate,
+ unsigned num_dst_regs,
+ unsigned num_src_regs,
+ struct tgsi_header *header );
+
+struct tgsi_full_instruction
+tgsi_default_full_instruction( void );
+
+unsigned
+tgsi_build_full_instruction(
+ const struct tgsi_full_instruction *full_inst,
+ struct tgsi_token *tokens,
+ struct tgsi_header *header,
+ unsigned maxsize );
+
+struct tgsi_instruction_ext_nv
+tgsi_default_instruction_ext_nv( void );
+
+unsigned
+tgsi_compare_instruction_ext_nv(
+ struct tgsi_instruction_ext_nv a,
+ struct tgsi_instruction_ext_nv b );
+
+struct tgsi_instruction_ext_nv
+tgsi_build_instruction_ext_nv(
+ unsigned precision,
+ unsigned cond_dst_index,
+ unsigned cond_flow_index,
+ unsigned cond_mask,
+ unsigned cond_swizzle_x,
+ unsigned cond_swizzle_y,
+ unsigned cond_swizzle_z,
+ unsigned cond_swizzle_w,
+ unsigned cond_dst_update,
+ unsigned cond_flow_update,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header );
+
+struct tgsi_instruction_ext_label
+tgsi_default_instruction_ext_label( void );
+
+unsigned
+tgsi_compare_instruction_ext_label(
+ struct tgsi_instruction_ext_label a,
+ struct tgsi_instruction_ext_label b );
+
+struct tgsi_instruction_ext_label
+tgsi_build_instruction_ext_label(
+ unsigned label,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header );
+
+struct tgsi_instruction_ext_texture
+tgsi_default_instruction_ext_texture( void );
+
+unsigned
+tgsi_compare_instruction_ext_texture(
+ struct tgsi_instruction_ext_texture a,
+ struct tgsi_instruction_ext_texture b );
+
+struct tgsi_instruction_ext_texture
+tgsi_build_instruction_ext_texture(
+ unsigned texture,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header );
+
+struct tgsi_src_register
+tgsi_default_src_register( void );
+
+struct tgsi_src_register
+tgsi_build_src_register(
+ unsigned file,
+ unsigned swizzle_x,
+ unsigned swizzle_y,
+ unsigned swizzle_z,
+ unsigned swizzle_w,
+ unsigned negate,
+ unsigned indirect,
+ unsigned dimension,
+ int index,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header );
+
+struct tgsi_full_src_register
+tgsi_default_full_src_register( void );
+
+struct tgsi_src_register_ext_swz
+tgsi_default_src_register_ext_swz( void );
+
+unsigned
+tgsi_compare_src_register_ext_swz(
+ struct tgsi_src_register_ext_swz a,
+ struct tgsi_src_register_ext_swz b );
+
+struct tgsi_src_register_ext_swz
+tgsi_build_src_register_ext_swz(
+ unsigned ext_swizzle_x,
+ unsigned ext_swizzle_y,
+ unsigned ext_swizzle_z,
+ unsigned ext_swizzle_w,
+ unsigned negate_x,
+ unsigned negate_y,
+ unsigned negate_z,
+ unsigned negate_w,
+ unsigned ext_divide,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header );
+
+struct tgsi_src_register_ext_mod
+tgsi_default_src_register_ext_mod( void );
+
+unsigned
+tgsi_compare_src_register_ext_mod(
+ struct tgsi_src_register_ext_mod a,
+ struct tgsi_src_register_ext_mod b );
+
+struct tgsi_src_register_ext_mod
+tgsi_build_src_register_ext_mod(
+ unsigned complement,
+ unsigned bias,
+ unsigned scale_2x,
+ unsigned absolute,
+ unsigned negate,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header );
+
+struct tgsi_dimension
+tgsi_default_dimension( void );
+
+struct tgsi_dimension
+tgsi_build_dimension(
+ unsigned indirect,
+ unsigned index,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header );
+
+struct tgsi_dst_register
+tgsi_default_dst_register( void );
+
+struct tgsi_dst_register
+tgsi_build_dst_register(
+ unsigned file,
+ unsigned mask,
+ int index,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header );
+
+struct tgsi_full_dst_register
+tgsi_default_full_dst_register( void );
+
+struct tgsi_dst_register_ext_concode
+tgsi_default_dst_register_ext_concode( void );
+
+unsigned
+tgsi_compare_dst_register_ext_concode(
+ struct tgsi_dst_register_ext_concode a,
+ struct tgsi_dst_register_ext_concode b );
+
+struct tgsi_dst_register_ext_concode
+tgsi_build_dst_register_ext_concode(
+ unsigned cc,
+ unsigned swizzle_x,
+ unsigned swizzle_y,
+ unsigned swizzle_z,
+ unsigned swizzle_w,
+ int index,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header );
+
+struct tgsi_dst_register_ext_modulate
+tgsi_default_dst_register_ext_modulate( void );
+
+unsigned
+tgsi_compare_dst_register_ext_modulate(
+ struct tgsi_dst_register_ext_modulate a,
+ struct tgsi_dst_register_ext_modulate b );
+
+struct tgsi_dst_register_ext_modulate
+tgsi_build_dst_register_ext_modulate(
+ unsigned modulate,
+ struct tgsi_token *prev_token,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header );
+
+#if defined __cplusplus
+} // extern "C"
+#endif // defined __cplusplus
+
+#endif // !defined TGSI_BUILD_H
+
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdio.h>
+
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi_dump.h"
+#include "tgsi_parse.h"
+#include "tgsi_build.h"
+
+struct gen_dump
+{
+ unsigned tabs;
+ void (* write)(
+ struct gen_dump *dump,
+ const void *data,
+ unsigned size );
+};
+
+struct text_dump
+{
+ struct gen_dump base;
+ char *text;
+ unsigned length;
+ unsigned capacity;
+};
+
+static void
+_text_dump_write(
+ struct gen_dump *dump,
+ const void *data,
+ unsigned size )
+{
+ struct text_dump *td = (struct text_dump *) dump;
+ unsigned new_length = td->length + size;
+
+ if( new_length >= td->capacity ) {
+ unsigned new_capacity = td->capacity;
+
+ do {
+ if( new_capacity == 0 ) {
+ new_capacity = 256;
+ }
+ else {
+ new_capacity *= 2;
+ }
+ } while( new_length >= new_capacity );
+ td->text = (char *) REALLOC(
+ td->text,
+ td->capacity,
+ new_capacity );
+ td->capacity = new_capacity;
+ }
+ memcpy(
+ &td->text[td->length],
+ data,
+ size );
+ td->length = new_length;
+ td->text[td->length] = '\0';
+}
+
+struct file_dump
+{
+ struct gen_dump base;
+ FILE *file;
+};
+
+static void
+_file_dump_write(
+ struct gen_dump *dump,
+ const void *data,
+ unsigned size )
+{
+ struct file_dump *fd = (struct file_dump *) dump;
+
+#if 0
+ fwrite( data, 1, size, fd->file );
+#else
+ {
+ unsigned i;
+
+ for (i = 0; i < size; i++ ) {
+ fprintf( fd->file, "%c", ((const char *) data)[i] );
+ }
+ }
+#endif
+}
+
+static void
+gen_dump_str(
+ struct gen_dump *dump,
+ const char *str )
+{
+ unsigned i;
+ size_t len = strlen( str );
+
+ for (i = 0; i < len; i++) {
+ dump->write( dump, &str[i], 1 );
+ if (str[i] == '\n') {
+ unsigned i;
+
+ for (i = 0; i < dump->tabs; i++) {
+ dump->write( dump, " ", 4 );
+ }
+ }
+ }
+}
+
+static void
+gen_dump_chr(
+ struct gen_dump *dump,
+ const char chr )
+{
+ dump->write( dump, &chr, 1 );
+}
+
+static void
+gen_dump_uix(
+ struct gen_dump *dump,
+ const unsigned ui )
+{
+ char str[36];
+
+ sprintf( str, "0x%x", ui );
+ gen_dump_str( dump, str );
+}
+
+static void
+gen_dump_uid(
+ struct gen_dump *dump,
+ const unsigned ui )
+{
+ char str[16];
+
+ sprintf( str, "%u", ui );
+ gen_dump_str( dump, str );
+}
+
+static void
+gen_dump_sid(
+ struct gen_dump *dump,
+ const int si )
+{
+ char str[16];
+
+ sprintf( str, "%d", si );
+ gen_dump_str( dump, str );
+}
+
+static void
+gen_dump_flt(
+ struct gen_dump *dump,
+ const float flt )
+{
+ char str[48];
+
+ sprintf( str, "%10.4f", flt );
+ gen_dump_str( dump, str );
+}
+
+static void
+gen_dump_enum(
+ struct gen_dump *dump,
+ const unsigned e,
+ const char **enums,
+ const unsigned enums_count )
+{
+ if (e >= enums_count) {
+ gen_dump_uid( dump, e );
+ }
+ else {
+ gen_dump_str( dump, enums[e] );
+ }
+}
+
+static void
+gen_dump_tab(
+ struct gen_dump *dump )
+{
+ ++dump->tabs;
+}
+
+static void
+gen_dump_untab(
+ struct gen_dump *dump )
+{
+ assert( dump->tabs > 0 );
+
+ --dump->tabs;
+}
+
+#define TXT(S) gen_dump_str( dump, S )
+#define CHR(C) gen_dump_chr( dump, C )
+#define UIX(I) gen_dump_uix( dump, I )
+#define UID(I) gen_dump_uid( dump, I )
+#define SID(I) gen_dump_sid( dump, I )
+#define FLT(F) gen_dump_flt( dump, F )
+#define TAB() gen_dump_tab( dump )
+#define UNT() gen_dump_untab( dump )
+#define ENM(E,ENUMS) gen_dump_enum( dump, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )
+
+static const char *TGSI_PROCESSOR_TYPES[] =
+{
+ "PROCESSOR_FRAGMENT",
+ "PROCESSOR_VERTEX",
+ "PROCESSOR_GEOMETRY"
+};
+
+static const char *TGSI_PROCESSOR_TYPES_SHORT[] =
+{
+ "FRAG",
+ "VERT",
+ "GEOM"
+};
+
+static const char *TGSI_TOKEN_TYPES[] =
+{
+ "TOKEN_TYPE_DECLARATION",
+ "TOKEN_TYPE_IMMEDIATE",
+ "TOKEN_TYPE_INSTRUCTION"
+};
+
+static const char *TGSI_FILES[] =
+{
+ "FILE_NULL",
+ "FILE_CONSTANT",
+ "FILE_INPUT",
+ "FILE_OUTPUT",
+ "FILE_TEMPORARY",
+ "FILE_SAMPLER",
+ "FILE_ADDRESS",
+ "FILE_IMMEDIATE"
+};
+
+static const char *TGSI_FILES_SHORT[] =
+{
+ "NULL",
+ "CONST",
+ "IN",
+ "OUT",
+ "TEMP",
+ "SAMP",
+ "ADDR",
+ "IMM"
+};
+
+static const char *TGSI_DECLARES[] =
+{
+ "DECLARE_RANGE",
+ "DECLARE_MASK"
+};
+
+static const char *TGSI_INTERPOLATES[] =
+{
+ "INTERPOLATE_CONSTANT",
+ "INTERPOLATE_LINEAR",
+ "INTERPOLATE_PERSPECTIVE",
+ "INTERPOLATE_ATTRIB"
+};
+
+static const char *TGSI_INTERPOLATES_SHORT[] =
+{
+ "CONSTANT",
+ "LINEAR",
+ "PERSPECTIVE",
+ "ATTRIB"
+};
+
+static const char *TGSI_SEMANTICS[] =
+{
+ "SEMANTIC_POSITION",
+ "SEMANTIC_COLOR",
+ "SEMANTIC_BCOLOR",
+ "SEMANTIC_FOG",
+ "SEMANTIC_PSIZE",
+ "SEMANTIC_GENERIC,"
+};
+
+static const char *TGSI_SEMANTICS_SHORT[] =
+{
+ "POSITION",
+ "COLOR",
+ "BCOLOR",
+ "FOG",
+ "PSIZE",
+ "GENERIC",
+};
+
+static const char *TGSI_IMMS[] =
+{
+ "IMM_FLOAT32"
+};
+
+static const char *TGSI_IMMS_SHORT[] =
+{
+ "FLT32"
+};
+
+static const char *TGSI_OPCODES[] =
+{
+ "OPCODE_ARL",
+ "OPCODE_MOV",
+ "OPCODE_LIT",
+ "OPCODE_RCP",
+ "OPCODE_RSQ",
+ "OPCODE_EXP",
+ "OPCODE_LOG",
+ "OPCODE_MUL",
+ "OPCODE_ADD",
+ "OPCODE_DP3",
+ "OPCODE_DP4",
+ "OPCODE_DST",
+ "OPCODE_MIN",
+ "OPCODE_MAX",
+ "OPCODE_SLT",
+ "OPCODE_SGE",
+ "OPCODE_MAD",
+ "OPCODE_SUB",
+ "OPCODE_LERP",
+ "OPCODE_CND",
+ "OPCODE_CND0",
+ "OPCODE_DOT2ADD",
+ "OPCODE_INDEX",
+ "OPCODE_NEGATE",
+ "OPCODE_FRAC",
+ "OPCODE_CLAMP",
+ "OPCODE_FLOOR",
+ "OPCODE_ROUND",
+ "OPCODE_EXPBASE2",
+ "OPCODE_LOGBASE2",
+ "OPCODE_POWER",
+ "OPCODE_CROSSPRODUCT",
+ "OPCODE_MULTIPLYMATRIX",
+ "OPCODE_ABS",
+ "OPCODE_RCC",
+ "OPCODE_DPH",
+ "OPCODE_COS",
+ "OPCODE_DDX",
+ "OPCODE_DDY",
+ "OPCODE_KILP",
+ "OPCODE_PK2H",
+ "OPCODE_PK2US",
+ "OPCODE_PK4B",
+ "OPCODE_PK4UB",
+ "OPCODE_RFL",
+ "OPCODE_SEQ",
+ "OPCODE_SFL",
+ "OPCODE_SGT",
+ "OPCODE_SIN",
+ "OPCODE_SLE",
+ "OPCODE_SNE",
+ "OPCODE_STR",
+ "OPCODE_TEX",
+ "OPCODE_TXD",
+ "OPCODE_UP2H",
+ "OPCODE_UP2US",
+ "OPCODE_UP4B",
+ "OPCODE_UP4UB",
+ "OPCODE_X2D",
+ "OPCODE_ARA",
+ "OPCODE_ARR",
+ "OPCODE_BRA",
+ "OPCODE_CAL",
+ "OPCODE_RET",
+ "OPCODE_SSG",
+ "OPCODE_CMP",
+ "OPCODE_SCS",
+ "OPCODE_TXB",
+ "OPCODE_NRM",
+ "OPCODE_DIV",
+ "OPCODE_DP2",
+ "OPCODE_TXL",
+ "OPCODE_BRK",
+ "OPCODE_IF",
+ "OPCODE_LOOP",
+ "OPCODE_REP",
+ "OPCODE_ELSE",
+ "OPCODE_ENDIF",
+ "OPCODE_ENDLOOP",
+ "OPCODE_ENDREP",
+ "OPCODE_PUSHA",
+ "OPCODE_POPA",
+ "OPCODE_CEIL",
+ "OPCODE_I2F",
+ "OPCODE_NOT",
+ "OPCODE_TRUNC",
+ "OPCODE_SHL",
+ "OPCODE_SHR",
+ "OPCODE_AND",
+ "OPCODE_OR",
+ "OPCODE_MOD",
+ "OPCODE_XOR",
+ "OPCODE_SAD",
+ "OPCODE_TXF",
+ "OPCODE_TXQ",
+ "OPCODE_CONT",
+ "OPCODE_EMIT",
+ "OPCODE_ENDPRIM",
+ "OPCODE_BGNLOOP2",
+ "OPCODE_BGNSUB",
+ "OPCODE_ENDLOOP2",
+ "OPCODE_ENDSUB",
+ "OPCODE_NOISE1",
+ "OPCODE_NOISE2",
+ "OPCODE_NOISE3",
+ "OPCODE_NOISE4",
+ "OPCODE_NOP",
+ "OPCODE_TEXBEM",
+ "OPCODE_TEXBEML",
+ "OPCODE_TEXREG2AR",
+ "OPCODE_TEXM3X2PAD",
+ "OPCODE_TEXM3X2TEX",
+ "OPCODE_TEXM3X3PAD",
+ "OPCODE_TEXM3X3TEX",
+ "OPCODE_TEXM3X3SPEC",
+ "OPCODE_TEXM3X3VSPEC",
+ "OPCODE_TEXREG2GB",
+ "OPCODE_TEXREG2RGB",
+ "OPCODE_TEXDP3TEX",
+ "OPCODE_TEXDP3",
+ "OPCODE_TEXM3X3",
+ "OPCODE_TEXM3X2DEPTH",
+ "OPCODE_TEXDEPTH",
+ "OPCODE_BEM",
+ "OPCODE_M4X3",
+ "OPCODE_M3X4",
+ "OPCODE_M3X3",
+ "OPCODE_M3X2",
+ "OPCODE_NRM4",
+ "OPCODE_CALLNZ",
+ "OPCODE_IFC",
+ "OPCODE_BREAKC",
+ "OPCODE_KIL",
+ "OPCODE_END"
+};
+
+static const char *TGSI_OPCODES_SHORT[] =
+{
+ "ARL",
+ "MOV",
+ "LIT",
+ "RCP",
+ "RSQ",
+ "EXP",
+ "LOG",
+ "MUL",
+ "ADD",
+ "DP3",
+ "DP4",
+ "DST",
+ "MIN",
+ "MAX",
+ "SLT",
+ "SGE",
+ "MAD",
+ "SUB",
+ "LERP",
+ "CND",
+ "CND0",
+ "DOT2ADD",
+ "INDEX",
+ "NEGATE",
+ "FRAC",
+ "CLAMP",
+ "FLOOR",
+ "ROUND",
+ "EXPBASE2",
+ "LOGBASE2",
+ "POWER",
+ "CROSSPRODUCT",
+ "MULTIPLYMATRIX",
+ "ABS",
+ "RCC",
+ "DPH",
+ "COS",
+ "DDX",
+ "DDY",
+ "KILP",
+ "PK2H",
+ "PK2US",
+ "PK4B",
+ "PK4UB",
+ "RFL",
+ "SEQ",
+ "SFL",
+ "SGT",
+ "SIN",
+ "SLE",
+ "SNE",
+ "STR",
+ "TEX",
+ "TXD",
+ "UP2H",
+ "UP2US",
+ "UP4B",
+ "UP4UB",
+ "X2D",
+ "ARA",
+ "ARR",
+ "BRA",
+ "CAL",
+ "RET",
+ "SSG",
+ "CMP",
+ "SCS",
+ "TXB",
+ "NRM",
+ "DIV",
+ "DP2",
+ "TXL",
+ "BRK",
+ "IF",
+ "LOOP",
+ "REP",
+ "ELSE",
+ "ENDIF",
+ "ENDLOOP",
+ "ENDREP",
+ "PUSHA",
+ "POPA",
+ "CEIL",
+ "I2F",
+ "NOT",
+ "TRUNC",
+ "SHL",
+ "SHR",
+ "AND",
+ "OR",
+ "MOD",
+ "XOR",
+ "SAD",
+ "TXF",
+ "TXQ",
+ "CONT",
+ "EMIT",
+ "ENDPRIM",
+ "BGNLOOP2",
+ "BGNSUB",
+ "ENDLOOP2",
+ "ENDSUB",
+ "NOISE1",
+ "NOISE2",
+ "NOISE3",
+ "NOISE4",
+ "NOP",
+ "TEXBEM",
+ "TEXBEML",
+ "TEXREG2AR",
+ "TEXM3X2PAD",
+ "TEXM3X2TEX",
+ "TEXM3X3PAD",
+ "TEXM3X3TEX",
+ "TEXM3X3SPEC",
+ "TEXM3X3VSPEC",
+ "TEXREG2GB",
+ "TEXREG2RGB",
+ "TEXDP3TEX",
+ "TEXDP3",
+ "TEXM3X3",
+ "TEXM3X2DEPTH",
+ "TEXDEPTH",
+ "BEM",
+ "M4X3",
+ "M3X4",
+ "M3X3",
+ "M3X2",
+ "NRM4",
+ "CALLNZ",
+ "IFC",
+ "BREAKC",
+ "KIL",
+ "END"
+};
+
+static const char *TGSI_SATS[] =
+{
+ "SAT_NONE",
+ "SAT_ZERO_ONE",
+ "SAT_MINUS_PLUS_ONE"
+};
+
+static const char *TGSI_INSTRUCTION_EXTS[] =
+{
+ "INSTRUCTION_EXT_TYPE_NV",
+ "INSTRUCTION_EXT_TYPE_LABEL",
+ "INSTRUCTION_EXT_TYPE_TEXTURE"
+};
+
+static const char *TGSI_PRECISIONS[] =
+{
+ "PRECISION_DEFAULT",
+ "TGSI_PRECISION_FLOAT32",
+ "TGSI_PRECISION_FLOAT16",
+ "TGSI_PRECISION_FIXED12"
+};
+
+static const char *TGSI_CCS[] =
+{
+ "CC_GT",
+ "CC_EQ",
+ "CC_LT",
+ "CC_UN",
+ "CC_GE",
+ "CC_LE",
+ "CC_NE",
+ "CC_TR",
+ "CC_FL"
+};
+
+static const char *TGSI_SWIZZLES[] =
+{
+ "SWIZZLE_X",
+ "SWIZZLE_Y",
+ "SWIZZLE_Z",
+ "SWIZZLE_W"
+};
+
+static const char *TGSI_SWIZZLES_SHORT[] =
+{
+ "x",
+ "y",
+ "z",
+ "w"
+};
+
+static const char *TGSI_TEXTURES[] =
+{
+ "TEXTURE_UNKNOWN",
+ "TEXTURE_1D",
+ "TEXTURE_2D",
+ "TEXTURE_3D",
+ "TEXTURE_CUBE",
+ "TEXTURE_RECT",
+ "TEXTURE_SHADOW1D",
+ "TEXTURE_SHADOW2D",
+ "TEXTURE_SHADOWRECT"
+};
+
+static const char *TGSI_SRC_REGISTER_EXTS[] =
+{
+ "SRC_REGISTER_EXT_TYPE_SWZ",
+ "SRC_REGISTER_EXT_TYPE_MOD"
+};
+
+static const char *TGSI_EXTSWIZZLES[] =
+{
+ "EXTSWIZZLE_X",
+ "EXTSWIZZLE_Y",
+ "EXTSWIZZLE_Z",
+ "EXTSWIZZLE_W",
+ "EXTSWIZZLE_ZERO",
+ "EXTSWIZZLE_ONE"
+};
+
+static const char *TGSI_EXTSWIZZLES_SHORT[] =
+{
+ "x",
+ "y",
+ "z",
+ "w",
+ "0",
+ "1"
+};
+
+static const char *TGSI_WRITEMASKS[] =
+{
+ "0",
+ "WRITEMASK_X",
+ "WRITEMASK_Y",
+ "WRITEMASK_XY",
+ "WRITEMASK_Z",
+ "WRITEMASK_XZ",
+ "WRITEMASK_YZ",
+ "WRITEMASK_XYZ",
+ "WRITEMASK_W",
+ "WRITEMASK_XW",
+ "WRITEMASK_YW",
+ "WRITEMASK_XYW",
+ "WRITEMASK_ZW",
+ "WRITEMASK_XZW",
+ "WRITEMASK_YZW",
+ "WRITEMASK_XYZW"
+};
+
+static const char *TGSI_DST_REGISTER_EXTS[] =
+{
+ "DST_REGISTER_EXT_TYPE_CONDCODE",
+ "DST_REGISTER_EXT_TYPE_MODULATE"
+};
+
+static const char *TGSI_MODULATES[] =
+{
+ "MODULATE_1X",
+ "MODULATE_2X",
+ "MODULATE_4X",
+ "MODULATE_8X",
+ "MODULATE_HALF",
+ "MODULATE_QUARTER",
+ "MODULATE_EIGHTH"
+};
+
+static void
+dump_declaration_short(
+ struct gen_dump *dump,
+ struct tgsi_full_declaration *decl )
+{
+ TXT( "\nDCL " );
+ ENM( decl->Declaration.File, TGSI_FILES_SHORT );
+
+ switch( decl->Declaration.Declare ) {
+ case TGSI_DECLARE_RANGE:
+ CHR( '[' );
+ UID( decl->u.DeclarationRange.First );
+ if( decl->u.DeclarationRange.First != decl->u.DeclarationRange.Last ) {
+ TXT( ".." );
+ UID( decl->u.DeclarationRange.Last );
+ }
+ CHR( ']' );
+ break;
+ default:
+ assert( 0 );
+ }
+
+ if( decl->Declaration.UsageMask != TGSI_WRITEMASK_XYZW ) {
+ CHR( '.' );
+ if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) {
+ CHR( 'x' );
+ }
+ if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) {
+ CHR( 'y' );
+ }
+ if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) {
+ CHR( 'z' );
+ }
+ if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) {
+ CHR( 'w' );
+ }
+ }
+
+ if( decl->Declaration.Interpolate ) {
+ TXT( ", " );
+ ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT );
+ }
+
+ if( decl->Declaration.Semantic ) {
+ TXT( ", " );
+ ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT );
+ CHR( '[' );
+ UID( decl->Semantic.SemanticIndex );
+ CHR( ']' );
+ }
+}
+
+static void
+dump_declaration_verbose(
+ struct gen_dump *dump,
+ struct tgsi_full_declaration *decl,
+ unsigned ignored,
+ unsigned deflt,
+ struct tgsi_full_declaration *fd )
+{
+ TXT( "\nFile : " );
+ ENM( decl->Declaration.File, TGSI_FILES );
+ TXT( "\nDeclare : " );
+ ENM( decl->Declaration.Declare, TGSI_DECLARES );
+ if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) {
+ TXT( "\nUsageMask : " );
+ if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) {
+ CHR( 'X' );
+ }
+ if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) {
+ CHR( 'Y' );
+ }
+ if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) {
+ CHR( 'Z' );
+ }
+ if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) {
+ CHR( 'W' );
+ }
+ }
+ if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) {
+ TXT( "\nInterpolate: " );
+ UID( decl->Declaration.Interpolate );
+ }
+ if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) {
+ TXT( "\nSemantic : " );
+ UID( decl->Declaration.Semantic );
+ }
+ if( ignored ) {
+ TXT( "\nPadding : " );
+ UIX( decl->Declaration.Padding );
+ }
+
+ CHR( '\n' );
+ switch( decl->Declaration.Declare ) {
+ case TGSI_DECLARE_RANGE:
+ TXT( "\nFirst: " );
+ UID( decl->u.DeclarationRange.First );
+ TXT( "\nLast : " );
+ UID( decl->u.DeclarationRange.Last );
+ break;
+
+ case TGSI_DECLARE_MASK:
+ TXT( "\nMask: " );
+ UIX( decl->u.DeclarationMask.Mask );
+ break;
+
+ default:
+ assert( 0 );
+ }
+
+ if( decl->Declaration.Interpolate ) {
+ CHR( '\n' );
+ TXT( "\nInterpolate: " );
+ ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES );
+ if( ignored ) {
+ TXT( "\nPadding : " );
+ UIX( decl->Interpolation.Padding );
+ }
+ }
+
+ if( decl->Declaration.Semantic ) {
+ CHR( '\n' );
+ TXT( "\nSemanticName : " );
+ ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS );
+ TXT( "\nSemanticIndex: " );
+ UID( decl->Semantic.SemanticIndex );
+ if( ignored ) {
+ TXT( "\nPadding : " );
+ UIX( decl->Semantic.Padding );
+ }
+ }
+}
+
+static void
+dump_immediate_short(
+ struct gen_dump *dump,
+ struct tgsi_full_immediate *imm )
+{
+ unsigned i;
+
+ TXT( "\nIMM " );
+ ENM( imm->Immediate.DataType, TGSI_IMMS_SHORT );
+
+ TXT( " { " );
+ for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+ switch( imm->Immediate.DataType ) {
+ case TGSI_IMM_FLOAT32:
+ FLT( imm->u.ImmediateFloat32[i].Float );
+ break;
+
+ default:
+ assert( 0 );
+ }
+
+ if( i < imm->Immediate.Size - 2 ) {
+ TXT( ", " );
+ }
+ }
+ TXT( " }" );
+}
+
+static void
+dump_immediate_verbose(
+ struct gen_dump *dump,
+ struct tgsi_full_immediate *imm,
+ unsigned ignored )
+{
+ unsigned i;
+
+ TXT( "\nDataType : " );
+ ENM( imm->Immediate.DataType, TGSI_IMMS );
+ if( ignored ) {
+ TXT( "\nPadding : " );
+ UIX( imm->Immediate.Padding );
+ }
+
+ for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+ CHR( '\n' );
+ switch( imm->Immediate.DataType ) {
+ case TGSI_IMM_FLOAT32:
+ TXT( "\nFloat: " );
+ FLT( imm->u.ImmediateFloat32[i].Float );
+ break;
+
+ default:
+ assert( 0 );
+ }
+ }
+}
+
+static void
+dump_instruction_short(
+ struct gen_dump *dump,
+ struct tgsi_full_instruction *inst,
+ unsigned instno )
+{
+ unsigned i;
+ boolean first_reg = TRUE;
+
+ CHR( '\n' );
+ UID( instno );
+ CHR( ':' );
+ ENM( inst->Instruction.Opcode, TGSI_OPCODES_SHORT );
+
+ switch( inst->Instruction.Saturate ) {
+ case TGSI_SAT_NONE:
+ break;
+ case TGSI_SAT_ZERO_ONE:
+ TXT( "_SAT" );
+ break;
+ case TGSI_SAT_MINUS_PLUS_ONE:
+ TXT( "_SAT[-1,1]" );
+ break;
+ default:
+ assert( 0 );
+ }
+
+ for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
+ struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+
+ if( !first_reg ) {
+ CHR( ',' );
+ }
+ CHR( ' ' );
+
+ ENM( dst->DstRegister.File, TGSI_FILES_SHORT );
+
+ CHR( '[' );
+ SID( dst->DstRegister.Index );
+ CHR( ']' );
+
+ if( dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW ) {
+ CHR( '.' );
+ if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_X ) {
+ CHR( 'x' );
+ }
+ if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_Y ) {
+ CHR( 'y' );
+ }
+ if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_Z ) {
+ CHR( 'z' );
+ }
+ if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_W ) {
+ CHR( 'w' );
+ }
+ }
+
+ first_reg = FALSE;
+ }
+
+ for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
+ struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+
+ if( !first_reg ) {
+ CHR( ',' );
+ }
+ CHR( ' ' );
+
+ if( src->SrcRegisterExtMod.Complement ) {
+ TXT( "(1 - " );
+ }
+ if( src->SrcRegisterExtMod.Negate ) {
+ CHR( '-' );
+ }
+ if( src->SrcRegisterExtMod.Absolute ) {
+ CHR( '|' );
+ }
+ if( src->SrcRegister.Negate ) {
+ CHR( '-' );
+ }
+
+ ENM( src->SrcRegister.File, TGSI_FILES_SHORT );
+
+ CHR( '[' );
+ SID( src->SrcRegister.Index );
+ CHR( ']' );
+
+ if (src->SrcRegister.Extended) {
+ if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
+ src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
+ src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
+ src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) {
+ CHR( '.' );
+ ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT );
+ ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT );
+ ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT );
+ ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT );
+ }
+ }
+ else if( src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
+ src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
+ src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
+ src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ) {
+ CHR( '.' );
+ ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES_SHORT );
+ ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES_SHORT );
+ ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES_SHORT );
+ ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES_SHORT );
+ }
+
+ if( src->SrcRegisterExtMod.Absolute ) {
+ CHR( '|' );
+ }
+ if( src->SrcRegisterExtMod.Complement ) {
+ CHR( ')' );
+ }
+
+ first_reg = FALSE;
+ }
+
+ switch( inst->Instruction.Opcode ) {
+ case TGSI_OPCODE_IF:
+ case TGSI_OPCODE_ELSE:
+ case TGSI_OPCODE_BGNLOOP2:
+ case TGSI_OPCODE_ENDLOOP2:
+ case TGSI_OPCODE_CAL:
+ TXT( " :" );
+ UID( inst->InstructionExtLabel.Label );
+ break;
+ }
+}
+
+static void
+dump_instruction_verbose(
+ struct gen_dump *dump,
+ struct tgsi_full_instruction *inst,
+ unsigned ignored,
+ unsigned deflt,
+ struct tgsi_full_instruction *fi )
+{
+ unsigned i;
+
+ TXT( "\nOpcode : " );
+ ENM( inst->Instruction.Opcode, TGSI_OPCODES );
+ if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) {
+ TXT( "\nSaturate : " );
+ ENM( inst->Instruction.Saturate, TGSI_SATS );
+ }
+ if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) {
+ TXT( "\nNumDstRegs : " );
+ UID( inst->Instruction.NumDstRegs );
+ }
+ if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) {
+ TXT( "\nNumSrcRegs : " );
+ UID( inst->Instruction.NumSrcRegs );
+ }
+ if( ignored ) {
+ TXT( "\nPadding : " );
+ UIX( inst->Instruction.Padding );
+ }
+
+ if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) {
+ CHR( '\n' );
+ TXT( "\nType : " );
+ ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS );
+ if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) {
+ TXT( "\nPrecision : " );
+ ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS );
+ }
+ if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) {
+ TXT( "\nCondDstIndex : " );
+ UID( inst->InstructionExtNv.CondDstIndex );
+ }
+ if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) {
+ TXT( "\nCondFlowIndex : " );
+ UID( inst->InstructionExtNv.CondFlowIndex );
+ }
+ if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) {
+ TXT( "\nCondMask : " );
+ ENM( inst->InstructionExtNv.CondMask, TGSI_CCS );
+ }
+ if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) {
+ TXT( "\nCondSwizzleX : " );
+ ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES );
+ }
+ if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) {
+ TXT( "\nCondSwizzleY : " );
+ ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES );
+ }
+ if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) {
+ TXT( "\nCondSwizzleZ : " );
+ ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES );
+ }
+ if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) {
+ TXT( "\nCondSwizzleW : " );
+ ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES );
+ }
+ if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) {
+ TXT( "\nCondDstUpdate : " );
+ UID( inst->InstructionExtNv.CondDstUpdate );
+ }
+ if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) {
+ TXT( "\nCondFlowEnable: " );
+ UID( inst->InstructionExtNv.CondFlowEnable );
+ }
+ if( ignored ) {
+ TXT( "\nPadding : " );
+ UIX( inst->InstructionExtNv.Padding );
+ if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) {
+ TXT( "\nExtended : " );
+ UID( inst->InstructionExtNv.Extended );
+ }
+ }
+ }
+
+ if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) {
+ CHR( '\n' );
+ TXT( "\nType : " );
+ ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS );
+ if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) {
+ TXT( "\nLabel : " );
+ UID( inst->InstructionExtLabel.Label );
+ }
+ if( ignored ) {
+ TXT( "\nPadding : " );
+ UIX( inst->InstructionExtLabel.Padding );
+ if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) {
+ TXT( "\nExtended: " );
+ UID( inst->InstructionExtLabel.Extended );
+ }
+ }
+ }
+
+ if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) {
+ CHR( '\n' );
+ TXT( "\nType : " );
+ ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS );
+ if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) {
+ TXT( "\nTexture : " );
+ ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES );
+ }
+ if( ignored ) {
+ TXT( "\nPadding : " );
+ UIX( inst->InstructionExtTexture.Padding );
+ if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) {
+ TXT( "\nExtended: " );
+ UID( inst->InstructionExtTexture.Extended );
+ }
+ }
+ }
+
+ for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
+ struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+ struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i];
+
+ CHR( '\n' );
+ TXT( "\nFile : " );
+ ENM( dst->DstRegister.File, TGSI_FILES );
+ if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) {
+ TXT( "\nWriteMask: " );
+ ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS );
+ }
+ if( ignored ) {
+ if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) {
+ TXT( "\nIndirect : " );
+ UID( dst->DstRegister.Indirect );
+ }
+ if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) {
+ TXT( "\nDimension: " );
+ UID( dst->DstRegister.Dimension );
+ }
+ }
+ if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) {
+ TXT( "\nIndex : " );
+ SID( dst->DstRegister.Index );
+ }
+ if( ignored ) {
+ TXT( "\nPadding : " );
+ UIX( dst->DstRegister.Padding );
+ if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) {
+ TXT( "\nExtended : " );
+ UID( dst->DstRegister.Extended );
+ }
+ }
+
+ if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) {
+ CHR( '\n' );
+ TXT( "\nType : " );
+ ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS );
+ if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) {
+ TXT( "\nCondMask : " );
+ ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS );
+ }
+ if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) {
+ TXT( "\nCondSwizzleX: " );
+ ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES );
+ }
+ if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) {
+ TXT( "\nCondSwizzleY: " );
+ ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES );
+ }
+ if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) {
+ TXT( "\nCondSwizzleZ: " );
+ ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES );
+ }
+ if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) {
+ TXT( "\nCondSwizzleW: " );
+ ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES );
+ }
+ if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) {
+ TXT( "\nCondSrcIndex: " );
+ UID( dst->DstRegisterExtConcode.CondSrcIndex );
+ }
+ if( ignored ) {
+ TXT( "\nPadding : " );
+ UIX( dst->DstRegisterExtConcode.Padding );
+ if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) {
+ TXT( "\nExtended : " );
+ UID( dst->DstRegisterExtConcode.Extended );
+ }
+ }
+ }
+
+ if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) {
+ CHR( '\n' );
+ TXT( "\nType : " );
+ ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS );
+ if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) {
+ TXT( "\nModulate: " );
+ ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES );
+ }
+ if( ignored ) {
+ TXT( "\nPadding : " );
+ UIX( dst->DstRegisterExtModulate.Padding );
+ if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) {
+ TXT( "\nExtended: " );
+ UID( dst->DstRegisterExtModulate.Extended );
+ }
+ }
+ }
+ }
+
+ for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
+ struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+ struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i];
+
+ CHR( '\n' );
+ TXT( "\nFile : ");
+ ENM( src->SrcRegister.File, TGSI_FILES );
+ if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) {
+ TXT( "\nSwizzleX : " );
+ ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES );
+ }
+ if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) {
+ TXT( "\nSwizzleY : " );
+ ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES );
+ }
+ if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) {
+ TXT( "\nSwizzleZ : " );
+ ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES );
+ }
+ if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) {
+ TXT( "\nSwizzleW : " );
+ ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES );
+ }
+ if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) {
+ TXT( "\nNegate : " );
+ UID( src->SrcRegister.Negate );
+ }
+ if( ignored ) {
+ if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) {
+ TXT( "\nIndirect : " );
+ UID( src->SrcRegister.Indirect );
+ }
+ if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) {
+ TXT( "\nDimension: " );
+ UID( src->SrcRegister.Dimension );
+ }
+ }
+ if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) {
+ TXT( "\nIndex : " );
+ SID( src->SrcRegister.Index );
+ }
+ if( ignored ) {
+ if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) {
+ TXT( "\nExtended : " );
+ UID( src->SrcRegister.Extended );
+ }
+ }
+
+ if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) {
+ CHR( '\n' );
+ TXT( "\nType : " );
+ ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS );
+ if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) {
+ TXT( "\nExtSwizzleX: " );
+ ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES );
+ }
+ if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) {
+ TXT( "\nExtSwizzleY: " );
+ ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES );
+ }
+ if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) {
+ TXT( "\nExtSwizzleZ: " );
+ ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES );
+ }
+ if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) {
+ TXT( "\nExtSwizzleW: " );
+ ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES );
+ }
+ if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) {
+ TXT( "\nNegateX : " );
+ UID( src->SrcRegisterExtSwz.NegateX );
+ }
+ if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) {
+ TXT( "\nNegateY : " );
+ UID( src->SrcRegisterExtSwz.NegateY );
+ }
+ if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) {
+ TXT( "\nNegateZ : " );
+ UID( src->SrcRegisterExtSwz.NegateZ );
+ }
+ if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) {
+ TXT( "\nNegateW : " );
+ UID( src->SrcRegisterExtSwz.NegateW );
+ }
+ if( deflt || fs->SrcRegisterExtSwz.ExtDivide != src->SrcRegisterExtSwz.ExtDivide ) {
+ TXT( "\nExtDivide : " );
+ ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES );
+ }
+ if( ignored ) {
+ TXT( "\nPadding : " );
+ UIX( src->SrcRegisterExtSwz.Padding );
+ if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) {
+ TXT( "\nExtended : " );
+ UID( src->SrcRegisterExtSwz.Extended );
+ }
+ }
+ }
+
+ if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) {
+ CHR( '\n' );
+ TXT( "\nType : " );
+ ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS );
+ if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) {
+ TXT( "\nComplement: " );
+ UID( src->SrcRegisterExtMod.Complement );
+ }
+ if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) {
+ TXT( "\nBias : " );
+ UID( src->SrcRegisterExtMod.Bias );
+ }
+ if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) {
+ TXT( "\nScale2X : " );
+ UID( src->SrcRegisterExtMod.Scale2X );
+ }
+ if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) {
+ TXT( "\nAbsolute : " );
+ UID( src->SrcRegisterExtMod.Absolute );
+ }
+ if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) {
+ TXT( "\nNegate : " );
+ UID( src->SrcRegisterExtMod.Negate );
+ }
+ if( ignored ) {
+ TXT( "\nPadding : " );
+ UIX( src->SrcRegisterExtMod.Padding );
+ if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) {
+ TXT( "\nExtended : " );
+ UID( src->SrcRegisterExtMod.Extended );
+ }
+ }
+ }
+ }
+}
+
+static void
+dump_gen(
+ struct gen_dump *dump,
+ const struct tgsi_token *tokens,
+ unsigned flags )
+{
+ struct tgsi_parse_context parse;
+ struct tgsi_full_instruction fi;
+ struct tgsi_full_declaration fd;
+ unsigned verbose = flags & TGSI_DUMP_VERBOSE;
+ unsigned ignored = !(flags & TGSI_DUMP_NO_IGNORED);
+ unsigned deflt = !(flags & TGSI_DUMP_NO_DEFAULT);
+ unsigned instno = 0;
+
+ dump->tabs = 0;
+
+ /* sanity check */
+ assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0);
+
+ tgsi_parse_init( &parse, tokens );
+
+ TXT( "tgsi-dump begin -----------------" );
+
+ CHR( '\n' );
+ ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT );
+ CHR( ' ' );
+ UID( parse.FullVersion.Version.MajorVersion );
+ CHR( '.' );
+ UID( parse.FullVersion.Version.MinorVersion );
+
+ if( verbose ) {
+ TXT( "\nMajorVersion: " );
+ UID( parse.FullVersion.Version.MajorVersion );
+ TXT( "\nMinorVersion: " );
+ UID( parse.FullVersion.Version.MinorVersion );
+ CHR( '\n' );
+
+ TXT( "\nHeaderSize: " );
+ UID( parse.FullHeader.Header.HeaderSize );
+ TXT( "\nBodySize : " );
+ UID( parse.FullHeader.Header.BodySize );
+ TXT( "\nProcessor : " );
+ ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES );
+ CHR( '\n' );
+ }
+
+ fi = tgsi_default_full_instruction();
+ fd = tgsi_default_full_declaration();
+
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ dump_declaration_short(
+ dump,
+ &parse.FullToken.FullDeclaration );
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ dump_immediate_short(
+ dump,
+ &parse.FullToken.FullImmediate );
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ dump_instruction_short(
+ dump,
+ &parse.FullToken.FullInstruction,
+ instno );
+ instno++;
+ break;
+
+ default:
+ assert( 0 );
+ }
+
+ if( verbose ) {
+ TXT( "\nType : " );
+ ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES );
+ if( ignored ) {
+ TXT( "\nSize : " );
+ UID( parse.FullToken.Token.Size );
+ if( deflt || parse.FullToken.Token.Extended ) {
+ TXT( "\nExtended : " );
+ UID( parse.FullToken.Token.Extended );
+ }
+ }
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ dump_declaration_verbose(
+ dump,
+ &parse.FullToken.FullDeclaration,
+ ignored,
+ deflt,
+ &fd );
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ dump_immediate_verbose(
+ dump,
+ &parse.FullToken.FullImmediate,
+ ignored );
+ break;
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ dump_instruction_verbose(
+ dump,
+ &parse.FullToken.FullInstruction,
+ ignored,
+ deflt,
+ &fi );
+ break;
+
+ default:
+ assert( 0 );
+ }
+
+ CHR( '\n' );
+ }
+ }
+
+ TXT( "\ntgsi-dump end -------------------\n" );
+
+ tgsi_parse_free( &parse );
+}
+
+
+static void
+sanity_checks(void)
+{
+ assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0);
+ assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0);
+}
+
+
+void
+tgsi_dump(
+ const struct tgsi_token *tokens,
+ unsigned flags )
+{
+ struct file_dump dump;
+
+ sanity_checks();
+
+ dump.base.write = _file_dump_write;
+#if 0
+ {
+ static unsigned counter = 0;
+ char buffer[64];
+ sprintf( buffer, "tgsi-dump-%.4u.txt", counter++ );
+ dump.file = fopen( buffer, "wt" );
+ }
+#else
+ dump.file = stderr;
+#endif
+
+ dump_gen(
+ &dump.base,
+ tokens,
+ flags );
+
+#if 0
+ fclose( dump.file );
+#endif
+}
+
+void
+tgsi_dump_str(
+ char **str,
+ const struct tgsi_token *tokens,
+ unsigned flags )
+{
+ struct text_dump dump;
+
+ dump.base.write = _text_dump_write;
+ dump.text = NULL;
+ dump.length = 0;
+ dump.capacity = 0;
+
+ dump_gen(
+ &dump.base,
+ tokens,
+ flags );
+
+ *str = dump.text;
+}
--- /dev/null
+#if !defined TGSI_DUMP_H
+#define TGSI_DUMP_H
+
+#if defined __cplusplus
+extern "C" {
+#endif // defined __cplusplus
+
+#define TGSI_DUMP_VERBOSE 1
+#define TGSI_DUMP_NO_IGNORED 2
+#define TGSI_DUMP_NO_DEFAULT 4
+
+void
+tgsi_dump(
+ const struct tgsi_token *tokens,
+ unsigned flags );
+
+void
+tgsi_dump_str(
+ char **str,
+ const struct tgsi_token *tokens,
+ unsigned flags );
+
+#if defined __cplusplus
+} // extern "C"
+#endif // defined __cplusplus
+
+#endif // !defined TGSI_DUMP_H
+
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi_parse.h"
+#include "tgsi_build.h"
+
+void
+tgsi_full_token_init(
+ union tgsi_full_token *full_token )
+{
+ full_token->Token.Type = TGSI_TOKEN_TYPE_DECLARATION;
+}
+
+void
+tgsi_full_token_free(
+ union tgsi_full_token *full_token )
+{
+ if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) {
+ FREE( full_token->FullImmediate.u.Pointer );
+ }
+}
+
+unsigned
+tgsi_parse_init(
+ struct tgsi_parse_context *ctx,
+ const struct tgsi_token *tokens )
+{
+ ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0];
+ if( ctx->FullVersion.Version.MajorVersion > 1 ) {
+ return TGSI_PARSE_ERROR;
+ }
+
+ ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[1];
+ if( ctx->FullHeader.Header.HeaderSize >= 2 ) {
+ ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2];
+ }
+ else {
+ ctx->FullHeader.Processor = tgsi_default_processor();
+ }
+
+ ctx->Tokens = tokens;
+ ctx->Position = 1 + ctx->FullHeader.Header.HeaderSize;
+
+ tgsi_full_token_init( &ctx->FullToken );
+
+ return TGSI_PARSE_OK;
+}
+
+void
+tgsi_parse_free(
+ struct tgsi_parse_context *ctx )
+{
+ tgsi_full_token_free( &ctx->FullToken );
+}
+
+boolean
+tgsi_parse_end_of_tokens(
+ struct tgsi_parse_context *ctx )
+{
+ return ctx->Position >=
+ 1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize;
+}
+
+static void
+next_token(
+ struct tgsi_parse_context *ctx,
+ void *token )
+{
+ assert( !tgsi_parse_end_of_tokens( ctx ) );
+
+ *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++];
+}
+
+void
+tgsi_parse_token(
+ struct tgsi_parse_context *ctx )
+{
+ struct tgsi_token token;
+ unsigned i;
+
+ tgsi_full_token_free( &ctx->FullToken );
+ tgsi_full_token_init( &ctx->FullToken );
+
+ next_token( ctx, &token );
+
+ switch( token.Type ) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration;
+
+ *decl = tgsi_default_full_declaration();
+ decl->Declaration = *(struct tgsi_declaration *) &token;
+
+ switch( decl->Declaration.Type ) {
+ case TGSI_DECLARE_RANGE:
+ next_token( ctx, &decl->u.DeclarationRange );
+ break;
+
+ case TGSI_DECLARE_MASK:
+ next_token( ctx, &decl->u.DeclarationMask );
+ break;
+
+ default:
+ assert (0);
+ }
+
+ if( decl->Declaration.Interpolate ) {
+ next_token( ctx, &decl->Interpolation );
+ }
+
+ if( decl->Declaration.Semantic ) {
+ next_token( ctx, &decl->Semantic );
+ }
+
+ break;
+ }
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate;
+
+ *imm = tgsi_default_full_immediate();
+ imm->Immediate = *(struct tgsi_immediate *) &token;
+
+ assert( !imm->Immediate.Extended );
+
+ switch (imm->Immediate.DataType) {
+ case TGSI_IMM_FLOAT32:
+ imm->u.Pointer = MALLOC(
+ sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) );
+ for( i = 0; i < imm->Immediate.Size - 1; i++ ) {
+ next_token( ctx, &imm->u.ImmediateFloat32[i] );
+ }
+ break;
+
+ default:
+ assert( 0 );
+ }
+
+ break;
+ }
+
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction;
+ unsigned extended;
+
+ *inst = tgsi_default_full_instruction();
+ inst->Instruction = *(struct tgsi_instruction *) &token;
+
+ extended = inst->Instruction.Extended;
+
+ while( extended ) {
+ struct tgsi_src_register_ext token;
+
+ next_token( ctx, &token );
+
+ switch( token.Type ) {
+ case TGSI_INSTRUCTION_EXT_TYPE_NV:
+ inst->InstructionExtNv =
+ *(struct tgsi_instruction_ext_nv *) &token;
+ break;
+
+ case TGSI_INSTRUCTION_EXT_TYPE_LABEL:
+ inst->InstructionExtLabel =
+ *(struct tgsi_instruction_ext_label *) &token;
+ break;
+
+ case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE:
+ inst->InstructionExtTexture =
+ *(struct tgsi_instruction_ext_texture *) &token;
+ break;
+
+ default:
+ assert( 0 );
+ }
+
+ extended = token.Extended;
+ }
+
+ assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS );
+
+ for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
+ unsigned extended;
+
+ next_token( ctx, &inst->FullDstRegisters[i].DstRegister );
+
+ /*
+ * No support for indirect or multi-dimensional addressing.
+ */
+ assert( !inst->FullDstRegisters[i].DstRegister.Indirect );
+ assert( !inst->FullDstRegisters[i].DstRegister.Dimension );
+
+ extended = inst->FullDstRegisters[i].DstRegister.Extended;
+
+ while( extended ) {
+ struct tgsi_src_register_ext token;
+
+ next_token( ctx, &token );
+
+ switch( token.Type ) {
+ case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE:
+ inst->FullDstRegisters[i].DstRegisterExtConcode =
+ *(struct tgsi_dst_register_ext_concode *) &token;
+ break;
+
+ case TGSI_DST_REGISTER_EXT_TYPE_MODULATE:
+ inst->FullDstRegisters[i].DstRegisterExtModulate =
+ *(struct tgsi_dst_register_ext_modulate *) &token;
+ break;
+
+ default:
+ assert( 0 );
+ }
+
+ extended = token.Extended;
+ }
+ }
+
+ assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS );
+
+ for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
+ unsigned extended;
+
+ next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister );
+
+ extended = inst->FullSrcRegisters[i].SrcRegister.Extended;
+
+ while( extended ) {
+ struct tgsi_src_register_ext token;
+
+ next_token( ctx, &token );
+
+ switch( token.Type ) {
+ case TGSI_SRC_REGISTER_EXT_TYPE_SWZ:
+ inst->FullSrcRegisters[i].SrcRegisterExtSwz =
+ *(struct tgsi_src_register_ext_swz *) &token;
+ break;
+
+ case TGSI_SRC_REGISTER_EXT_TYPE_MOD:
+ inst->FullSrcRegisters[i].SrcRegisterExtMod =
+ *(struct tgsi_src_register_ext_mod *) &token;
+ break;
+
+ default:
+ assert( 0 );
+ }
+
+ extended = token.Extended;
+ }
+
+ if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) {
+ next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd );
+
+ /*
+ * No support for indirect or multi-dimensional addressing.
+ */
+ assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect );
+ assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension );
+ assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended );
+ }
+
+ if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) {
+ next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim );
+
+ /*
+ * No support for multi-dimensional addressing.
+ */
+ assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension );
+ assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended );
+
+ if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) {
+ next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd );
+
+ /*
+ * No support for indirect or multi-dimensional addressing.
+ */
+ assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect );
+ assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension );
+ assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended );
+ }
+ }
+ }
+
+ break;
+ }
+
+ default:
+ assert( 0 );
+ }
+}
+
--- /dev/null
+#if !defined TGSI_PARSE_H
+#define TGSI_PARSE_H
+
+#if defined __cplusplus
+extern "C" {
+#endif // defined __cplusplus
+
+struct tgsi_full_version
+{
+ struct tgsi_version Version;
+};
+
+struct tgsi_full_header
+{
+ struct tgsi_header Header;
+ struct tgsi_processor Processor;
+};
+
+struct tgsi_full_dst_register
+{
+ struct tgsi_dst_register DstRegister;
+ struct tgsi_dst_register_ext_concode DstRegisterExtConcode;
+ struct tgsi_dst_register_ext_modulate DstRegisterExtModulate;
+};
+
+struct tgsi_full_src_register
+{
+ struct tgsi_src_register SrcRegister;
+ struct tgsi_src_register_ext_swz SrcRegisterExtSwz;
+ struct tgsi_src_register_ext_mod SrcRegisterExtMod;
+ struct tgsi_src_register SrcRegisterInd;
+ struct tgsi_dimension SrcRegisterDim;
+ struct tgsi_src_register SrcRegisterDimInd;
+};
+
+struct tgsi_full_declaration
+{
+ struct tgsi_declaration Declaration;
+ union
+ {
+ struct tgsi_declaration_range DeclarationRange;
+ struct tgsi_declaration_mask DeclarationMask;
+ } u;
+ struct tgsi_declaration_interpolation Interpolation;
+ struct tgsi_declaration_semantic Semantic;
+};
+
+struct tgsi_full_immediate
+{
+ struct tgsi_immediate Immediate;
+ union
+ {
+ void *Pointer;
+ struct tgsi_immediate_float32 *ImmediateFloat32;
+ } u;
+};
+
+#define TGSI_FULL_MAX_DST_REGISTERS 2
+#define TGSI_FULL_MAX_SRC_REGISTERS 3
+
+struct tgsi_full_instruction
+{
+ struct tgsi_instruction Instruction;
+ struct tgsi_instruction_ext_nv InstructionExtNv;
+ struct tgsi_instruction_ext_label InstructionExtLabel;
+ struct tgsi_instruction_ext_texture InstructionExtTexture;
+ struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS];
+ struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS];
+};
+
+union tgsi_full_token
+{
+ struct tgsi_token Token;
+ struct tgsi_full_declaration FullDeclaration;
+ struct tgsi_full_immediate FullImmediate;
+ struct tgsi_full_instruction FullInstruction;
+};
+
+void
+tgsi_full_token_init(
+ union tgsi_full_token *full_token );
+
+void
+tgsi_full_token_free(
+ union tgsi_full_token *full_token );
+
+struct tgsi_parse_context
+{
+ const struct tgsi_token *Tokens;
+ unsigned Position;
+ struct tgsi_full_version FullVersion;
+ struct tgsi_full_header FullHeader;
+ union tgsi_full_token FullToken;
+};
+
+#define TGSI_PARSE_OK 0
+#define TGSI_PARSE_ERROR 1
+
+unsigned
+tgsi_parse_init(
+ struct tgsi_parse_context *ctx,
+ const struct tgsi_token *tokens );
+
+void
+tgsi_parse_free(
+ struct tgsi_parse_context *ctx );
+
+boolean
+tgsi_parse_end_of_tokens(
+ struct tgsi_parse_context *ctx );
+
+void
+tgsi_parse_token(
+ struct tgsi_parse_context *ctx );
+
+#if defined __cplusplus
+} // extern "C"
+#endif // defined __cplusplus
+
+#endif // !defined TGSI_PARSE_H
+
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * TGSI program transformation utility.
+ *
+ * Authors: Brian Paul
+ */
+
+
+#include "tgsi_transform.h"
+
+
+
+static void
+emit_instruction(struct tgsi_transform_context *ctx,
+ const struct tgsi_full_instruction *inst)
+{
+ uint ti = ctx->ti;
+
+ ti += tgsi_build_full_instruction(inst,
+ ctx->tokens_out + ti,
+ ctx->header,
+ ctx->max_tokens_out - ti);
+ ctx->ti = ti;
+}
+
+
+static void
+emit_declaration(struct tgsi_transform_context *ctx,
+ const struct tgsi_full_declaration *decl)
+{
+ uint ti = ctx->ti;
+
+ ti += tgsi_build_full_declaration(decl,
+ ctx->tokens_out + ti,
+ ctx->header,
+ ctx->max_tokens_out - ti);
+ ctx->ti = ti;
+}
+
+
+static void
+emit_immediate(struct tgsi_transform_context *ctx,
+ const struct tgsi_full_immediate *imm)
+{
+ uint ti = ctx->ti;
+
+ ti += tgsi_build_full_immediate(imm,
+ ctx->tokens_out + ti,
+ ctx->header,
+ ctx->max_tokens_out - ti);
+ ctx->ti = ti;
+}
+
+
+
+/**
+ * Apply user-defined transformations to the input shader to produce
+ * the output shader.
+ * For example, a register search-and-replace operation could be applied
+ * by defining a transform_instruction() callback that examined and changed
+ * the instruction src/dest regs.
+ *
+ * \return number of tokens emitted
+ */
+int
+tgsi_transform_shader(const struct tgsi_token *tokens_in,
+ struct tgsi_token *tokens_out,
+ uint max_tokens_out,
+ struct tgsi_transform_context *ctx)
+{
+ uint procType;
+
+ /* input shader */
+ struct tgsi_parse_context parse;
+
+ /* output shader */
+ struct tgsi_processor *processor;
+
+
+ /**
+ ** callback context init
+ **/
+ ctx->emit_instruction = emit_instruction;
+ ctx->emit_declaration = emit_declaration;
+ ctx->emit_immediate = emit_immediate;
+ ctx->tokens_out = tokens_out;
+ ctx->max_tokens_out = max_tokens_out;
+
+
+ /**
+ ** Setup to begin parsing input shader
+ **/
+ if (tgsi_parse_init( &parse, tokens_in ) != TGSI_PARSE_OK) {
+ debug_printf("tgsi_parse_init() failed in tgsi_transform_shader()!\n");
+ return -1;
+ }
+ procType = parse.FullHeader.Processor.Processor;
+ assert(procType == TGSI_PROCESSOR_FRAGMENT ||
+ procType == TGSI_PROCESSOR_VERTEX ||
+ procType == TGSI_PROCESSOR_GEOMETRY);
+
+
+ /**
+ ** Setup output shader
+ **/
+ *(struct tgsi_version *) &tokens_out[0] = tgsi_build_version();
+
+ ctx->header = (struct tgsi_header *) (tokens_out + 1);
+ *ctx->header = tgsi_build_header();
+
+ processor = (struct tgsi_processor *) (tokens_out + 2);
+ *processor = tgsi_build_processor( procType, ctx->header );
+
+ ctx->ti = 3;
+
+
+ /**
+ ** Loop over incoming program tokens/instructions
+ */
+ while( !tgsi_parse_end_of_tokens( &parse ) ) {
+
+ tgsi_parse_token( &parse );
+
+ switch( parse.FullToken.Token.Type ) {
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ {
+ struct tgsi_full_instruction *fullinst
+ = &parse.FullToken.FullInstruction;
+
+ if (ctx->transform_instruction)
+ ctx->transform_instruction(ctx, fullinst);
+ else
+ ctx->emit_instruction(ctx, fullinst);
+ }
+ break;
+
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ {
+ struct tgsi_full_declaration *fulldecl
+ = &parse.FullToken.FullDeclaration;
+
+ if (ctx->transform_declaration)
+ ctx->transform_declaration(ctx, fulldecl);
+ else
+ ctx->emit_declaration(ctx, fulldecl);
+ }
+ break;
+
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ {
+ struct tgsi_full_immediate *fullimm
+ = &parse.FullToken.FullImmediate;
+
+ if (ctx->transform_immediate)
+ ctx->transform_immediate(ctx, fullimm);
+ else
+ ctx->emit_immediate(ctx, fullimm);
+ }
+ break;
+
+ default:
+ assert( 0 );
+ }
+ }
+
+ if (ctx->epilog) {
+ ctx->epilog(ctx);
+ }
+
+ tgsi_parse_free (&parse);
+
+ return ctx->ti;
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef TGSI_TRANSFORM_H
+#define TGSI_TRANSFORM_H
+
+
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/util/tgsi_build.h"
+
+
+
+/**
+ * Subclass this to add caller-specific data
+ */
+struct tgsi_transform_context
+{
+/**** PUBLIC ***/
+
+ /**
+ * User-defined callbacks invoked per instruction.
+ */
+ void (*transform_instruction)(struct tgsi_transform_context *ctx,
+ struct tgsi_full_instruction *inst);
+
+ void (*transform_declaration)(struct tgsi_transform_context *ctx,
+ struct tgsi_full_declaration *decl);
+
+ void (*transform_immediate)(struct tgsi_transform_context *ctx,
+ struct tgsi_full_immediate *imm);
+
+ /**
+ * Called at end of input program to allow caller to append extra
+ * instructions. Return number of tokens emitted.
+ */
+ void (*epilog)(struct tgsi_transform_context *ctx);
+
+
+/*** PRIVATE ***/
+
+ /**
+ * These are setup by tgsi_transform_shader() and cannot be overridden.
+ * Meant to be called from in the above user callback functions.
+ */
+ void (*emit_instruction)(struct tgsi_transform_context *ctx,
+ const struct tgsi_full_instruction *inst);
+ void (*emit_declaration)(struct tgsi_transform_context *ctx,
+ const struct tgsi_full_declaration *decl);
+ void (*emit_immediate)(struct tgsi_transform_context *ctx,
+ const struct tgsi_full_immediate *imm);
+
+ struct tgsi_header *header;
+ uint max_tokens_out;
+ struct tgsi_token *tokens_out;
+ uint ti;
+};
+
+
+
+extern int
+tgsi_transform_shader(const struct tgsi_token *tokens_in,
+ struct tgsi_token *tokens_out,
+ uint max_tokens_out,
+ struct tgsi_transform_context *ctx);
+
+
+#endif /* TGSI_TRANSFORM_H */
--- /dev/null
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi_parse.h"
+#include "tgsi_build.h"
+#include "tgsi_util.h"
+
+union pointer_hack
+{
+ void *pointer;
+ unsigned long long uint64;
+};
+
+void *
+tgsi_align_128bit(
+ void *unaligned )
+{
+ union pointer_hack ph;
+
+ ph.uint64 = 0;
+ ph.pointer = unaligned;
+ ph.uint64 = (ph.uint64 + 15) & ~15;
+ return ph.pointer;
+}
+
+unsigned
+tgsi_util_get_src_register_swizzle(
+ const struct tgsi_src_register *reg,
+ unsigned component )
+{
+ switch( component ) {
+ case 0:
+ return reg->SwizzleX;
+ case 1:
+ return reg->SwizzleY;
+ case 2:
+ return reg->SwizzleZ;
+ case 3:
+ return reg->SwizzleW;
+ default:
+ assert( 0 );
+ }
+ return 0;
+}
+
+unsigned
+tgsi_util_get_src_register_extswizzle(
+ const struct tgsi_src_register_ext_swz *reg,
+ unsigned component )
+{
+ switch( component ) {
+ case 0:
+ return reg->ExtSwizzleX;
+ case 1:
+ return reg->ExtSwizzleY;
+ case 2:
+ return reg->ExtSwizzleZ;
+ case 3:
+ return reg->ExtSwizzleW;
+ default:
+ assert( 0 );
+ }
+ return 0;
+}
+
+unsigned
+tgsi_util_get_full_src_register_extswizzle(
+ const struct tgsi_full_src_register *reg,
+ unsigned component )
+{
+ unsigned swizzle;
+
+ /*
+ * First, calculate the extended swizzle for a given channel. This will give
+ * us either a channel index into the simple swizzle or a constant 1 or 0.
+ */
+ swizzle = tgsi_util_get_src_register_extswizzle(
+ ®->SrcRegisterExtSwz,
+ component );
+
+ assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
+ assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
+ assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
+ assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
+ assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
+ assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
+
+ /*
+ * Second, calculate the simple swizzle for the unswizzled channel index.
+ * Leave the constants intact, they are not affected by the simple swizzle.
+ */
+ if( swizzle <= TGSI_SWIZZLE_W ) {
+ swizzle = tgsi_util_get_src_register_swizzle(
+ ®->SrcRegister,
+ component );
+ }
+
+ return swizzle;
+}
+
+void
+tgsi_util_set_src_register_swizzle(
+ struct tgsi_src_register *reg,
+ unsigned swizzle,
+ unsigned component )
+{
+ switch( component ) {
+ case 0:
+ reg->SwizzleX = swizzle;
+ break;
+ case 1:
+ reg->SwizzleY = swizzle;
+ break;
+ case 2:
+ reg->SwizzleZ = swizzle;
+ break;
+ case 3:
+ reg->SwizzleW = swizzle;
+ break;
+ default:
+ assert( 0 );
+ }
+}
+
+void
+tgsi_util_set_src_register_extswizzle(
+ struct tgsi_src_register_ext_swz *reg,
+ unsigned swizzle,
+ unsigned component )
+{
+ switch( component ) {
+ case 0:
+ reg->ExtSwizzleX = swizzle;
+ break;
+ case 1:
+ reg->ExtSwizzleY = swizzle;
+ break;
+ case 2:
+ reg->ExtSwizzleZ = swizzle;
+ break;
+ case 3:
+ reg->ExtSwizzleW = swizzle;
+ break;
+ default:
+ assert( 0 );
+ }
+}
+
+unsigned
+tgsi_util_get_src_register_extnegate(
+ const struct tgsi_src_register_ext_swz *reg,
+ unsigned component )
+{
+ switch( component ) {
+ case 0:
+ return reg->NegateX;
+ case 1:
+ return reg->NegateY;
+ case 2:
+ return reg->NegateZ;
+ case 3:
+ return reg->NegateW;
+ default:
+ assert( 0 );
+ }
+ return 0;
+}
+
+void
+tgsi_util_set_src_register_extnegate(
+ struct tgsi_src_register_ext_swz *reg,
+ unsigned negate,
+ unsigned component )
+{
+ switch( component ) {
+ case 0:
+ reg->NegateX = negate;
+ break;
+ case 1:
+ reg->NegateY = negate;
+ break;
+ case 2:
+ reg->NegateZ = negate;
+ break;
+ case 3:
+ reg->NegateW = negate;
+ break;
+ default:
+ assert( 0 );
+ }
+}
+
+unsigned
+tgsi_util_get_full_src_register_sign_mode(
+ const struct tgsi_full_src_register *reg,
+ unsigned component )
+{
+ unsigned sign_mode;
+
+ if( reg->SrcRegisterExtMod.Absolute ) {
+ /* Consider only the post-abs negation. */
+
+ if( reg->SrcRegisterExtMod.Negate ) {
+ sign_mode = TGSI_UTIL_SIGN_SET;
+ }
+ else {
+ sign_mode = TGSI_UTIL_SIGN_CLEAR;
+ }
+ }
+ else {
+ /* Accumulate the three negations. */
+
+ unsigned negate;
+
+ negate = reg->SrcRegister.Negate;
+ if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) {
+ negate = !negate;
+ }
+ if( reg->SrcRegisterExtMod.Negate ) {
+ negate = !negate;
+ }
+
+ if( negate ) {
+ sign_mode = TGSI_UTIL_SIGN_TOGGLE;
+ }
+ else {
+ sign_mode = TGSI_UTIL_SIGN_KEEP;
+ }
+ }
+
+ return sign_mode;
+}
+
+void
+tgsi_util_set_full_src_register_sign_mode(
+ struct tgsi_full_src_register *reg,
+ unsigned sign_mode )
+{
+ reg->SrcRegisterExtSwz.NegateX = 0;
+ reg->SrcRegisterExtSwz.NegateY = 0;
+ reg->SrcRegisterExtSwz.NegateZ = 0;
+ reg->SrcRegisterExtSwz.NegateW = 0;
+
+ switch (sign_mode)
+ {
+ case TGSI_UTIL_SIGN_CLEAR:
+ reg->SrcRegister.Negate = 0;
+ reg->SrcRegisterExtMod.Absolute = 1;
+ reg->SrcRegisterExtMod.Negate = 0;
+ break;
+
+ case TGSI_UTIL_SIGN_SET:
+ reg->SrcRegister.Negate = 0;
+ reg->SrcRegisterExtMod.Absolute = 1;
+ reg->SrcRegisterExtMod.Negate = 1;
+ break;
+
+ case TGSI_UTIL_SIGN_TOGGLE:
+ reg->SrcRegister.Negate = 1;
+ reg->SrcRegisterExtMod.Absolute = 0;
+ reg->SrcRegisterExtMod.Negate = 0;
+ break;
+
+ case TGSI_UTIL_SIGN_KEEP:
+ reg->SrcRegister.Negate = 0;
+ reg->SrcRegisterExtMod.Absolute = 0;
+ reg->SrcRegisterExtMod.Negate = 0;
+ break;
+
+ default:
+ assert( 0 );
+ }
+}
+
--- /dev/null
+#if !defined TGSI_UTIL_H
+#define TGSI_UTIL_H
+
+#if defined __cplusplus
+extern "C" {
+#endif // defined __cplusplus
+
+void *
+tgsi_align_128bit(
+ void *unaligned );
+
+unsigned
+tgsi_util_get_src_register_swizzle(
+ const struct tgsi_src_register *reg,
+ unsigned component );
+
+unsigned
+tgsi_util_get_src_register_extswizzle(
+ const struct tgsi_src_register_ext_swz *reg,
+ unsigned component);
+
+unsigned
+tgsi_util_get_full_src_register_extswizzle(
+ const struct tgsi_full_src_register *reg,
+ unsigned component );
+
+void
+tgsi_util_set_src_register_swizzle(
+ struct tgsi_src_register *reg,
+ unsigned swizzle,
+ unsigned component );
+
+void
+tgsi_util_set_src_register_extswizzle(
+ struct tgsi_src_register_ext_swz *reg,
+ unsigned swizzle,
+ unsigned component );
+
+unsigned
+tgsi_util_get_src_register_extnegate(
+ const struct tgsi_src_register_ext_swz *reg,
+ unsigned component );
+
+void
+tgsi_util_set_src_register_extnegate(
+ struct tgsi_src_register_ext_swz *reg,
+ unsigned negate,
+ unsigned component );
+
+#define TGSI_UTIL_SIGN_CLEAR 0 /* Force positive */
+#define TGSI_UTIL_SIGN_SET 1 /* Force negative */
+#define TGSI_UTIL_SIGN_TOGGLE 2 /* Negate */
+#define TGSI_UTIL_SIGN_KEEP 3 /* No change */
+
+unsigned
+tgsi_util_get_full_src_register_sign_mode(
+ const struct tgsi_full_src_register *reg,
+ unsigned component );
+
+void
+tgsi_util_set_full_src_register_sign_mode(
+ struct tgsi_full_src_register *reg,
+ unsigned sign_mode );
+
+#if defined __cplusplus
+} // extern "C"
+#endif // defined __cplusplus
+
+#endif // !defined TGSI_UTIL_H
+
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdarg.h>
+
+#ifdef WIN32
+#include <windows.h>
+#include <winddi.h>
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#endif
+
+#include "pipe/p_debug.h"
+#include "pipe/p_compiler.h"
+
+
+void debug_vprintf(const char *format, va_list ap)
+{
+#ifdef WIN32
+ EngDebugPrint("Gallium3D: ", (PCHAR)format, ap);
+#else
+ vfprintf(stderr, format, ap);
+#endif
+}
+
+
+void debug_printf(const char *format, ...)
+{
+ va_list ap;
+ va_start(ap, format);
+ debug_vprintf(format, ap);
+ va_end(ap);
+}
+
+
+static INLINE void debug_abort(void)
+{
+#ifdef WIN32
+ EngDebugBreak();
+#else
+ abort();
+#endif
+}
+
+
+void debug_assert_fail(const char *expr, const char *file, unsigned line)
+{
+ debug_printf("%s:%i: Assertion `%s' failed.\n", file, line, expr);
+ debug_abort();
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * RGBA/float tile get/put functions.
+ * Usable both by drivers and state trackers.
+ * Surfaces should already be in a mapped state.
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_util.h"
+#include "pipe/p_inlines.h"
+
+#include "p_tile.h"
+
+
+
+/**
+ * Move raw block of pixels from surface to user memory.
+ * This should be usable by any hw driver that has mappable surfaces.
+ */
+void
+pipe_get_tile_raw(struct pipe_context *pipe,
+ struct pipe_surface *ps,
+ uint x, uint y, uint w, uint h,
+ void *p, int dst_stride)
+{
+ const uint cpp = ps->cpp;
+ const ubyte *pSrc;
+ const uint src_stride = ps->pitch * cpp;
+ ubyte *pDest;
+ uint i;
+
+ if (dst_stride == 0) {
+ dst_stride = w * cpp;
+ }
+
+ if (pipe_clip_tile(x, y, &w, &h, ps))
+ return;
+
+ pSrc = (const ubyte *) pipe_surface_map(ps) + (y * ps->pitch + x) * cpp;
+ pDest = (ubyte *) p;
+
+ for (i = 0; i < h; i++) {
+ memcpy(pDest, pSrc, w * cpp);
+ pDest += dst_stride;
+ pSrc += src_stride;
+ }
+
+ pipe_surface_unmap(ps);
+}
+
+
+/**
+ * Move raw block of pixels from user memory to surface.
+ * This should be usable by any hw driver that has mappable surfaces.
+ */
+void
+pipe_put_tile_raw(struct pipe_context *pipe,
+ struct pipe_surface *ps,
+ uint x, uint y, uint w, uint h,
+ const void *p, int src_stride)
+{
+ const uint cpp = ps->cpp;
+ const ubyte *pSrc;
+ const uint dst_stride = ps->pitch * cpp;
+ ubyte *pDest;
+ uint i;
+
+ if (src_stride == 0) {
+ src_stride = w * cpp;
+ }
+
+ if (pipe_clip_tile(x, y, &w, &h, ps))
+ return;
+
+ pSrc = (const ubyte *) p;
+ pDest = (ubyte *) pipe_surface_map(ps) + (y * ps->pitch + x) * cpp;
+
+ for (i = 0; i < h; i++) {
+ memcpy(pDest, pSrc, w * cpp);
+ pDest += dst_stride;
+ pSrc += src_stride;
+ }
+
+ pipe_surface_unmap(ps);
+}
+
+
+
+
+/** Convert short in [-32768,32767] to GLfloat in [-1.0,1.0] */
+#define SHORT_TO_FLOAT(S) ((2.0F * (S) + 1.0F) * (1.0F/65535.0F))
+
+#define UNCLAMPED_FLOAT_TO_SHORT(us, f) \
+ us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) )
+
+
+
+/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
+
+static void
+a8r8g8b8_get_tile_rgba(unsigned *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ const unsigned pixel = *src++;
+ pRow[0] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff);
+ pRow[1] = UBYTE_TO_FLOAT((pixel >> 8) & 0xff);
+ pRow[2] = UBYTE_TO_FLOAT((pixel >> 0) & 0xff);
+ pRow[3] = UBYTE_TO_FLOAT((pixel >> 24) & 0xff);
+ }
+ p += dst_stride;
+ }
+}
+
+
+static void
+a8r8g8b8_put_tile_rgba(unsigned *dst,
+ unsigned w, unsigned h,
+ const float *p,
+ unsigned src_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ const float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ unsigned r, g, b, a;
+ UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]);
+ *dst++ = (a << 24) | (r << 16) | (g << 8) | b;
+ }
+ p += src_stride;
+ }
+}
+
+
+/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/
+
+static void
+b8g8r8a8_get_tile_rgba(unsigned *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ const unsigned pixel = *src++;
+ pRow[0] = UBYTE_TO_FLOAT((pixel >> 8) & 0xff);
+ pRow[1] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff);
+ pRow[2] = UBYTE_TO_FLOAT((pixel >> 24) & 0xff);
+ pRow[3] = UBYTE_TO_FLOAT((pixel >> 0) & 0xff);
+ }
+ p += dst_stride;
+ }
+}
+
+
+static void
+b8g8r8a8_put_tile_rgba(unsigned *dst,
+ unsigned w, unsigned h,
+ const float *p,
+ unsigned src_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ const float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ unsigned r, g, b, a;
+ UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]);
+ *dst++ = (b << 24) | (g << 16) | (r << 8) | a;
+ }
+ p += src_stride;
+ }
+}
+
+
+/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/
+
+static void
+a1r5g5b5_get_tile_rgba(ushort *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ const ushort pixel = *src++;
+ pRow[0] = ((pixel >> 10) & 0x1f) * (1.0f / 31.0f);
+ pRow[1] = ((pixel >> 5) & 0x1f) * (1.0f / 31.0f);
+ pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f);
+ pRow[3] = ((pixel >> 15) ) * 1.0f;
+ }
+ p += dst_stride;
+ }
+}
+
+
+/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/
+
+static void
+a4r4g4b4_get_tile_rgba(ushort *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ const ushort pixel = *src++;
+ pRow[0] = ((pixel >> 8) & 0xf) * (1.0f / 15.0f);
+ pRow[1] = ((pixel >> 4) & 0xf) * (1.0f / 15.0f);
+ pRow[2] = ((pixel ) & 0xf) * (1.0f / 15.0f);
+ pRow[3] = ((pixel >> 12) ) * (1.0f / 15.0f);
+ }
+ p += dst_stride;
+ }
+}
+
+
+/*** PIPE_FORMAT_R5G6B5_UNORM ***/
+
+static void
+r5g6b5_get_tile_rgba(ushort *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ const ushort pixel = *src++;
+ pRow[0] = ((pixel >> 11) & 0x1f) * (1.0f / 31.0f);
+ pRow[1] = ((pixel >> 5) & 0x3f) * (1.0f / 63.0f);
+ pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f);
+ pRow[3] = 1.0f;
+ }
+ p += dst_stride;
+ }
+}
+
+
+static void
+r5g5b5_put_tile_rgba(ushort *dst,
+ unsigned w, unsigned h,
+ const float *p,
+ unsigned src_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ const float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ uint r = (uint) (CLAMP(pRow[0], 0.0, 1.0) * 31.0);
+ uint g = (uint) (CLAMP(pRow[1], 0.0, 1.0) * 63.0);
+ uint b = (uint) (CLAMP(pRow[2], 0.0, 1.0) * 31.0);
+ *dst++ = (r << 11) | (g << 5) | (b);
+ }
+ p += src_stride;
+ }
+}
+
+
+
+/*** PIPE_FORMAT_Z16_UNORM ***/
+
+/**
+ * Return each Z value as four floats in [0,1].
+ */
+static void
+z16_get_tile_rgba(ushort *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ const float scale = 1.0f / 65535.0f;
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = *src++ * scale;
+ }
+ p += dst_stride;
+ }
+}
+
+
+
+
+/*** PIPE_FORMAT_U_L8 ***/
+
+static void
+l8_get_tile_rgba(ubyte *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, src++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] = UBYTE_TO_FLOAT(*src);
+ pRow[3] = 1.0;
+ }
+ p += dst_stride;
+ }
+}
+
+
+/*** PIPE_FORMAT_U_A8 ***/
+
+static void
+a8_get_tile_rgba(ubyte *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, src++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] = 0.0;
+ pRow[3] = UBYTE_TO_FLOAT(*src);
+ }
+ p += dst_stride;
+ }
+}
+
+
+/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/
+
+static void
+r16g16b16a16_get_tile_rgba(short *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, src += 4, pRow += 4) {
+ pRow[0] = SHORT_TO_FLOAT(src[0]);
+ pRow[1] = SHORT_TO_FLOAT(src[1]);
+ pRow[2] = SHORT_TO_FLOAT(src[2]);
+ pRow[3] = SHORT_TO_FLOAT(src[3]);
+ }
+ p += dst_stride;
+ }
+}
+
+
+static void
+r16g16b16a16_put_tile_rgba(short *dst,
+ unsigned w, unsigned h,
+ const float *p,
+ unsigned src_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ const float *pRow = p;
+ for (j = 0; j < w; j++, dst += 4, pRow += 4) {
+ UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]);
+ UNCLAMPED_FLOAT_TO_SHORT(dst[1], pRow[1]);
+ UNCLAMPED_FLOAT_TO_SHORT(dst[2], pRow[2]);
+ UNCLAMPED_FLOAT_TO_SHORT(dst[3], pRow[3]);
+ }
+ p += src_stride;
+ }
+}
+
+
+
+/*** PIPE_FORMAT_U_I8 ***/
+
+static void
+i8_get_tile_rgba(ubyte *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, src++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = UBYTE_TO_FLOAT(*src);
+ }
+ p += dst_stride;
+ }
+}
+
+
+/*** PIPE_FORMAT_U_A8_L8 ***/
+
+static void
+a8_l8_get_tile_rgba(ushort *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ ushort p = *src++;
+ pRow[0] =
+ pRow[1] =
+ pRow[2] = UBYTE_TO_FLOAT(p & 0xff);
+ pRow[3] = UBYTE_TO_FLOAT(p >> 8);
+ }
+ p += dst_stride;
+ }
+}
+
+
+
+
+/*** PIPE_FORMAT_Z32_UNORM ***/
+
+/**
+ * Return each Z value as four floats in [0,1].
+ */
+static void
+z32_get_tile_rgba(unsigned *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ const double scale = 1.0 / (double) 0xffffffff;
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = (float) (*src++ * scale);
+ }
+ p += dst_stride;
+ }
+}
+
+
+/*** PIPE_FORMAT_S8Z24_UNORM ***/
+
+/**
+ * Return Z component as four float in [0,1]. Stencil part ignored.
+ */
+static void
+s8z24_get_tile_rgba(unsigned *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ const double scale = 1.0 / ((1 << 24) - 1);
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = (float) (scale * (*src++ & 0xffffff));
+ }
+ p += dst_stride;
+ }
+}
+
+
+/*** PIPE_FORMAT_Z24S8_UNORM ***/
+
+/**
+ * Return Z component as four float in [0,1]. Stencil part ignored.
+ */
+static void
+z24s8_get_tile_rgba(unsigned *src,
+ unsigned w, unsigned h,
+ float *p,
+ unsigned dst_stride)
+{
+ const double scale = 1.0 / ((1 << 24) - 1);
+ unsigned i, j;
+
+ for (i = 0; i < h; i++) {
+ float *pRow = p;
+ for (j = 0; j < w; j++, pRow += 4) {
+ pRow[0] =
+ pRow[1] =
+ pRow[2] =
+ pRow[3] = (float) (scale * (*src++ >> 8));
+ }
+ p += dst_stride;
+ }
+}
+
+
+void
+pipe_get_tile_rgba(struct pipe_context *pipe,
+ struct pipe_surface *ps,
+ uint x, uint y, uint w, uint h,
+ float *p)
+{
+ unsigned dst_stride = w * 4;
+ void *packed;
+
+ if (pipe_clip_tile(x, y, &w, &h, ps))
+ return;
+
+ packed = MALLOC(h * w * ps->cpp);
+
+ if (!packed)
+ return;
+
+ pipe_get_tile_raw(pipe, ps, x, y, w, h, packed, w * ps->cpp);
+
+ switch (ps->format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ a8r8g8b8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ b8g8r8a8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
+ break;
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ a1r5g5b5_get_tile_rgba((ushort *) packed, w, h, p, dst_stride);
+ break;
+ case PIPE_FORMAT_A4R4G4B4_UNORM:
+ a4r4g4b4_get_tile_rgba((ushort *) packed, w, h, p, dst_stride);
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ r5g6b5_get_tile_rgba((ushort *) packed, w, h, p, dst_stride);
+ break;
+ case PIPE_FORMAT_U_L8:
+ l8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride);
+ break;
+ case PIPE_FORMAT_U_A8:
+ a8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride);
+ break;
+ case PIPE_FORMAT_U_I8:
+ i8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride);
+ break;
+ case PIPE_FORMAT_U_A8_L8:
+ a8_l8_get_tile_rgba((ushort *) packed, w, h, p, dst_stride);
+ break;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ r16g16b16a16_get_tile_rgba((short *) packed, w, h, p, dst_stride);
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
+ z16_get_tile_rgba((ushort *) packed, w, h, p, dst_stride);
+ break;
+ case PIPE_FORMAT_Z32_UNORM:
+ z32_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
+ break;
+ case PIPE_FORMAT_S8Z24_UNORM:
+ s8z24_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ z24s8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride);
+ break;
+ default:
+ assert(0);
+ }
+
+ FREE(packed);
+}
+
+
+void
+pipe_put_tile_rgba(struct pipe_context *pipe,
+ struct pipe_surface *ps,
+ uint x, uint y, uint w, uint h,
+ const float *p)
+{
+ unsigned src_stride = w * 4;
+ void *packed;
+
+ if (pipe_clip_tile(x, y, &w, &h, ps))
+ return;
+
+ packed = MALLOC(h * w * ps->cpp);
+
+ if (!packed)
+ return;
+
+ switch (ps->format) {
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
+ break;
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
+ break;
+ case PIPE_FORMAT_A1R5G5B5_UNORM:
+ /*a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/
+ break;
+ case PIPE_FORMAT_R5G6B5_UNORM:
+ r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);
+ break;
+ case PIPE_FORMAT_R8G8B8A8_UNORM:
+ break;
+ case PIPE_FORMAT_U_L8:
+ /*l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/
+ break;
+ case PIPE_FORMAT_U_A8:
+ /*a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/
+ break;
+ case PIPE_FORMAT_U_I8:
+ /*i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/
+ break;
+ case PIPE_FORMAT_U_A8_L8:
+ /*a8_l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/
+ break;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ r16g16b16a16_put_tile_rgba((short *) packed, w, h, p, src_stride);
+ break;
+ case PIPE_FORMAT_Z16_UNORM:
+ /*z16_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/
+ break;
+ case PIPE_FORMAT_Z32_UNORM:
+ /*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
+ break;
+ case PIPE_FORMAT_S8Z24_UNORM:
+ /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
+ break;
+ case PIPE_FORMAT_Z24S8_UNORM:
+ /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
+ break;
+ default:
+ assert(0);
+ }
+
+ pipe_put_tile_raw(pipe, ps, x, y, w, h, packed, w * ps->cpp);
+
+ FREE(packed);
+}
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef P_TILE_H
+#define P_TILE_H
+
+#include "pipe/p_compiler.h"
+
+struct pipe_context;
+struct pipe_surface;
+
+
+/**
+ * Clip tile against surface dims.
+ * \return TRUE if tile is totally clipped, FALSE otherwise
+ */
+static INLINE boolean
+pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps)
+{
+ if (x >= ps->width)
+ return TRUE;
+ if (y >= ps->height)
+ return TRUE;
+ if (x + *w > ps->width)
+ *w = ps->width - x;
+ if (y + *h > ps->height)
+ *h = ps->height - y;
+ return FALSE;
+}
+
+
+extern void
+pipe_get_tile_raw(struct pipe_context *pipe,
+ struct pipe_surface *ps,
+ uint x, uint y, uint w, uint h,
+ void *p, int dst_stride);
+
+extern void
+pipe_put_tile_raw(struct pipe_context *pipe,
+ struct pipe_surface *ps,
+ uint x, uint y, uint w, uint h,
+ const void *p, int src_stride);
+
+
+extern void
+pipe_get_tile_rgba(struct pipe_context *pipe,
+ struct pipe_surface *ps,
+ uint x, uint y, uint w, uint h,
+ float *p);
+
+extern void
+pipe_put_tile_rgba(struct pipe_context *pipe,
+ struct pipe_surface *ps,
+ uint x, uint y, uint w, uint h,
+ const float *p);
+
+#endif
--- /dev/null
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * Miscellaneous utility functions.
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_util.h"
+
+
+/**
+ * Copy 2D rect from one place to another.
+ * Position and sizes are in pixels.
+ */
+void
+pipe_copy_rect(ubyte * dst,
+ unsigned cpp,
+ unsigned dst_pitch,
+ unsigned dst_x,
+ unsigned dst_y,
+ unsigned width,
+ unsigned height,
+ const ubyte * src,
+ int src_pitch,
+ unsigned src_x,
+ int src_y)
+{
+ unsigned i;
+
+ dst_pitch *= cpp;
+ src_pitch *= cpp;
+ dst += dst_x * cpp;
+ src += src_x * cpp;
+ dst += dst_y * dst_pitch;
+ src += src_y * src_pitch;
+ width *= cpp;
+
+ if (width == dst_pitch && width == src_pitch)
+ memcpy(dst, src, height * width);
+ else {
+ for (i = 0; i < height; i++) {
+ memcpy(dst, src, width);
+ dst += dst_pitch;
+ src += src_pitch;
+ }
+ }
+}