#include "util/u_math.h"
#include "util/u_memory.h"
-#include "util/u_simple_list.h"
-#include "os/os_time.h"
+#include "util/simple_list.h"
+#include "util/os_time.h"
#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_bitarit.h"
#include "gallivm/lp_bld_const.h"
#include "lp_state_fs.h"
#include "lp_state_setup.h"
-/*
- * Set if the start point for interpolation should be calculated with a
- * more accurate method (barycentric interpolation).
- * Unfortunately, actual interpolation results of small tris with steep
- * gradients far away from the origin are still very busted, this does
- * nothing to change that (in fact it may make it worse), but some tests
- * (don't ask) really want accurate values at origin (and ONLY origin).
- */
-#define ACCURATE_A0 0
+
+/** Setup shader number (for debugging) */
+static unsigned setup_no = 0;
/* currently organized to interpolate full float[4] attributes even
LLVMValueRef dy01_ooa;
LLVMValueRef dx20_ooa;
LLVMValueRef dx01_ooa;
- LLVMValueRef e01o;
- LLVMValueRef e20o;
- LLVMValueRef e12o;
struct lp_build_context bld;
};
/**
 * Emit the IR that writes one attribute's three coefficient values out.
 *
 * A 32-bit integer constant is built from `slot` and used as the single
 * GEP index into each of args->a0, args->dadx and args->dady; the a0,
 * dadx and dady values are then stored to those three addresses, in
 * that order.
 *
 * gallivm  supplies the LLVM builder every instruction is emitted with
 * args     provides the a0 / dadx / dady base pointers that are indexed
 * slot     element index applied to all three base pointers
 * a0       value stored at args->a0[slot]
 * dadx     value stored at args->dadx[slot]
 * dady     value stored at args->dady[slot]
 *
 * NOTE(review): the removed/added line pairs below read identically
 * here, so this hunk looks like a whitespace-only re-indentation --
 * confirm against the unmangled patch.
 */
static void
store_coef(struct gallivm_state *gallivm,
- struct lp_setup_args *args,
- unsigned slot,
- LLVMValueRef a0,
- LLVMValueRef dadx,
- LLVMValueRef dady)
+ struct lp_setup_args *args,
+ unsigned slot,
+ LLVMValueRef a0,
+ LLVMValueRef dadx,
+ LLVMValueRef dady)
{
LLVMBuilderRef builder = gallivm->builder;
/* One shared index value: the same slot is addressed in all three arrays. */
LLVMValueRef idx = lp_build_const_int32(gallivm, slot);
LLVMBuildStore(builder,
- a0,
- LLVMBuildGEP(builder, args->a0, &idx, 1, ""));
+ a0,
+ LLVMBuildGEP(builder, args->a0, &idx, 1, ""));
LLVMBuildStore(builder,
- dadx,
- LLVMBuildGEP(builder, args->dadx, &idx, 1, ""));
+ dadx,
+ LLVMBuildGEP(builder, args->dadx, &idx, 1, ""));
LLVMBuildStore(builder,
- dady,
- LLVMBuildGEP(builder, args->dady, &idx, 1, ""));
+ dady,
+ LLVMBuildGEP(builder, args->dady, &idx, 1, ""));
}
LLVMValueRef a0_0 = args->facing;
LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, "");
LLVMValueRef a0, face_val;
- const unsigned char swizzles[4] = { PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ZERO,
- PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ZERO };
+ const unsigned char swizzles[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_0,
+ PIPE_SWIZZLE_0, PIPE_SWIZZLE_0 };
/* Our face val is either 1 or 0 so we do
* face = (val * 2) - 1
* to make it 1 or -1
LLVMValueRef a2)
{
LLVMBuilderRef b = gallivm->builder;
- bool accurate_a0 = ACCURATE_A0;
LLVMValueRef attr_0;
LLVMValueRef dy20_ooa = args->dy20_ooa;
LLVMValueRef dy01_ooa = args->dy01_ooa;
/* Calculate a0 - the attribute value at the origin
*/
- if (!accurate_a0) {
- LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0");
- LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0");
- LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
- attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
- }
- else {
- LLVMValueRef ao2 = LLVMBuildFMul(b, args->e01o, a2, "");
- LLVMValueRef ao1 = LLVMBuildFMul(b, args->e20o, a1, "");
- LLVMValueRef ao0 = LLVMBuildFMul(b, args->e12o, a0, "");
- attr_0 = LLVMBuildFAdd(b, ao0, ao1, "");
- attr_0 = LLVMBuildFAdd(b, attr_0, ao2, "");
- }
+ LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0");
+ LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0");
+ LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0");
+ attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0");
store_coef(gallivm, args, slot, attr_0, dadx, dady);
}
int i;
for(i = 0; i < nr_args; ++i)
if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
- LLVMAddAttribute(LLVMGetParam(function, i),
- LLVMNoAliasAttribute);
+ lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
}
static void
LLVMValueRef attr_pos[3];
struct lp_type typef4 = lp_type_float_vec(32, 128);
struct lp_build_context bld;
- bool accurate_a0 = ACCURATE_A0;
lp_build_context_init(&bld, gallivm, typef4);
args->bld = bld;
dxy20 = LLVMBuildFMul(b, dxy20, ooa, "");
dxy01 = LLVMBuildFMul(b, dxy01, ooa, "");
- if (accurate_a0) {
- LLVMValueRef xy1xy2, xy1xy2_center, dxy12, dyx01, dyx12yx20;
- LLVMValueRef p0, p1p2, tmp0, tmp1, shuf0145, shuf1054, shuf1u3u;
-
- shuffles[0] = zeroi;
- shuffles[1] = onei;
- shuffles[2] = lp_build_const_int32(gallivm, 4);
- shuffles[3] = lp_build_const_int32(gallivm, 5);
- shuf0145 = LLVMConstVector(shuffles, 4);
- shuffles[0] = onei;
- shuffles[1] = zeroi;
- shuffles[2] = lp_build_const_int32(gallivm, 5);
- shuffles[3] = lp_build_const_int32(gallivm, 4);
- shuf1054 = LLVMConstVector(shuffles, 4);
- shuffles[0] = onei;
- shuffles[1] = LLVMGetUndef(shuf_type);
- shuffles[2] = lp_build_const_int32(gallivm, 3);
- shuffles[3] = LLVMGetUndef(shuf_type);
- shuf1u3u = LLVMConstVector(shuffles, 4);
-
- xy1xy2 = LLVMBuildShuffleVector(b, attr_pos[1], attr_pos[2], shuf0145, "");
- xy1xy2_center = LLVMBuildFSub(b, xy1xy2, pixel_center, "");
- dxy12 = LLVMBuildFSub(b, attr_pos[1], attr_pos[2], "dxy12");
- dxy12 = LLVMBuildFMul(b, dxy12, ooa, "");
- dyx12yx20 = LLVMBuildShuffleVector(b, dxy12, dxy20, shuf1054, "dyx12yx20");
- dyx01 = LLVMBuildShuffleVector(b, dxy01, dxy01, shuf10, "");
- p0 = LLVMBuildFMul(b, dyx01, xy0_center, "");
- p1p2 = LLVMBuildFMul(b, dyx12yx20, xy1xy2_center, "");
- tmp0 = LLVMBuildExtractElement(b, p0, zeroi, "");
- tmp1 = LLVMBuildExtractElement(b, p0, onei, "");
- args->e01o = lp_build_broadcast_scalar(&bld, LLVMBuildFSub(b, tmp0, tmp1, "e01o"));
- tmp1 = LLVMBuildShuffleVector(b, p1p2, p1p2, shuf1u3u, "");
- tmp0 = LLVMBuildFSub(b, p1p2, tmp1, "e12o20o");
- args->e12o = lp_build_extract_broadcast(gallivm, typef4, typef4, tmp0, zeroi);
- args->e20o = lp_build_extract_broadcast(gallivm, typef4, typef4, tmp0,
- lp_build_const_int32(gallivm, 2));
- }
-
args->dy20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei);
args->dy01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei);
struct lp_setup_variant *variant = NULL;
struct gallivm_state *gallivm;
struct lp_setup_args args;
- char func_name[256];
+ char func_name[64];
LLVMTypeRef vec4f_type;
LLVMTypeRef func_type;
LLVMTypeRef arg_types[7];
goto fail;
variant = CALLOC_STRUCT(lp_setup_variant);
- if (variant == NULL)
+ if (!variant)
goto fail;
- variant->gallivm = gallivm = gallivm_create();
+ variant->no = setup_no++;
+
+ util_snprintf(func_name, sizeof(func_name), "setup_variant_%u",
+ variant->no);
+
+ variant->gallivm = gallivm = gallivm_create(func_name, lp->context);
if (!variant->gallivm) {
goto fail;
}
memcpy(&variant->key, key, key->size);
variant->list_item_global.base = variant;
- util_snprintf(func_name, sizeof(func_name), "fs%u_setup%u",
- 0, variant->no);
-
/* Currently always deal with full 4-wide vertex attributes from
* the vertices.
*/
arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */
func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
- arg_types, Elements(arg_types), 0);
+ arg_types, ARRAY_SIZE(arg_types), 0);
variant->function = LLVMAddFunction(gallivm->module, func_name, func_type);
if (!variant->function)
variant->function, "entry");
LLVMPositionBuilderAtEnd(builder, block);
- set_noalias(builder, variant->function, arg_types, Elements(arg_types));
+ set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types));
init_args(gallivm, &variant->key, &args);
emit_tri_coef(gallivm, &variant->key, &args);
if (!variant->jit_function)
goto fail;
+ gallivm_free_ir(variant->gallivm);
+
/*
* Update timing information:
*/
LP_COUNT_ADD(llvm_compile_time, t1 - t0);
LP_COUNT_ADD(nr_llvm_compiles, 1);
}
-
+
return variant;
fail:
if (variant) {
- if (variant->function) {
- gallivm_free_function(gallivm,
- variant->function,
- variant->jit_function);
- }
if (variant->gallivm) {
gallivm_destroy(variant->gallivm);
}
FREE(variant);
}
-
+
return NULL;
}
static void
lp_make_setup_variant_key(struct llvmpipe_context *lp,
- struct lp_setup_variant_key *key)
+ struct lp_setup_variant_key *key)
{
struct lp_fragment_shader *fs = lp->fs;
unsigned i;
assert(sizeof key->inputs[0] == sizeof(uint));
-
+
key->num_inputs = fs->info.base.num_inputs;
key->flatshade_first = lp->rasterizer->flatshade_first;
key->pixel_center_half = lp->rasterizer->half_pixel_center;
key->size = Offset(struct lp_setup_variant_key,
inputs[key->num_inputs]);
- key->color_slot = lp->color_slot [0];
+ key->color_slot = lp->color_slot[0];
key->bcolor_slot = lp->bcolor_slot[0];
- key->spec_slot = lp->color_slot [1];
- key->bspec_slot = lp->bcolor_slot[1];
- assert(key->color_slot == lp->color_slot [0]);
- assert(key->bcolor_slot == lp->bcolor_slot[0]);
- assert(key->spec_slot == lp->color_slot [1]);
- assert(key->bspec_slot == lp->bcolor_slot[1]);
+ key->spec_slot = lp->color_slot[1];
+ key->bspec_slot = lp->bcolor_slot[1];
/*
* If depth is floating point, depth bias is calculated with respect
for (i = 0; i < key->num_inputs; i++) {
if (key->inputs[i].interp == LP_INTERP_COLOR) {
if (lp->rasterizer->flatshade)
- key->inputs[i].interp = LP_INTERP_CONSTANT;
- else
- key->inputs[i].interp = LP_INTERP_PERSPECTIVE;
+ key->inputs[i].interp = LP_INTERP_CONSTANT;
+ else
+ key->inputs[i].interp = LP_INTERP_PERSPECTIVE;
}
}
static void
remove_setup_variant(struct llvmpipe_context *lp,
- struct lp_setup_variant *variant)
+ struct lp_setup_variant *variant)
{
if (gallivm_debug & GALLIVM_DEBUG_IR) {
debug_printf("llvmpipe: del setup_variant #%u total %u\n",
- variant->no, lp->nr_setup_variants);
- }
-
- if (variant->function) {
- gallivm_free_function(variant->gallivm,
- variant->function,
- variant->jit_function);
+ variant->no, lp->nr_setup_variants);
}
if (variant->gallivm) {
foreach(li, &lp->setup_variants_list) {
if(li->base->key.size == key->size &&
- memcmp(&li->base->key, key, key->size) == 0) {
+ memcmp(&li->base->key, key, key->size) == 0) {
variant = li->base;
break;
}
}
else {
if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) {
- cull_setup_variants(lp);
+ cull_setup_variants(lp);
}
variant = generate_setup_variant(key, lp);
if (variant) {
insert_at_head(&lp->setup_variants_list, &variant->list_item_global);
lp->nr_setup_variants++;
- llvmpipe_variant_count++;
}
}
- lp_setup_set_setup_variant(lp->setup,
- variant);
+ lp_setup_set_setup_variant(lp->setup, variant);
}
void
}
void
-lp_dump_setup_coef( const struct lp_setup_variant_key *key,
- const float (*sa0)[4],
- const float (*sdadx)[4],
- const float (*sdady)[4])
+lp_dump_setup_coef(const struct lp_setup_variant_key *key,
+ const float (*sa0)[4],
+ const float (*sdadx)[4],
+ const float (*sdady)[4])
{
int i, slot;
float dady = sdady[0][i];
debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n",
- "xyzw"[i],
- a0, dadx, dady);
+ "xyzw"[i], a0, dadx, dady);
}
for (slot = 0; slot < key->num_inputs; slot++) {
unsigned usage_mask = key->inputs[slot].usage_mask;
for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
- if (usage_mask & (1 << i)) {
- float a0 = sa0 [1 + slot][i];
- float dadx = sdadx[1 + slot][i];
- float dady = sdady[1 + slot][i];
-
- debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
- slot,
- "xyzw"[i],
- a0, dadx, dady);
- }
+ if (usage_mask & (1 << i)) {
+ float a0 = sa0 [1 + slot][i];
+ float dadx = sdadx[1 + slot][i];
+ float dady = sdady[1 + slot][i];
+
+ debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n",
+ slot, "xyzw"[i], a0, dadx, dady);
+ }
}
}
}