/**************************************************************************
*
* Copyright 2009 VMware, Inc.
- * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2008 VMware, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_prim.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_strings.h"
#include "lp_bld_tgsi_action.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
+#include "lp_bld_misc.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_sample.h"
#include "lp_bld_struct.h"
+/* SM 4.0 says that subroutines can nest 32 deep and
+ * we need one more for our main function */
+#define LP_MAX_NUM_FUNCS 33
+
#define DUMP_GS_EMITS 0
-static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
+/*
+ * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
+ * instruction.
+ *
+ * TODO:
+ * - take execution masks in consideration
+ * - debug control-flow instructions
+ */
+#define DEBUG_EXECUTION 0
+
+
+/*
+ * Emit code to print a register value.
+ */
+static void
+emit_dump_reg(struct gallivm_state *gallivm,
+ unsigned file,
+ unsigned index,
+ unsigned chan,
+ LLVMValueRef value)
+{
+ char buf[32];
+
+ snprintf(buf, sizeof buf, " %s[%u].%c = ",
+ tgsi_file_name(file),
+ index, "xyzw"[chan]);
+
+ lp_build_print_value(gallivm, buf, value);
+}
+
+/*
+ * Return the context for the current function.
+ * (always 'main', if shader doesn't do any function calls)
+ */
+static inline struct function_ctx *
+func_ctx(struct lp_exec_mask *mask)
+{
+ assert(mask->function_stack_size > 0);
+ assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
+ return &mask->function_stack[mask->function_stack_size - 1];
+}
+
+/*
+ * Returns true if we're in a loop.
+ * It's global, meaning that it returns true even if there's
+ * no loop inside the current function, but we were inside
+ * a loop inside another function, from which this one was called.
+ */
+static inline boolean
+mask_has_loop(struct lp_exec_mask *mask)
+{
+ int i;
+ for (i = mask->function_stack_size - 1; i >= 0; --i) {
+ const struct function_ctx *ctx = &mask->function_stack[i];
+ if (ctx->loop_stack_size > 0)
+ return TRUE;
+ }
+ return FALSE;
+}
+
+/*
+ * combine the execution mask if there is one with the current mask.
+ */
+static LLVMValueRef
+mask_vec(struct lp_build_tgsi_context *bld_base)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+ struct lp_exec_mask *exec_mask = &bld->exec_mask;
+ LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
+ if (!exec_mask->has_mask) {
+ return bld_mask;
+ }
+ if (!bld_mask)
+ return exec_mask->exec_mask;
+ return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
+ exec_mask->exec_mask, "");
+}
+
+/*
+ * Returns true if we're inside a switch statement.
+ * It's global, meaning that it returns true even if there's
+ * no switch in the current function, but we were inside
+ * a switch inside another function, from which this one was called.
+ */
+static inline boolean
+mask_has_switch(struct lp_exec_mask *mask)
+{
+ int i;
+ for (i = mask->function_stack_size - 1; i >= 0; --i) {
+ const struct function_ctx *ctx = &mask->function_stack[i];
+ if (ctx->switch_stack_size > 0)
+ return TRUE;
+ }
+ return FALSE;
+}
+
+/*
+ * Returns true if we're inside a conditional.
+ * It's global, meaning that it returns true even if there's
+ * no conditional in the current function, but we were inside
+ * a conditional inside another function, from which this one was called.
+ */
+static inline boolean
+mask_has_cond(struct lp_exec_mask *mask)
+{
+ int i;
+ for (i = mask->function_stack_size - 1; i >= 0; --i) {
+ const struct function_ctx *ctx = &mask->function_stack[i];
+ if (ctx->cond_stack_size > 0)
+ return TRUE;
+ }
+ return FALSE;
+}
+
+
+/*
+ * Initialize a function context at the specified index.
+ */
+static void
+lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
- LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
- LLVMBuilderRef builder = bld->gallivm->builder;
+ LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
+ LLVMBuilderRef builder = mask->bld->gallivm->builder;
+ struct function_ctx *ctx = &mask->function_stack[function_idx];
+
+ ctx->cond_stack_size = 0;
+ ctx->loop_stack_size = 0;
+ ctx->switch_stack_size = 0;
+
+ if (function_idx == 0) {
+ ctx->ret_mask = mask->ret_mask;
+ }
+
+ ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
+ int_type, "looplimiter");
+ LLVMBuildStore(
+ builder,
+ LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
+ ctx->loop_limiter);
+}
+static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
+{
mask->bld = bld;
mask->has_mask = FALSE;
mask->ret_in_main = FALSE;
- mask->cond_stack_size = 0;
- mask->loop_stack_size = 0;
- mask->call_stack_size = 0;
- mask->switch_stack_size = 0;
+ /* For the main function */
+ mask->function_stack_size = 1;
mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
mask->cond_mask = mask->switch_mask =
LLVMConstAllOnes(mask->int_vec_type);
- mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
+ mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
+ sizeof(mask->function_stack[0]));
+ lp_exec_mask_function_init(mask, 0);
+}
- LLVMBuildStore(
- builder,
- LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
- mask->loop_limiter);
+static void
+lp_exec_mask_fini(struct lp_exec_mask *mask)
+{
+ FREE(mask->function_stack);
}
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
+ boolean has_loop_mask = mask_has_loop(mask);
+ boolean has_cond_mask = mask_has_cond(mask);
+ boolean has_switch_mask = mask_has_switch(mask);
+ boolean has_ret_mask = mask->function_stack_size > 1 ||
+ mask->ret_in_main;
- if (mask->loop_stack_size) {
+ if (has_loop_mask) {
/*for loops we need to update the entire mask at runtime */
LLVMValueRef tmp;
assert(mask->break_mask);
} else
mask->exec_mask = mask->cond_mask;
- if (mask->switch_stack_size) {
+ if (has_switch_mask) {
mask->exec_mask = LLVMBuildAnd(builder,
mask->exec_mask,
mask->switch_mask,
"switchmask");
}
- if (mask->call_stack_size || mask->ret_in_main) {
+ if (has_ret_mask) {
mask->exec_mask = LLVMBuildAnd(builder,
mask->exec_mask,
mask->ret_mask,
"callmask");
}
- mask->has_mask = (mask->cond_stack_size > 0 ||
- mask->loop_stack_size > 0 ||
- mask->call_stack_size > 0 ||
- mask->switch_stack_size > 0 ||
- mask->ret_in_main);
+ mask->has_mask = (has_cond_mask ||
+ has_loop_mask ||
+ has_switch_mask ||
+ has_ret_mask);
}
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
LLVMValueRef val)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
+ struct function_ctx *ctx = func_ctx(mask);
- assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
- if (mask->cond_stack_size == 0) {
+ if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
+ ctx->cond_stack_size++;
+ return;
+ }
+ if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
}
- mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
+ ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
assert(LLVMTypeOf(val) == mask->int_vec_type);
mask->cond_mask = LLVMBuildAnd(builder,
mask->cond_mask,
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
+ struct function_ctx *ctx = func_ctx(mask);
LLVMValueRef prev_mask;
LLVMValueRef inv_mask;
- assert(mask->cond_stack_size);
- prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
- if (mask->cond_stack_size == 1) {
+ assert(ctx->cond_stack_size);
+ if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
+ return;
+ prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
+ if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
}
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
- assert(mask->cond_stack_size);
- mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
+ struct function_ctx *ctx = func_ctx(mask);
+ assert(ctx->cond_stack_size);
+ --ctx->cond_stack_size;
+ if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
+ return;
+ mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
lp_exec_mask_update(mask);
}
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
+ struct function_ctx *ctx = func_ctx(mask);
- if (mask->loop_stack_size == 0) {
- assert(mask->loop_block == NULL);
- assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
- assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
- assert(mask->break_var == NULL);
+ if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
+ ++ctx->loop_stack_size;
+ return;
}
- assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);
+ ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
+ ctx->break_type;
+ ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
- mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
- mask->break_type;
- mask->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
+ ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
+ ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
+ ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
+ ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
+ ++ctx->loop_stack_size;
- mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
- mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
- mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
- mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
- ++mask->loop_stack_size;
+ ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
+ LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
- mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
- LLVMBuildStore(builder, mask->break_mask, mask->break_var);
+ ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
- mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
+ LLVMBuildBr(builder, ctx->loop_block);
+ LLVMPositionBuilderAtEnd(builder, ctx->loop_block);
- LLVMBuildBr(builder, mask->loop_block);
- LLVMPositionBuilderAtEnd(builder, mask->loop_block);
-
- mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");
+ mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");
lp_exec_mask_update(mask);
}
struct lp_build_tgsi_context * bld_base)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
+ struct function_ctx *ctx = func_ctx(mask);
- if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
+ if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
LLVMValueRef exec_mask = LLVMBuildNot(builder,
mask->exec_mask,
"break");
exec_mask, "break_full");
}
else {
- unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
+ enum tgsi_opcode opcode =
+ bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
opcode == TGSI_OPCODE_CASE);
- if (mask->switch_in_default) {
+ if (ctx->switch_in_default) {
/*
* stop default execution but only if this is an unconditional switch.
* (The condition here is not perfect since dead code after break is
* allowed but should be sufficient since false negatives are just
* unoptimized - so we don't have to pre-evaluate that).
*/
- if(break_always && mask->switch_pc) {
- bld_base->pc = mask->switch_pc;
+ if(break_always && ctx->switch_pc) {
+ bld_base->pc = ctx->switch_pc;
return;
}
}
lp_exec_mask_update(mask);
}
-static void lp_exec_break_condition(struct lp_exec_mask *mask,
- LLVMValueRef cond)
-{
- LLVMBuilderRef builder = mask->bld->gallivm->builder;
- LLVMValueRef cond_mask = LLVMBuildAnd(builder,
- mask->exec_mask,
- cond, "cond_mask");
- cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
-
- if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
- mask->break_mask = LLVMBuildAnd(builder,
- mask->break_mask,
- cond_mask, "breakc_full");
- }
- else {
- mask->switch_mask = LLVMBuildAnd(builder,
- mask->switch_mask,
- cond_mask, "breakc_switch");
- }
-
- lp_exec_mask_update(mask);
-}
-
static void lp_exec_continue(struct lp_exec_mask *mask)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
struct lp_exec_mask *mask)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
+ struct function_ctx *ctx = func_ctx(mask);
LLVMBasicBlockRef endloop;
LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
assert(mask->break_mask);
+
+ assert(ctx->loop_stack_size);
+ if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
+ --ctx->loop_stack_size;
+ return;
+ }
+
/*
* Restore the cont_mask, but don't pop
*/
- assert(mask->loop_stack_size);
- mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
+ mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
lp_exec_mask_update(mask);
/*
* Unlike the continue mask, the break_mask must be preserved across loop
* iterations
*/
- LLVMBuildStore(builder, mask->break_mask, mask->break_var);
+ LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
/* Decrement the loop limiter */
- limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");
+ limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");
limiter = LLVMBuildSub(
builder,
LLVMConstInt(int_type, 1, false),
"");
- LLVMBuildStore(builder, limiter, mask->loop_limiter);
+ LLVMBuildStore(builder, limiter, ctx->loop_limiter);
/* i1cond = (mask != 0) */
i1cond = LLVMBuildICmp(
endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
LLVMBuildCondBr(builder,
- icond, mask->loop_block, endloop);
+ icond, ctx->loop_block, endloop);
LLVMPositionBuilderAtEnd(builder, endloop);
- assert(mask->loop_stack_size);
- --mask->loop_stack_size;
- mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
- mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
- mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
- mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
- mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];
+ assert(ctx->loop_stack_size);
+ --ctx->loop_stack_size;
+ mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
+ mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
+ ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
+ ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
+ ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
+ ctx->switch_stack_size];
lp_exec_mask_update(mask);
}
/*
 * Begin a switch statement: push the enclosing switch's state into the
 * current function context and start with an empty (no case matched
 * yet) switch mask.
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *fctx = func_ctx(mask);

   /* Beyond the nesting limit only the counter is maintained so the
    * matching endswitch can unwind it. */
   if (fctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       fctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      fctx->switch_stack_size++;
      return;
   }

   fctx->break_type_stack[fctx->loop_stack_size + fctx->switch_stack_size] =
      fctx->break_type;
   fctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   /* Push the state of any enclosing switch. */
   fctx->switch_stack[fctx->switch_stack_size].switch_mask = mask->switch_mask;
   fctx->switch_stack[fctx->switch_stack_size].switch_val = fctx->switch_val;
   fctx->switch_stack[fctx->switch_stack_size].switch_mask_default = fctx->switch_mask_default;
   fctx->switch_stack[fctx->switch_stack_size].switch_in_default = fctx->switch_in_default;
   fctx->switch_stack[fctx->switch_stack_size].switch_pc = fctx->switch_pc;
   fctx->switch_stack_size++;

   /* No case has matched yet: start with an all-zero mask. */
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   fctx->switch_val = switchval;
   fctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   fctx->switch_in_default = false;
   fctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}
struct lp_build_tgsi_context * bld_base)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
+ struct function_ctx *ctx = func_ctx(mask);
+
+ if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
+ ctx->switch_stack_size--;
+ return;
+ }
/* check if there's deferred default if so do it now */
- if (mask->switch_pc && !mask->switch_in_default) {
+ if (ctx->switch_pc && !ctx->switch_in_default) {
LLVMValueRef prevmask, defaultmask;
unsigned tmp_pc;
- prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
- defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
+ prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
+ defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
- mask->switch_in_default = true;
+ ctx->switch_in_default = true;
lp_exec_mask_update(mask);
- assert(bld_base->instructions[mask->switch_pc - 1].Instruction.Opcode ==
+ assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
TGSI_OPCODE_DEFAULT);
tmp_pc = bld_base->pc;
- bld_base->pc = mask->switch_pc;
+ bld_base->pc = ctx->switch_pc;
/*
* re-purpose switch_pc to point to here again, since we stop execution of
* the deferred default after next break.
*/
- mask->switch_pc = tmp_pc - 1;
+ ctx->switch_pc = tmp_pc - 1;
return;
}
- else if (mask->switch_pc && mask->switch_in_default) {
- assert(bld_base->pc == mask->switch_pc + 1);
+ else if (ctx->switch_pc && ctx->switch_in_default) {
+ assert(bld_base->pc == ctx->switch_pc + 1);
}
- mask->switch_stack_size--;
- mask->switch_val = mask->switch_stack[mask->switch_stack_size].switch_val;
- mask->switch_mask = mask->switch_stack[mask->switch_stack_size].switch_mask;
- mask->switch_mask_default = mask->switch_stack[mask->switch_stack_size].switch_mask_default;
- mask->switch_in_default = mask->switch_stack[mask->switch_stack_size].switch_in_default;
- mask->switch_pc = mask->switch_stack[mask->switch_stack_size].switch_pc;
+ ctx->switch_stack_size--;
+ mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
+ ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
+ ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
+ ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
+ ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
- mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];
+ ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
lp_exec_mask_update(mask);
}
LLVMValueRef caseval)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
+ struct function_ctx *ctx = func_ctx(mask);
LLVMValueRef casemask, prevmask;
+ if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
+ return;
+ }
+
/* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
- if (!mask->switch_in_default) {
- prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
- casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, mask->switch_val);
- mask->switch_mask_default = LLVMBuildOr(builder, casemask,
- mask->switch_mask_default, "sw_default_mask");
+ if (!ctx->switch_in_default) {
+ prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
+ casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
+ ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
+ ctx->switch_mask_default, "sw_default_mask");
casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
int *default_pc_start)
{
unsigned pc = bld_base->pc;
- unsigned curr_switch_stack = mask->switch_stack_size;
+ struct function_ctx *ctx = func_ctx(mask);
+ int curr_switch_stack = ctx->switch_stack_size;
+
+ if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
+ return false;
+ }
/* skip over case statements which are together with default */
while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
pc++;
}
- while (pc != -1 && pc < bld_base->num_instructions) {
- unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
+ while (pc != ~0u && pc < bld_base->num_instructions) {
+ enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
switch (opcode) {
case TGSI_OPCODE_CASE:
- if (curr_switch_stack == mask->switch_stack_size) {
+ if (curr_switch_stack == ctx->switch_stack_size) {
*default_pc_start = pc - 1;
return false;
}
curr_switch_stack++;
break;
case TGSI_OPCODE_ENDSWITCH:
- if (curr_switch_stack == mask->switch_stack_size) {
+ if (curr_switch_stack == ctx->switch_stack_size) {
*default_pc_start = pc - 1;
return true;
}
curr_switch_stack--;
break;
+ default:
+ ; /* nothing */
}
pc++;
}
struct lp_build_tgsi_context * bld_base)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
+ struct function_ctx *ctx = func_ctx(mask);
int default_exec_pc;
boolean default_is_last;
+ if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
+ return;
+ }
+
/*
* This is a messy opcode, because it may not be always at the end and
* there can be fallthrough in and out of it.
*/
if (default_is_last) {
LLVMValueRef prevmask, defaultmask;
- prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
- defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
+ prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
+ defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
- mask->switch_in_default = true;
+ ctx->switch_in_default = true;
lp_exec_mask_update(mask);
}
* which just gets rid of all case statements appearing together with
* default (or could do switch analysis at switch start time instead).
*/
- unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
- boolean ft_into = (opcode != TGSI_OPCODE_BRK ||
+ enum tgsi_opcode opcode =
+ bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
+ boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
opcode != TGSI_OPCODE_SWITCH);
/*
* If it is not last statement and there was no fallthrough into it,
* do the same as with the former case, except instead of skipping the code
* just execute it without updating the mask, then go back and re-execute.
*/
- mask->switch_pc = bld_base->pc;
+ ctx->switch_pc = bld_base->pc;
if (!ft_into) {
bld_base->pc = default_exec_pc;
}
*/
static void lp_exec_mask_store(struct lp_exec_mask *mask,
struct lp_build_context *bld_store,
- LLVMValueRef pred,
LLVMValueRef val,
LLVMValueRef dst_ptr)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
+ LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;
assert(lp_check_value(bld_store->type, val));
assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
- assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));
+ assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val) ||
+ LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(dst_ptr))) == LLVMArrayTypeKind);
- /* Mix the predicate and execution mask */
- if (mask->has_mask) {
- if (pred) {
- pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
- } else {
- pred = mask->exec_mask;
- }
- }
-
- if (pred) {
+ if (exec_mask) {
LLVMValueRef res, dst;
dst = LLVMBuildLoad(builder, dst_ptr, "");
- res = lp_build_select(bld_store, pred, val, dst);
+ res = lp_build_select(bld_store, exec_mask, val, dst);
LLVMBuildStore(builder, res, dst_ptr);
} else
LLVMBuildStore(builder, val, dst_ptr);
int func,
int *pc)
{
- assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
- mask->call_stack[mask->call_stack_size].pc = *pc;
- mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
- mask->call_stack_size++;
+ if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
+ return;
+ }
+
+ lp_exec_mask_function_init(mask, mask->function_stack_size);
+ mask->function_stack[mask->function_stack_size].pc = *pc;
+ mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
+ mask->function_stack_size++;
*pc = func;
}
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
LLVMBuilderRef builder = mask->bld->gallivm->builder;
+ struct function_ctx *ctx = func_ctx(mask);
LLVMValueRef exec_mask;
- if (mask->cond_stack_size == 0 &&
- mask->loop_stack_size == 0 &&
- mask->switch_stack_size == 0 &&
- mask->call_stack_size == 0) {
+ if (ctx->cond_stack_size == 0 &&
+ ctx->loop_stack_size == 0 &&
+ ctx->switch_stack_size == 0 &&
+ mask->function_stack_size == 1) {
/* returning from main() */
*pc = -1;
return;
}
- if (mask->call_stack_size == 0) {
+ if (mask->function_stack_size == 1) {
/*
* This requires special handling since we need to ensure
* we don't drop the mask even if we have no call stack
static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
- assert(mask->call_stack_size);
- mask->call_stack_size--;
- *pc = mask->call_stack[mask->call_stack_size].pc;
- mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
+ struct function_ctx *ctx;
+
+ assert(mask->function_stack_size > 1);
+ assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
+
+ ctx = func_ctx(mask);
+ mask->function_stack_size--;
+
+ *pc = ctx->pc;
+ mask->ret_mask = ctx->ret_mask;
+
lp_exec_mask_update(mask);
}
+static LLVMValueRef
+get_file_ptr(struct lp_build_tgsi_soa_context *bld,
+ unsigned file,
+ int index,
+ unsigned chan)
+{
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+ LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
+ LLVMValueRef var_of_array;
+
+ switch (file) {
+ case TGSI_FILE_TEMPORARY:
+ array_of_vars = bld->temps;
+ var_of_array = bld->temps_array;
+ break;
+ case TGSI_FILE_OUTPUT:
+ array_of_vars = bld->outputs;
+ var_of_array = bld->outputs_array;
+ break;
+ default:
+ assert(0);
+ return NULL;
+ }
+
+ assert(chan < 4);
+
+ if (bld->indirect_files & (1 << file)) {
+ LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
+ if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
+ LLVMValueRef gep[2];
+ gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
+ gep[1] = lindex;
+ return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
+ } else {
+ return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
+ }
+ }
+ else {
+ assert(index <= bld->bld_base.info->file_max[file]);
+ return array_of_vars[index][chan];
+ }
+}
+
+
/**
* Return pointer to a temporary register channel (src or dest).
* Note that indirect addressing cannot be handled here.
unsigned index,
unsigned chan)
{
- LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
- assert(chan < 4);
- if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
- LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
- return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
- }
- else {
- return bld->temps[index][chan];
- }
+ return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}
/**
unsigned index,
unsigned chan)
{
- LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
- assert(chan < 4);
- if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
- LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
- index * 4 + chan);
- return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
- }
- else {
- return bld->outputs[index][chan];
- }
+ return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}
/*
* with a little work.
*/
static LLVMValueRef
-build_gather(struct lp_build_context *bld,
+build_gather(struct lp_build_tgsi_context *bld_base,
LLVMValueRef base_ptr,
- LLVMValueRef indexes)
+ LLVMValueRef indexes,
+ LLVMValueRef overflow_mask,
+ LLVMValueRef indexes2)
{
- LLVMBuilderRef builder = bld->gallivm->builder;
- LLVMValueRef res = bld->undef;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ struct lp_build_context *bld = &bld_base->base;
+ LLVMValueRef res;
unsigned i;
+ if (indexes2)
+ res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
+ else
+ res = bld->undef;
+ /*
+ * overflow_mask is a vector telling us which channels
+ * in the vector overflowed. We use the overflow behavior for
+ * constant buffers which is defined as:
+ * Out of bounds access to constant buffer returns 0 in all
+ * components. Out of bounds behavior is always with respect
+ * to the size of the buffer bound at that slot.
+ */
+
+ if (overflow_mask) {
+ /*
+ * We avoid per-element control flow here (also due to llvm going crazy,
+ * though I suspect it's better anyway since overflow is likely rare).
+ * Note that since we still fetch from buffers even if num_elements was
+ * zero (in this case we'll fetch from index zero) the jit func callers
+ * MUST provide valid fake constant buffers of size 4x32 (the values do
+ * not matter), otherwise we'd still need (not per element though)
+ * control flow.
+ */
+ indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
+ if (indexes2)
+ indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
+ }
+
/*
* Loop over elements of index_vec, load scalar value, insert it into 'res'.
*/
- for (i = 0; i < bld->type.length; i++) {
- LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
- LLVMValueRef index = LLVMBuildExtractElement(builder,
- indexes, ii, "");
- LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
- &index, 1, "gather_ptr");
- LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+ for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
+ LLVMValueRef si, di;
+ LLVMValueRef index;
+ LLVMValueRef scalar_ptr, scalar;
+
+ di = lp_build_const_int32(bld->gallivm, i);
+ if (indexes2)
+ si = lp_build_const_int32(bld->gallivm, i >> 1);
+ else
+ si = di;
+
+ if (indexes2 && (i & 1)) {
+ index = LLVMBuildExtractElement(builder,
+ indexes2, si, "");
+ } else {
+ index = LLVMBuildExtractElement(builder,
+ indexes, si, "");
+ }
+ scalar_ptr = LLVMBuildGEP(builder, base_ptr,
+ &index, 1, "gather_ptr");
+ scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+
+ res = LLVMBuildInsertElement(builder, res, scalar, di, "");
+ }
- res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
+ if (overflow_mask) {
+ if (indexes2) {
+ res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
+ overflow_mask = LLVMBuildSExt(builder, overflow_mask,
+ bld_base->dbl_bld.int_vec_type, "");
+ res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
+ bld_base->dbl_bld.zero, res);
+ } else
+ res = lp_build_select(bld, overflow_mask, bld->zero, res);
}
return res;
LLVMValueRef base_ptr,
LLVMValueRef indexes,
LLVMValueRef values,
- struct lp_exec_mask *mask,
- LLVMValueRef pred)
+ struct lp_exec_mask *mask)
{
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
unsigned i;
-
- /* Mix the predicate and execution mask */
- if (mask->has_mask) {
- if (pred) {
- pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
- }
- else {
- pred = mask->exec_mask;
- }
- }
+ LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
/*
* Loop over elements of index_vec, store scalar value.
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
unsigned reg_file, unsigned reg_index,
- const struct tgsi_ind_register *indirect_reg)
+ const struct tgsi_ind_register *indirect_reg,
+ int index_limit)
{
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
index = lp_build_add(uint_bld, base, rel);
- max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
- uint_bld->type,
- bld->bld_base.info->file_max[reg_file]);
+ /*
+ * emit_fetch_constant handles constant buffer overflow so this code
+ * is pointless for them.
+ * Furthermore the D3D10 spec in section 6.5 says:
+ * If the constant buffer bound to a slot is larger than the size
+ * declared in the shader for that slot, implementations are allowed
+ * to return incorrect data (not necessarily 0) for indices that are
+ * larger than the declared size but smaller than the buffer size.
+ */
+ if (reg_file != TGSI_FILE_CONSTANT) {
+ assert(index_limit >= 0);
+ max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
+ uint_bld->type, index_limit);
- assert(!uint_bld->type.sign);
- index = lp_build_min(uint_bld, index, max_index);
+ assert(!uint_bld->type.sign);
+ index = lp_build_min(uint_bld, index, max_index);
+ }
return index;
}
case TGSI_TYPE_SIGNED:
bld_fetch = &bld_base->int_bld;
break;
- case TGSI_TYPE_VOID:
case TGSI_TYPE_DOUBLE:
+ bld_fetch = &bld_base->dbl_bld;
+ break;
+ case TGSI_TYPE_UNSIGNED64:
+ bld_fetch = &bld_base->uint64_bld;
+ break;
+ case TGSI_TYPE_SIGNED64:
+ bld_fetch = &bld_base->int64_bld;
+ break;
+ case TGSI_TYPE_VOID:
default:
assert(0);
bld_fetch = NULL;
return bld_fetch;
}
+static LLVMValueRef
+get_soa_array_offsets(struct lp_build_context *uint_bld,
+ LLVMValueRef indirect_index,
+ unsigned chan_index,
+ boolean need_perelement_offset)
+{
+ struct gallivm_state *gallivm = uint_bld->gallivm;
+ LLVMValueRef chan_vec =
+ lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
+ LLVMValueRef length_vec =
+ lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
+ LLVMValueRef index_vec;
+
+ /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
+ index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
+ index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
+ index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
+
+ if (need_perelement_offset) {
+ LLVMValueRef pixel_offsets;
+ unsigned i;
+ /* build pixel offset vector: {0, 1, 2, 3, ...} */
+ pixel_offsets = uint_bld->undef;
+ for (i = 0; i < uint_bld->type.length; i++) {
+ LLVMValueRef ii = lp_build_const_int32(gallivm, i);
+ pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
+ ii, ii, "");
+ }
+ index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
+ }
+ return index_vec;
+}
+
static LLVMValueRef
emit_fetch_constant(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register * reg,
enum tgsi_opcode_type stype,
- unsigned swizzle)
+ unsigned swizzle_in)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
struct lp_build_context *uint_bld = &bld_base->uint_bld;
- LLVMValueRef indirect_index = NULL;
unsigned dimension = 0;
- LLVMValueRef dimension_index;
LLVMValueRef consts_ptr;
+ LLVMValueRef num_consts;
LLVMValueRef res;
+ unsigned swizzle = swizzle_in & 0xffff;
/* XXX: Handle fetching xyzw components as a vector */
- assert(swizzle != ~0);
+ assert(swizzle != ~0u);
if (reg->Register.Dimension) {
assert(!reg->Dimension.Indirect);
assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
}
- dimension_index = lp_build_const_int32(gallivm, dimension);
- consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, dimension_index);
+ consts_ptr = bld->consts[dimension];
+ num_consts = bld->consts_sizes[dimension];
if (reg->Register.Indirect) {
+ LLVMValueRef indirect_index;
+ LLVMValueRef swizzle_vec =
+ lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
+ LLVMValueRef index_vec; /* index into the const buffer */
+ LLVMValueRef overflow_mask;
+ LLVMValueRef index_vec2 = NULL;
+
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
- ®->Indirect);
- }
-
- if (reg->Register.Indirect) {
- LLVMValueRef swizzle_vec =
- lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
- LLVMValueRef index_vec; /* index into the const buffer */
+ ®->Indirect,
+ bld->bld_base.info->file_max[reg->Register.File]);
+
+ /* All fetches are from the same constant buffer, so
+ * we need to propagate the size to a vector to do a
+ * vector comparison */
+ num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
+ /* Construct a boolean vector telling us which channels
+ * overflow the bound constant buffer */
+ overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
+ indirect_index, num_consts);
/* index_vec = indirect_index * 4 + swizzle */
index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
+ if (tgsi_type_is_64bit(stype)) {
+ LLVMValueRef swizzle_vec2;
+ swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
+ index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
+ index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
+ }
/* Gather values from the constant buffer */
- res = build_gather(&bld_base->base, consts_ptr, index_vec);
+ res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
}
else {
LLVMValueRef index; /* index into the const buffer */
LLVMValueRef scalar, scalar_ptr;
-
- index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);
+ struct lp_build_context *bld_broad = &bld_base->base;
+ index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
&index, 1, "");
- scalar = LLVMBuildLoad(builder, scalar_ptr, "");
- res = lp_build_broadcast_scalar(&bld_base->base, scalar);
+
+ if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
+
+ LLVMValueRef scalar2, scalar2_ptr;
+ LLVMValueRef shuffles[2];
+ index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
+
+ scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
+ &index, 1, "");
+
+ scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+ scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
+ shuffles[0] = lp_build_const_int32(gallivm, 0);
+ shuffles[1] = lp_build_const_int32(gallivm, 1);
+
+ res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
+ res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
+ res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
+ } else {
+ if (stype == TGSI_TYPE_DOUBLE) {
+ LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
+ scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
+ bld_broad = &bld_base->dbl_bld;
+ } else if (stype == TGSI_TYPE_UNSIGNED64) {
+ LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
+ scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
+ bld_broad = &bld_base->uint64_bld;
+ } else if (stype == TGSI_TYPE_SIGNED64) {
+ LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
+ scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
+ bld_broad = &bld_base->int64_bld;
+ }
+ scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+ res = lp_build_broadcast_scalar(bld_broad, scalar);
+ }
+
}
- if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
+ if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
+
return res;
}
+/**
+ * Fetch 64-bit values from two separate channels.
+ * 64-bit values are stored split across two channels, like xy and zw.
+ * This function creates a set of vec_length*2 floats,
+ * extracts the values from the two channels,
+ * puts them in the correct place, then casts to vec_length 64-bits.
+ */
static LLVMValueRef
-emit_fetch_immediate(
+emit_fetch_64bit(
struct lp_build_tgsi_context * bld_base,
- const struct tgsi_full_src_register * reg,
enum tgsi_opcode_type stype,
- unsigned swizzle)
+ LLVMValueRef input,
+ LLVMValueRef input2)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
- struct lp_build_context *uint_bld = &bld_base->uint_bld;
- struct lp_build_context *float_bld = &bld_base->base;
- LLVMValueRef res = NULL;
- LLVMValueRef indirect_index = NULL;
+ LLVMValueRef res;
+ struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
+ int i;
+ LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
+ int len = bld_base->base.type.length * 2;
+ assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
- if (reg->Register.Indirect) {
- indirect_index = get_indirect_index(bld,
- reg->Register.File,
- reg->Register.Index,
- ®->Indirect);
+ for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
+ shuffles[i] = lp_build_const_int32(gallivm, i / 2);
+ shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
}
+ res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
- if (reg->Register.Indirect) {
- LLVMValueRef swizzle_vec =
- lp_build_const_int_vec(bld->bld_base.base.gallivm,
- uint_bld->type, swizzle);
- LLVMValueRef length_vec =
- lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
- bld->bld_base.base.type.length);
- LLVMValueRef index_vec; /* index into the const buffer */
- LLVMValueRef imms_array;
- LLVMValueRef pixel_offsets;
- LLVMValueRef offsets[LP_MAX_VECTOR_LENGTH];
- LLVMTypeRef float4_ptr_type;
- int i;
+ return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
+}
- /* build pixel offset vector: {0, 1, 2, 3, ...} */
- for (i = 0; i < float_bld->type.length; i++) {
- offsets[i] = lp_build_const_int32(gallivm, i);
- }
- pixel_offsets = LLVMConstVector(offsets, float_bld->type.length);
+static LLVMValueRef
+emit_fetch_immediate(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_src_register * reg,
+ enum tgsi_opcode_type stype,
+ unsigned swizzle_in)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef res = NULL;
+ unsigned swizzle = swizzle_in & 0xffff;
- /* index_vec = (indirect_index * 4 + swizzle) * length */
- index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
- index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
- index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
- index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
+ if (bld->use_immediates_array || reg->Register.Indirect) {
+ LLVMValueRef imms_array;
+ LLVMTypeRef fptr_type;
/* cast imms_array pointer to float* */
- float4_ptr_type = LLVMPointerType(
- LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
- imms_array = LLVMBuildBitCast(builder, bld->imms_array,
- float4_ptr_type, "");
+ fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
+ imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");
- /* Gather values from the temporary register array */
- res = build_gather(&bld_base->base, imms_array, index_vec);
+ if (reg->Register.Indirect) {
+ LLVMValueRef indirect_index;
+ LLVMValueRef index_vec; /* index into the immediate register array */
+ LLVMValueRef index_vec2 = NULL;
+ indirect_index = get_indirect_index(bld,
+ reg->Register.File,
+ reg->Register.Index,
+ ®->Indirect,
+ bld->bld_base.info->file_max[reg->Register.File]);
+ /*
+ * Unlike for other reg classes, adding pixel offsets is unnecessary -
+ * immediates are stored as full vectors (FIXME??? - might be better
+ * to store them the same as constants) but all elements are the same
+ * in any case.
+ */
+ index_vec = get_soa_array_offsets(&bld_base->uint_bld,
+ indirect_index,
+ swizzle,
+ FALSE);
+ if (tgsi_type_is_64bit(stype))
+ index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
+ indirect_index,
+ swizzle_in >> 16,
+ FALSE);
+ /* Gather values from the immediate register array */
+ res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
+ } else {
+ LLVMValueRef gep[2];
+ gep[0] = lp_build_const_int32(gallivm, 0);
+ gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
+ LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
+ bld->imms_array, gep, 2, "");
+ res = LLVMBuildLoad(builder, imms_ptr, "");
+
+ if (tgsi_type_is_64bit(stype)) {
+ LLVMValueRef imms_ptr2;
+ LLVMValueRef res2;
+ gep[1] = lp_build_const_int32(gallivm,
+ reg->Register.Index * 4 + (swizzle_in >> 16));
+ imms_ptr2 = LLVMBuildGEP(builder,
+ bld->imms_array, gep, 2, "");
+ res2 = LLVMBuildLoad(builder, imms_ptr2, "");
+ res = emit_fetch_64bit(bld_base, stype, res, res2);
+ }
+ }
}
else {
res = bld->immediates[reg->Register.Index][swizzle];
+ if (tgsi_type_is_64bit(stype))
+ res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
}
- if (stype == TGSI_TYPE_UNSIGNED) {
- res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
- } else if (stype == TGSI_TYPE_SIGNED) {
- res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
+ if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
+ struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
+ res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
return res;
}
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register * reg,
enum tgsi_opcode_type stype,
- unsigned swizzle)
+ unsigned swizzle_in)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
- struct lp_build_context *uint_bld = &bld_base->uint_bld;
- LLVMValueRef indirect_index = NULL;
LLVMValueRef res;
+ unsigned swizzle = swizzle_in & 0xffff;
if (reg->Register.Indirect) {
+ LLVMValueRef indirect_index;
+ LLVMValueRef index_vec; /* index into the input reg array */
+ LLVMValueRef index_vec2 = NULL;
+ LLVMValueRef inputs_array;
+ LLVMTypeRef fptr_type;
+
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
- ®->Indirect);
- }
-
- if (reg->Register.Indirect) {
- LLVMValueRef swizzle_vec =
- lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
- LLVMValueRef length_vec =
- lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
- LLVMValueRef index_vec; /* index into the const buffer */
- LLVMValueRef inputs_array;
- LLVMTypeRef float4_ptr_type;
-
- /* index_vec = (indirect_index * 4 + swizzle) * length */
- index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
- index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
- index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
-
+ ®->Indirect,
+ bld->bld_base.info->file_max[reg->Register.File]);
+
+ index_vec = get_soa_array_offsets(&bld_base->uint_bld,
+ indirect_index,
+ swizzle,
+ TRUE);
+ if (tgsi_type_is_64bit(stype)) {
+ index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
+ indirect_index,
+ swizzle_in >> 16,
+ TRUE);
+ }
/* cast inputs_array pointer to float* */
- float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
- inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
- float4_ptr_type, "");
+ fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
+ inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
- /* Gather values from the temporary register array */
- res = build_gather(&bld_base->base, inputs_array, index_vec);
+ /* Gather values from the input register array */
+ res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
} else {
if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
LLVMValueRef lindex = lp_build_const_int32(gallivm,
reg->Register.Index * 4 + swizzle);
- LLVMValueRef input_ptr = LLVMBuildGEP(builder,
- bld->inputs_array, &lindex, 1, "");
+ LLVMValueRef input_ptr = LLVMBuildGEP(builder,
+ bld->inputs_array, &lindex, 1, "");
+
res = LLVMBuildLoad(builder, input_ptr, "");
+ if (tgsi_type_is_64bit(stype)) {
+ LLVMValueRef lindex1;
+ LLVMValueRef input_ptr2;
+ LLVMValueRef res2;
+
+ lindex1 = lp_build_const_int32(gallivm,
+ reg->Register.Index * 4 + (swizzle_in >> 16));
+ input_ptr2 = LLVMBuildGEP(builder,
+ bld->inputs_array, &lindex1, 1, "");
+ res2 = LLVMBuildLoad(builder, input_ptr2, "");
+ res = emit_fetch_64bit(bld_base, stype, res, res2);
+ }
}
else {
res = bld->inputs[reg->Register.Index][swizzle];
+ if (tgsi_type_is_64bit(stype))
+ res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
}
}
assert(res);
- if (stype == TGSI_TYPE_UNSIGNED) {
- res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
- } else if (stype == TGSI_TYPE_SIGNED) {
- res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
+ if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
+ struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
+ res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
return res;
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register * reg,
enum tgsi_opcode_type stype,
- unsigned swizzle)
+ unsigned swizzle_in)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+ const struct tgsi_shader_info *info = bld->bld_base.info;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef attrib_index = NULL;
LLVMValueRef vertex_index = NULL;
+ unsigned swizzle = swizzle_in & 0xffff;
LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
LLVMValueRef res;
+ if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
+ /* This is really a system value not a regular input */
+ assert(!reg->Register.Indirect);
+ assert(!reg->Dimension.Indirect);
+ res = bld->system_values.prim_id;
+ if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
+ res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
+ }
+ return res;
+ }
+
if (reg->Register.Indirect) {
+ /*
+ * XXX: this is possibly not quite the right value, since file_max may be
+ * larger than the max attrib index, due to it being the max of declared
+ * inputs AND the max vertices per prim (which is 6 for tri adj).
+ * It should however be safe to use (since we always allocate
+ * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
+ */
+ int index_limit = info->file_max[reg->Register.File];
attrib_index = get_indirect_index(bld,
- reg->Register.File,
- reg->Register.Index,
- ®->Indirect);
+ reg->Register.File,
+ reg->Register.Index,
+ ®->Indirect,
+ index_limit);
} else {
attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
}
-
+
if (reg->Dimension.Indirect) {
+ /*
+ * A fixed 6 should do as well (which is what we allocate).
+ */
+ int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
vertex_index = get_indirect_index(bld,
reg->Register.File,
reg->Dimension.Index,
- ®->DimIndirect);
+ ®->DimIndirect,
+ index_limit);
} else {
vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
}
res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
reg->Dimension.Indirect,
- vertex_index, attrib_index,
+ vertex_index,
+ reg->Register.Indirect,
+ attrib_index,
swizzle_index);
assert(res);
-
- if (stype == TGSI_TYPE_UNSIGNED) {
+ if (tgsi_type_is_64bit(stype)) {
+ LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
+ LLVMValueRef res2;
+ res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
+ reg->Dimension.Indirect,
+ vertex_index,
+ reg->Register.Indirect,
+ attrib_index,
+ swizzle_index);
+ assert(res2);
+ res = emit_fetch_64bit(bld_base, stype, res, res2);
+ } else if (stype == TGSI_TYPE_UNSIGNED) {
res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
} else if (stype == TGSI_TYPE_SIGNED) {
res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register * reg,
enum tgsi_opcode_type stype,
- unsigned swizzle)
+ unsigned swizzle_in)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
- struct lp_build_context *uint_bld = &bld_base->uint_bld;
- struct lp_build_context *float_bld = &bld_base->base;
- LLVMValueRef indirect_index = NULL;
LLVMValueRef res;
+ unsigned swizzle = swizzle_in & 0xffff;
if (reg->Register.Indirect) {
+ LLVMValueRef indirect_index;
+ LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
+ LLVMValueRef temps_array;
+ LLVMTypeRef fptr_type;
+
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
- ®->Indirect);
- }
-
- if (reg->Register.Indirect) {
- LLVMValueRef swizzle_vec =
- lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
- LLVMValueRef length_vec =
- lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
- bld->bld_base.base.type.length);
- LLVMValueRef index_vec; /* index into the const buffer */
- LLVMValueRef temps_array;
- LLVMValueRef pixel_offsets;
- LLVMValueRef offsets[LP_MAX_VECTOR_LENGTH];
- LLVMTypeRef float4_ptr_type;
- int i;
-
- /* build pixel offset vector: {0, 1, 2, 3, ...} */
- for (i = 0; i < float_bld->type.length; i++) {
- offsets[i] = lp_build_const_int32(gallivm, i);
+ ®->Indirect,
+ bld->bld_base.info->file_max[reg->Register.File]);
+
+ index_vec = get_soa_array_offsets(&bld_base->uint_bld,
+ indirect_index,
+ swizzle,
+ TRUE);
+ if (tgsi_type_is_64bit(stype)) {
+ index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
+ indirect_index,
+ swizzle_in >> 16,
+ TRUE);
}
- pixel_offsets = LLVMConstVector(offsets, float_bld->type.length);
-
- /* index_vec = (indirect_index * 4 + swizzle) * length */
- index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
- index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
- index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
- index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
/* cast temps_array pointer to float* */
- float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
- temps_array = LLVMBuildBitCast(builder, bld->temps_array,
- float4_ptr_type, "");
+ fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
+ temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
/* Gather values from the temporary register array */
- res = build_gather(&bld_base->base, temps_array, index_vec);
+ res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
}
else {
LLVMValueRef temp_ptr;
temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
res = LLVMBuildLoad(builder, temp_ptr, "");
+
+ if (tgsi_type_is_64bit(stype)) {
+ LLVMValueRef temp_ptr2, res2;
+
+ temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
+ res2 = LLVMBuildLoad(builder, temp_ptr2, "");
+ res = emit_fetch_64bit(bld_base, stype, res, res2);
+ }
}
- if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
+ if (stype == TGSI_TYPE_SIGNED ||
+ stype == TGSI_TYPE_UNSIGNED ||
+ stype == TGSI_TYPE_DOUBLE ||
+ stype == TGSI_TYPE_SIGNED64 ||
+ stype == TGSI_TYPE_UNSIGNED64) {
struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register * reg,
enum tgsi_opcode_type stype,
- unsigned swizzle)
+ unsigned swizzle_in)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
atype = TGSI_TYPE_UNSIGNED;
break;
+ case TGSI_SEMANTIC_VERTEXID_NOBASE:
+ res = bld->system_values.vertex_id_nobase;
+ atype = TGSI_TYPE_UNSIGNED;
+ break;
+
+ case TGSI_SEMANTIC_BASEVERTEX:
+ res = bld->system_values.basevertex;
+ atype = TGSI_TYPE_UNSIGNED;
+ break;
+
case TGSI_SEMANTIC_PRIMID:
res = bld->system_values.prim_id;
atype = TGSI_TYPE_UNSIGNED;
break;
+ case TGSI_SEMANTIC_INVOCATIONID:
+ res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
+ atype = TGSI_TYPE_UNSIGNED;
+ break;
+
default:
assert(!"unexpected semantic in emit_fetch_system_value");
res = bld_base->base.zero;
LLVMValueRef *ddx,
LLVMValueRef *ddy)
{
- if(res)
+ if (res)
*res = src;
/* TODO: use interpolation coeffs for inputs */
- if(ddx)
+ if (ddx)
*ddx = lp_build_ddx(&bld->bld_base.base, src);
- if(ddy)
+ if (ddy)
*ddy = lp_build_ddy(&bld->bld_base.base, src);
}
-
/**
- * Predicate.
+ * store an array of vec-length 64-bit into two arrays of vec_length floats
+ * i.e.
+ * value is d0, d1, d2, d3 etc.
+ * each 64-bit has high and low pieces x, y
+ * so gets stored into the separate channels as:
+ * chan_ptr = d0.x, d1.x, d2.x, d3.x
+ * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
*/
static void
-emit_fetch_predicate(
- struct lp_build_tgsi_soa_context *bld,
- const struct tgsi_full_instruction *inst,
- LLVMValueRef *pred)
+emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
+ LLVMValueRef value)
{
- LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
- unsigned index;
- unsigned char swizzles[4];
- LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
- LLVMValueRef value;
- unsigned chan;
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_build_context *float_bld = &bld_base->base;
+ unsigned i;
+ LLVMValueRef temp, temp2;
+ LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
+ LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
- if (!inst->Instruction.Predicate) {
- TGSI_FOR_EACH_CHANNEL( chan ) {
- pred[chan] = NULL;
- }
- return;
+ for (i = 0; i < bld_base->base.type.length; i++) {
+ shuffles[i] = lp_build_const_int32(gallivm, i * 2);
+ shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
}
- swizzles[0] = inst->Predicate.SwizzleX;
- swizzles[1] = inst->Predicate.SwizzleY;
- swizzles[2] = inst->Predicate.SwizzleZ;
- swizzles[3] = inst->Predicate.SwizzleW;
-
- index = inst->Predicate.Index;
- assert(index < LP_MAX_TGSI_PREDS);
-
- TGSI_FOR_EACH_CHANNEL( chan ) {
- unsigned swizzle = swizzles[chan];
-
- /*
- * Only fetch the predicate register channels that are actually listed
- * in the swizzles
- */
- if (!unswizzled[swizzle]) {
- value = LLVMBuildLoad(builder,
- bld->preds[index][swizzle], "");
-
- /*
- * Convert the value to an integer mask.
- *
- * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
- * is needlessly causing two comparisons due to storing the intermediate
- * result as float vector instead of an integer mask vector.
- */
- value = lp_build_compare(bld->bld_base.base.gallivm,
- bld->bld_base.base.type,
- PIPE_FUNC_NOTEQUAL,
- value,
- bld->bld_base.base.zero);
- if (inst->Predicate.Negate) {
- value = LLVMBuildNot(builder, value, "");
- }
-
- unswizzled[swizzle] = value;
- } else {
- value = unswizzled[swizzle];
- }
+ temp = LLVMBuildShuffleVector(builder, value,
+ LLVMGetUndef(LLVMTypeOf(value)),
+ LLVMConstVector(shuffles,
+ bld_base->base.type.length),
+ "");
+ temp2 = LLVMBuildShuffleVector(builder, value,
+ LLVMGetUndef(LLVMTypeOf(value)),
+ LLVMConstVector(shuffles2,
+ bld_base->base.type.length),
+ "");
- pred[chan] = value;
- }
+ lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
+ lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
}
/**
const struct tgsi_full_instruction *inst,
unsigned index,
unsigned chan_index,
- LLVMValueRef pred,
LLVMValueRef value)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
const struct tgsi_full_dst_register *reg = &inst->Dst[index];
struct lp_build_context *float_bld = &bld_base->base;
struct lp_build_context *int_bld = &bld_base->int_bld;
- struct lp_build_context *uint_bld = &bld_base->uint_bld;
LLVMValueRef indirect_index = NULL;
- enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
+ enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
/*
* Apply saturation.
*
* It is always assumed to be float.
*/
- switch( inst->Instruction.Saturate ) {
- case TGSI_SAT_NONE:
- break;
-
- case TGSI_SAT_ZERO_ONE:
+ if (inst->Instruction.Saturate) {
assert(dtype == TGSI_TYPE_FLOAT ||
dtype == TGSI_TYPE_UNTYPED);
value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
- value = lp_build_max_ext(float_bld, value, float_bld->zero,
- GALLIVM_NAN_RETURN_SECOND);
- value = lp_build_min_ext(float_bld, value, float_bld->one,
- GALLIVM_NAN_BEHAVIOR_UNDEFINED);
- break;
-
- case TGSI_SAT_MINUS_PLUS_ONE:
- assert(dtype == TGSI_TYPE_FLOAT ||
- dtype == TGSI_TYPE_UNTYPED);
- value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
- value = lp_build_max_ext(float_bld, value,
- lp_build_const_vec(gallivm, float_bld->type, -1.0),
- GALLIVM_NAN_RETURN_SECOND);
- value = lp_build_min_ext(float_bld, value, float_bld->one,
- GALLIVM_NAN_BEHAVIOR_UNDEFINED);
- break;
-
- default:
- assert(0);
+ value = lp_build_clamp_zero_one_nanzero(float_bld, value);
}
if (reg->Register.Indirect) {
+ /*
+ * Currently the mesa/st doesn't generate indirect stores
+ * to 64-bit values, it normally uses MOV to do indirect stores.
+ */
+ assert(!tgsi_type_is_64bit(dtype));
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
- ®->Indirect);
+ ®->Indirect,
+ bld->bld_base.info->file_max[reg->Register.File]);
} else {
assert(reg->Register.Index <=
bld_base->info->file_max[reg->Register.File]);
}
+ if (DEBUG_EXECUTION) {
+ emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
+ }
+
switch( reg->Register.File ) {
case TGSI_FILE_OUTPUT:
/* Outputs are always stored as floats */
value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
if (reg->Register.Indirect) {
- LLVMValueRef chan_vec =
- lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
- LLVMValueRef length_vec =
- lp_build_const_int_vec(gallivm, uint_bld->type, float_bld->type.length);
- LLVMValueRef index_vec; /* indexes into the temp registers */
+ LLVMValueRef index_vec; /* indexes into the output registers */
LLVMValueRef outputs_array;
- LLVMValueRef pixel_offsets;
- LLVMTypeRef float_ptr_type;
- int i;
-
- /* build pixel offset vector: {0, 1, 2, 3, ...} */
- pixel_offsets = uint_bld->undef;
- for (i = 0; i < float_bld->type.length; i++) {
- LLVMValueRef ii = lp_build_const_int32(gallivm, i);
- pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
- ii, ii, "");
- }
+ LLVMTypeRef fptr_type;
- /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
- index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
- index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
- index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
- index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
+ index_vec = get_soa_array_offsets(&bld_base->uint_bld,
+ indirect_index,
+ chan_index,
+ TRUE);
- float_ptr_type =
- LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
- outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
- float_ptr_type, "");
+ fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
+ outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
- /* Scatter store values into temp registers */
+ /* Scatter store values into output registers */
emit_mask_scatter(bld, outputs_array, index_vec, value,
- &bld->exec_mask, pred);
+ &bld->exec_mask);
}
else {
LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
chan_index);
- lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
+
+ if (tgsi_type_is_64bit(dtype)) {
+ LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
+ chan_index + 1);
+ emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
+ value);
+ } else
+ lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
}
break;
case TGSI_FILE_TEMPORARY:
/* Temporaries are always stored as floats */
- value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
+ if (!tgsi_type_is_64bit(dtype))
+ value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
+ else
+ value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
if (reg->Register.Indirect) {
- LLVMValueRef chan_vec =
- lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
- LLVMValueRef length_vec =
- lp_build_const_int_vec(gallivm, uint_bld->type,
- float_bld->type.length);
LLVMValueRef index_vec; /* indexes into the temp registers */
LLVMValueRef temps_array;
- LLVMValueRef pixel_offsets;
- LLVMTypeRef float_ptr_type;
- int i;
-
- /* build pixel offset vector: {0, 1, 2, 3, ...} */
- pixel_offsets = uint_bld->undef;
- for (i = 0; i < float_bld->type.length; i++) {
- LLVMValueRef ii = lp_build_const_int32(gallivm, i);
- pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
- ii, ii, "");
- }
+ LLVMTypeRef fptr_type;
- /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
- index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
- index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
- index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
- index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
+ index_vec = get_soa_array_offsets(&bld_base->uint_bld,
+ indirect_index,
+ chan_index,
+ TRUE);
- float_ptr_type =
- LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
- temps_array = LLVMBuildBitCast(builder, bld->temps_array,
- float_ptr_type, "");
+ fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
+ temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
/* Scatter store values into temp registers */
emit_mask_scatter(bld, temps_array, index_vec, value,
- &bld->exec_mask, pred);
+ &bld->exec_mask);
}
else {
LLVMValueRef temp_ptr;
- temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
- chan_index);
- lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
+ temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
+
+ if (tgsi_type_is_64bit(dtype)) {
+ LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
+ reg->Register.Index,
+ chan_index + 1);
+ emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
+ value);
+ }
+ else
+ lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
}
break;
assert(dtype == TGSI_TYPE_SIGNED);
assert(LLVMTypeOf(value) == int_bld->vec_type);
value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
- lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value,
+ lp_exec_mask_store(&bld->exec_mask, int_bld, value,
bld->addr[reg->Register.Index][chan_index]);
break;
- case TGSI_FILE_PREDICATE:
- assert(LLVMTypeOf(value) == float_bld->vec_type);
- value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
- lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value,
- bld->preds[reg->Register.Index][chan_index]);
- break;
-
default:
assert( 0 );
}
(void)dtype;
}
+/*
+ * Called at the beginning of the translation of each TGSI instruction, to
+ * emit some debug code.
+ */
static void
-emit_store(
+emit_debug(
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_instruction * inst,
- const struct tgsi_opcode_info * info,
- LLVMValueRef dst[4])
+ const struct tgsi_opcode_info * info)
{
- unsigned chan_index;
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- if(info->num_dst) {
- LLVMValueRef pred[TGSI_NUM_CHANNELS];
+ if (DEBUG_EXECUTION) {
+ /*
+ * Dump the TGSI instruction.
+ */
- emit_fetch_predicate( bld, inst, pred );
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ char buf[512];
+ buf[0] = '$';
+ buf[1] = ' ';
+ tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
+ lp_build_printf(gallivm, buf);
- TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
- emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
+ /* Dump the execution mask.
+ */
+ if (bld->exec_mask.has_mask) {
+ lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
}
}
}
+/*
+ * Store the instruction result vectors dst[0..3] into destination
+ * register 'index' of 'inst', one channel per writemask bit.
+ *
+ * For 64-bit destination types a value occupies two consecutive
+ * channels (x+y, z+w); the store emitted for the even channel covers
+ * both halves, so the odd channels (1 and 3) are skipped here.
+ */
+static void
+emit_store(
+ struct lp_build_tgsi_context * bld_base,
+ const struct tgsi_full_instruction * inst,
+ const struct tgsi_opcode_info * info,
+ unsigned index,
+ LLVMValueRef dst[4])
+
+{
+ enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
+
+ unsigned writemask = inst->Dst[index].Register.WriteMask;
+ while (writemask) {
+ unsigned chan_index = u_bit_scan(&writemask);
+ /* 64-bit stores write two channels at once from the even channel,
+ * so the odd half-channels must not be stored again. */
+ if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
+ continue;
+ emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
+ }
+}
+
static unsigned
tgsi_to_pipe_tex_target(unsigned tgsi_target)
{
}
}
+
+/*
+ * Decide how the lod value for a sample operation is treated:
+ * scalar when the lod source operand is a constant or immediate,
+ * otherwise per-quad for fragment shaders (unless quad lod is
+ * disabled via GALLIVM_PERF_NO_QUAD_LOD) and per-element for all
+ * other shader stages.
+ */
+static enum lp_sampler_lod_property
+lp_build_lod_property(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_instruction *inst,
+ unsigned src_op)
+{
+ const struct tgsi_full_src_register *reg = &inst->Src[src_op];
+ enum lp_sampler_lod_property lod_property;
+
+ /*
+ * Not much we can do here. We could try catching inputs declared
+ * with constant interpolation but not sure it's worth it - since for
+ * TEX opcodes as well as FETCH/LD the lod comes from same reg as
+ * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO, just
+ * like the constant/immediate recognition below.
+ * What seems to be of more value would be to recognize temps holding
+ * broadcasted scalars but no way we can do it.
+ * Tried asking llvm but without any success (using LLVMIsConstant
+ * even though this isn't exactly what we'd need), even as simple as
+ * IMM[0] UINT32 (0,-1,0,0)
+ * MOV TEMP[0] IMM[0].yyyy
+ * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
+ * doesn't work.
+ * This means there's ZERO chance this will ever catch a scalar lod
+ * with traditional tex opcodes as well as texel fetches, since the lod
+ * comes from the same reg as coords (except some test shaders using
+ * constant coords maybe).
+ * There's at least hope for sample opcodes as well as size queries.
+ */
+ if (reg->Register.File == TGSI_FILE_CONSTANT ||
+ reg->Register.File == TGSI_FILE_IMMEDIATE) {
+ lod_property = LP_SAMPLER_LOD_SCALAR;
+ }
+ else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
+ if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
+ lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
+ }
+ else {
+ lod_property = LP_SAMPLER_LOD_PER_QUAD;
+ }
+ }
+ else {
+ /* never use scalar (per-quad) lod the results are just too wrong. */
+ lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
+ }
+ return lod_property;
+}
+
+
/**
* High-level instruction translators.
*/
emit_tex( struct lp_build_tgsi_soa_context *bld,
const struct tgsi_full_instruction *inst,
enum lp_build_tex_modifier modifier,
- LLVMValueRef *texel)
+ LLVMValueRef *texel,
+ unsigned sampler_reg,
+ enum lp_sampler_op_type sampler_op)
{
- unsigned unit;
- LLVMValueRef lod_bias, explicit_lod;
+ unsigned unit = inst->Src[sampler_reg].Register.Index;
LLVMValueRef oow = NULL;
- LLVMValueRef coords[4];
+ LLVMValueRef lod = NULL;
+ LLVMValueRef coords[5];
LLVMValueRef offsets[3] = { NULL };
struct lp_derivatives derivs;
- struct lp_derivatives *deriv_ptr = NULL;
- boolean scalar_lod;
- unsigned num_coords, num_derivs, num_offsets;
- unsigned i;
+ struct lp_sampler_params params;
+ enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
+ unsigned num_derivs, num_offsets, i;
+ unsigned shadow_coord = 0;
+ unsigned layer_coord = 0;
+ unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
+
+ memset(¶ms, 0, sizeof(params));
if (!bld->sampler) {
_debug_printf("warning: found texture instruction but no sampler generator supplied\n");
}
switch (inst->Texture.Texture) {
- case TGSI_TEXTURE_1D:
- num_coords = 1;
- num_offsets = 1;
- num_derivs = 1;
- break;
case TGSI_TEXTURE_1D_ARRAY:
- num_coords = 2;
+ layer_coord = 1;
+ /* fallthrough */
+ case TGSI_TEXTURE_1D:
num_offsets = 1;
num_derivs = 1;
break;
+ case TGSI_TEXTURE_2D_ARRAY:
+ layer_coord = 2;
+ /* fallthrough */
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
- num_coords = 2;
num_offsets = 2;
num_derivs = 2;
break;
- case TGSI_TEXTURE_SHADOW1D:
case TGSI_TEXTURE_SHADOW1D_ARRAY:
- num_coords = 3;
+ layer_coord = 1;
+ /* fallthrough */
+ case TGSI_TEXTURE_SHADOW1D:
+ shadow_coord = 2;
num_offsets = 1;
num_derivs = 1;
break;
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ layer_coord = 2;
+ shadow_coord = 3;
+ num_offsets = 2;
+ num_derivs = 2;
+ break;
case TGSI_TEXTURE_SHADOW2D:
case TGSI_TEXTURE_SHADOWRECT:
- case TGSI_TEXTURE_2D_ARRAY:
- num_coords = 3;
+ shadow_coord = 2;
num_offsets = 2;
num_derivs = 2;
break;
case TGSI_TEXTURE_CUBE:
- num_coords = 3;
num_offsets = 2;
num_derivs = 3;
break;
case TGSI_TEXTURE_3D:
- num_coords = 3;
num_offsets = 3;
num_derivs = 3;
break;
- case TGSI_TEXTURE_SHADOW2D_ARRAY:
- num_coords = 4;
+ case TGSI_TEXTURE_SHADOWCUBE:
+ shadow_coord = 3;
num_offsets = 2;
- num_derivs = 2;
+ num_derivs = 3;
break;
- case TGSI_TEXTURE_SHADOWCUBE:
- num_coords = 4;
+ case TGSI_TEXTURE_CUBE_ARRAY:
+ num_offsets = 2;
+ num_derivs = 3;
+ layer_coord = 3;
+ break;
+ case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
num_offsets = 2;
num_derivs = 3;
+ layer_coord = 3;
+ shadow_coord = 4; /* shadow coord special different reg */
break;
+ case TGSI_TEXTURE_2D_MSAA:
+ case TGSI_TEXTURE_2D_ARRAY_MSAA:
default:
assert(0);
return;
}
/* Note lod and especially projected are illegal in a LOT of cases */
- if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
- assert(num_coords < 4);
- lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
- explicit_lod = NULL;
- }
- else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
- assert(num_coords < 4);
- lod_bias = NULL;
- explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
- }
- else {
- lod_bias = NULL;
- explicit_lod = NULL;
+ if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
+ modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
+ if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
+ /* note that shadow cube array with bias/explicit lod does not exist */
+ lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
+ }
+ else {
+ lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
+ }
+ if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
+ sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
+ }
+ else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
+ sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
+ }
+ lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
}
if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
- assert(num_coords < 4);
- oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
+ oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
oow = lp_build_rcp(&bld->bld_base.base, oow);
}
- for (i = 0; i < num_coords; i++) {
- coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
+ for (i = 0; i < num_derivs; i++) {
+ coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
}
- for (i = num_coords; i < 4; i++) {
+ for (i = num_derivs; i < 5; i++) {
coords[i] = bld->bld_base.base.undef;
}
+ /* Layer coord always goes into 3rd slot, except for cube map arrays */
+ if (layer_coord) {
+ if (layer_coord == 3) {
+ coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
+ }
+ else {
+ coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
+ }
+ if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
+ coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
+ }
+ /* Shadow coord occupies always 5th slot. */
+ if (shadow_coord) {
+ sample_key |= LP_SAMPLER_SHADOW;
+ if (shadow_coord == 4) {
+ coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
+ }
+ else {
+ coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
+ }
+ if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
+ coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
+ }
+
if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
unsigned dim;
+ sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
for (dim = 0; dim < num_derivs; ++dim) {
- derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
- derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
+ derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
+ derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
+ }
+ params.derivs = &derivs;
+ /*
+ * could also check all src regs if constant but I doubt such
+ * cases exist in practice.
+ */
+ if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
+ if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
+ lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
+ }
+ else {
+ lod_property = LP_SAMPLER_LOD_PER_QUAD;
+ }
+ }
+ else {
+ lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
}
- deriv_ptr = &derivs;
- unit = inst->Src[3].Register.Index;
- } else {
- unit = inst->Src[1].Register.Index;
}
+ sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
- /* some advanced gather instructions (txgo) would require 4 offsets */
+ /* we don't handle the 4 offset version of tg4 */
if (inst->Texture.NumOffsets == 1) {
unsigned dim;
+ sample_key |= LP_SAMPLER_OFFSETS;
for (dim = 0; dim < num_offsets; dim++) {
- offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
+ offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
}
}
- /* TODO: use scalar lod if explicit_lod, lod_bias or derivs are broadcasted scalars */
- scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
-
- bld->sampler->emit_fetch_texel(bld->sampler,
- bld->bld_base.base.gallivm,
- bld->bld_base.base.type,
- FALSE,
- unit, unit,
- coords,
- offsets,
- deriv_ptr,
- lod_bias, explicit_lod, scalar_lod,
- texel);
+ params.type = bld->bld_base.base.type;
+ params.sample_key = sample_key;
+ params.texture_index = unit;
+ params.sampler_index = unit;
+ params.context_ptr = bld->context_ptr;
+ params.thread_data_ptr = bld->thread_data_ptr;
+ params.coords = coords;
+ params.offsets = offsets;
+ params.lod = lod;
+ params.texel = texel;
+
+ bld->sampler->emit_tex_sample(bld->sampler,
+ bld->bld_base.base.gallivm,
+ ¶ms);
}
static void
const struct tgsi_full_instruction *inst,
enum lp_build_tex_modifier modifier,
boolean compare,
+ enum lp_sampler_op_type sample_type,
LLVMValueRef *texel)
{
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
unsigned texture_unit, sampler_unit;
- LLVMValueRef lod_bias, explicit_lod;
- LLVMValueRef coords[4];
+ LLVMValueRef lod = NULL;
+ LLVMValueRef coords[5];
LLVMValueRef offsets[3] = { NULL };
struct lp_derivatives derivs;
- struct lp_derivatives *deriv_ptr = NULL;
- boolean scalar_lod;
- unsigned num_coords, num_offsets, num_derivs;
- unsigned i;
+ struct lp_sampler_params params;
+ enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
+
+ unsigned num_offsets, num_derivs, i;
+ unsigned layer_coord = 0;
+ unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
+
+ memset(¶ms, 0, sizeof(params));
if (!bld->sampler) {
_debug_printf("warning: found texture instruction but no sampler generator supplied\n");
*/
switch (bld->sv[texture_unit].Resource) {
case TGSI_TEXTURE_1D:
- num_coords = 1;
num_offsets = 1;
num_derivs = 1;
break;
case TGSI_TEXTURE_1D_ARRAY:
- num_coords = 2;
+ layer_coord = 1;
num_offsets = 1;
num_derivs = 1;
break;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
- num_coords = 2;
num_offsets = 2;
num_derivs = 2;
break;
case TGSI_TEXTURE_2D_ARRAY:
- num_coords = 3;
+ layer_coord = 2;
num_offsets = 2;
num_derivs = 2;
break;
case TGSI_TEXTURE_CUBE:
- num_coords = 3;
num_offsets = 2;
num_derivs = 3;
break;
case TGSI_TEXTURE_3D:
- num_coords = 3;
num_offsets = 3;
num_derivs = 3;
break;
case TGSI_TEXTURE_CUBE_ARRAY:
- num_coords = 4;
+ layer_coord = 3;
num_offsets = 2;
num_derivs = 3;
break;
return;
}
- if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
- lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
- explicit_lod = NULL;
- }
- else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
- lod_bias = NULL;
- explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
+ if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
+ modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
+ lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
+ if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
+ sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
+ }
+ else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
+ sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
+ }
+ lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
}
else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
- lod_bias = NULL;
/* XXX might be better to explicitly pass the level zero information */
- explicit_lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
- }
- else {
- lod_bias = NULL;
- explicit_lod = NULL;
+ sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
+ lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
}
- for (i = 0; i < num_coords; i++) {
- coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
+ for (i = 0; i < num_derivs; i++) {
+ coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
}
- for (i = num_coords; i < 4; i++) {
+ for (i = num_derivs; i < 5; i++) {
coords[i] = bld->bld_base.base.undef;
}
- /*
- * XXX: whack shadow comparison value into place.
- * Should probably fix the interface for separate value
- * (it will not work for cube arrays if it is part of coords).
- */
+
+ /* Layer coord always goes into 3rd slot, except for cube map arrays */
+ if (layer_coord) {
+ if (layer_coord == 3)
+ coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
+ else
+ coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
+ }
+ /* Shadow coord occupies always 5th slot. */
if (compare) {
- unsigned c_coord = num_coords > 2 ? 3 : 2;
- assert(num_coords < 4);
- coords[c_coord] = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
+ sample_key |= LP_SAMPLER_SHADOW;
+ coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
}
if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
unsigned dim;
+ sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
for (dim = 0; dim < num_derivs; ++dim) {
- derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim );
- derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim );
+ derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
+ derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
+ }
+ params.derivs = &derivs;
+ /*
+ * could also check all src regs if constant but I doubt such
+ * cases exist in practice.
+ */
+ if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
+ if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
+ lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
+ }
+ else {
+ lod_property = LP_SAMPLER_LOD_PER_QUAD;
+ }
+ }
+ else {
+ lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
}
- deriv_ptr = &derivs;
}
/* some advanced gather instructions (txgo) would require 4 offsets */
if (inst->Texture.NumOffsets == 1) {
unsigned dim;
+ sample_key |= LP_SAMPLER_OFFSETS;
for (dim = 0; dim < num_offsets; dim++) {
- offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
+ offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
}
}
+ sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
+
+ params.type = bld->bld_base.base.type;
+ params.sample_key = sample_key;
+ params.texture_index = texture_unit;
+ params.sampler_index = sampler_unit;
+ params.context_ptr = bld->context_ptr;
+ params.thread_data_ptr = bld->thread_data_ptr;
+ params.coords = coords;
+ params.offsets = offsets;
+ params.lod = lod;
+ params.texel = texel;
+
+ bld->sampler->emit_tex_sample(bld->sampler,
+ bld->bld_base.base.gallivm,
+ ¶ms);
- /* TODO: use scalar lod if explicit_lod, lod_bias or derivs are broadcasted scalars */
- scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
-
- bld->sampler->emit_fetch_texel(bld->sampler,
- bld->bld_base.base.gallivm,
- bld->bld_base.base.type,
- FALSE,
- texture_unit, sampler_unit,
- coords,
- offsets,
- deriv_ptr,
- lod_bias, explicit_lod, scalar_lod,
- texel);
-
- if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
- inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
- inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
- inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA) {
+ if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
+ inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
+ inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
+ inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
unsigned char swizzles[4];
swizzles[0] = inst->Src[1].Register.SwizzleX;
swizzles[1] = inst->Src[1].Register.SwizzleY;
unsigned unit, target;
LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
LLVMValueRef explicit_lod = NULL;
- LLVMValueRef coords[3];
+ LLVMValueRef coords[5];
LLVMValueRef offsets[3] = { NULL };
- boolean scalar_lod;
- unsigned num_coords;
- unsigned dims;
- unsigned i;
+ struct lp_sampler_params params;
+ enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
+ unsigned dims, i;
+ unsigned layer_coord = 0;
+ unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
+
+ memset(¶ms, 0, sizeof(params));
if (!bld->sampler) {
_debug_printf("warning: found texture instruction but no sampler generator supplied\n");
switch (target) {
case TGSI_TEXTURE_1D:
case TGSI_TEXTURE_BUFFER:
- num_coords = 1;
dims = 1;
break;
case TGSI_TEXTURE_1D_ARRAY:
- num_coords = 2;
+ layer_coord = 1;
dims = 1;
break;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_RECT:
- num_coords = 2;
+ case TGSI_TEXTURE_2D_MSAA:
dims = 2;
break;
case TGSI_TEXTURE_2D_ARRAY:
- num_coords = 3;
+ case TGSI_TEXTURE_2D_ARRAY_MSAA:
+ layer_coord = 2;
dims = 2;
break;
case TGSI_TEXTURE_3D:
- num_coords = 3;
dims = 3;
break;
default:
return;
}
- /* always have lod except for buffers ? */
- if (target != TGSI_TEXTURE_BUFFER) {
- explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
+ /* always have lod except for buffers and msaa targets ? */
+ if (target != TGSI_TEXTURE_BUFFER &&
+ target != TGSI_TEXTURE_2D_MSAA &&
+ target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
+ sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
+ explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
+ lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
}
+ /*
+ * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
+ * would be the sample index.
+ */
- for (i = 0; i < num_coords; i++) {
- coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
+ for (i = 0; i < dims; i++) {
+ coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
}
- for (i = num_coords; i < 3; i++) {
+ /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
+ for (i = dims; i < 5; i++) {
coords[i] = coord_undef;
}
+ if (layer_coord)
+ coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
if (inst->Texture.NumOffsets == 1) {
unsigned dim;
+ sample_key |= LP_SAMPLER_OFFSETS;
for (dim = 0; dim < dims; dim++) {
- offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
+ offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
}
}
+ sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
- /* TODO: use scalar lod if explicit_lod is broadcasted scalar */
- scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
-
- bld->sampler->emit_fetch_texel(bld->sampler,
- bld->bld_base.base.gallivm,
- bld->bld_base.base.type,
- TRUE,
- unit, unit,
- coords,
- offsets,
- NULL,
- NULL, explicit_lod, scalar_lod,
- texel);
+ params.type = bld->bld_base.base.type;
+ params.sample_key = sample_key;
+ params.texture_index = unit;
+ /*
+ * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
+ * and trigger some assertions with d3d10 where the sampler view number
+ * can exceed this.
+ */
+ params.sampler_index = 0;
+ params.context_ptr = bld->context_ptr;
+ params.thread_data_ptr = bld->thread_data_ptr;
+ params.coords = coords;
+ params.offsets = offsets;
+ params.derivs = NULL;
+ params.lod = explicit_lod;
+ params.texel = texel;
+
+ bld->sampler->emit_tex_sample(bld->sampler,
+ bld->bld_base.base.gallivm,
+ ¶ms);
if (is_samplei &&
- (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
- inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
- inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
- inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA)) {
+ (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
+ inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
+ inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
+ inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
unsigned char swizzles[4];
swizzles[0] = inst->Src[1].Register.SwizzleX;
swizzles[1] = inst->Src[1].Register.SwizzleY;
boolean is_sviewinfo)
{
LLVMValueRef explicit_lod;
- boolean scalar_lod;
+ enum lp_sampler_lod_property lod_property;
unsigned has_lod;
unsigned i;
unsigned unit = inst->Src[1].Register.Index;
unsigned target, pipe_target;
+ struct lp_sampler_size_query_params params;
if (is_sviewinfo) {
target = bld->sv[unit].Resource;
return;
}
- if (has_lod)
- explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 0 );
- else
+ if (has_lod) {
+ explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
+ lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
+ }
+ else {
explicit_lod = NULL;
+ lod_property = LP_SAMPLER_LOD_SCALAR;
+ }
+
pipe_target = tgsi_to_pipe_tex_target(target);
- /* TODO: use scalar lod if explicit_lod is broadcasted scalar */
- scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;
+ params.int_type = bld->bld_base.int_bld.type;
+ params.texture_unit = unit;
+ params.target = pipe_target;
+ params.context_ptr = bld->context_ptr;
+ params.is_sviewinfo = TRUE;
+ params.lod_property = lod_property;
+ params.explicit_lod = explicit_lod;
+ params.sizes_out = sizes_out;
bld->sampler->emit_size_query(bld->sampler,
bld->bld_base.base.gallivm,
- bld->bld_base.int_bld.type,
- unit, pipe_target,
- is_sviewinfo,
- scalar_lod,
- explicit_lod,
- sizes_out);
+ ¶ms);
}
static boolean
near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
- int pc)
+ int pc)
{
- int i;
+ unsigned i;
for (i = 0; i < 5; i++) {
- unsigned opcode;
+ enum tgsi_opcode opcode;
if (pc + i >= bld->bld_base.info->num_instructions)
- return TRUE;
+ return TRUE;
opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
if (opcode == TGSI_OPCODE_END)
- return TRUE;
+ return TRUE;
if (opcode == TGSI_OPCODE_TEX ||
- opcode == TGSI_OPCODE_TXP ||
- opcode == TGSI_OPCODE_TXD ||
- opcode == TGSI_OPCODE_TXB ||
- opcode == TGSI_OPCODE_TXL ||
- opcode == TGSI_OPCODE_TXF ||
- opcode == TGSI_OPCODE_TXQ ||
- opcode == TGSI_OPCODE_CAL ||
- opcode == TGSI_OPCODE_CALLNZ ||
- opcode == TGSI_OPCODE_IF ||
- opcode == TGSI_OPCODE_UIF ||
- opcode == TGSI_OPCODE_BGNLOOP ||
- opcode == TGSI_OPCODE_SWITCH)
- return FALSE;
+ opcode == TGSI_OPCODE_TXP ||
+ opcode == TGSI_OPCODE_TXD ||
+ opcode == TGSI_OPCODE_TXB ||
+ opcode == TGSI_OPCODE_TXL ||
+ opcode == TGSI_OPCODE_TXF ||
+ opcode == TGSI_OPCODE_TXQ ||
+ opcode == TGSI_OPCODE_TEX2 ||
+ opcode == TGSI_OPCODE_TXB2 ||
+ opcode == TGSI_OPCODE_TXL2 ||
+ opcode == TGSI_OPCODE_SAMPLE ||
+ opcode == TGSI_OPCODE_SAMPLE_B ||
+ opcode == TGSI_OPCODE_SAMPLE_C ||
+ opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
+ opcode == TGSI_OPCODE_SAMPLE_D ||
+ opcode == TGSI_OPCODE_SAMPLE_I ||
+ opcode == TGSI_OPCODE_SAMPLE_I_MS ||
+ opcode == TGSI_OPCODE_SAMPLE_L ||
+ opcode == TGSI_OPCODE_SVIEWINFO ||
+ opcode == TGSI_OPCODE_CAL ||
+ opcode == TGSI_OPCODE_IF ||
+ opcode == TGSI_OPCODE_UIF ||
+ opcode == TGSI_OPCODE_BGNLOOP ||
+ opcode == TGSI_OPCODE_SWITCH)
+ return FALSE;
}
return TRUE;
}
}
- if(mask) {
- lp_build_mask_update(bld->mask, mask);
-
- if (!near_end_of_shader(bld, pc))
- lp_build_mask_check(bld->mask);
+ if (bld->exec_mask.has_mask) {
+ LLVMValueRef invmask;
+ invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
+ mask = LLVMBuildOr(builder, mask, invmask, "");
}
+
+ lp_build_mask_update(bld->mask, mask);
+ if (!near_end_of_shader(bld, pc))
+ lp_build_mask_check(bld->mask);
}
* to stdout.
*/
static void
-emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
+emit_dump_file(struct lp_build_tgsi_soa_context *bld,
+ unsigned file)
{
+ const struct tgsi_shader_info *info = bld->bld_base.info;
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
- LLVMValueRef temp_ptr;
- LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
- LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
- LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
- LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
+ LLVMValueRef reg_ptr;
int index;
- int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
+ int max_index = info->file_max[file];
- for (index = 0; index < n; index++) {
- LLVMValueRef idx = lp_build_const_int32(gallivm, index);
- LLVMValueRef v[4][4], res;
+ /*
+ * Some register files, particularly constants, can be very large,
+ * and dumping everything could make this unusably slow.
+ */
+ max_index = MIN2(max_index, 32);
+
+ for (index = 0; index <= max_index; index++) {
+ LLVMValueRef res;
+ unsigned mask;
int chan;
- lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
+ if (index < 8 * sizeof(unsigned) &&
+ (info->file_mask[file] & (1u << index)) == 0) {
+      /* This was not declared. */
+ continue;
+ }
- for (chan = 0; chan < 4; chan++) {
- temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
- res = LLVMBuildLoad(builder, temp_ptr, "");
- v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
- v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
- v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
- v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
+ if (file == TGSI_FILE_INPUT) {
+ mask = info->input_usage_mask[index];
+ } else {
+ mask = TGSI_WRITEMASK_XYZW;
}
- lp_build_printf(gallivm, " X: %f %f %f %f\n",
- v[0][0], v[0][1], v[0][2], v[0][3]);
- lp_build_printf(gallivm, " Y: %f %f %f %f\n",
- v[1][0], v[1][1], v[1][2], v[1][3]);
- lp_build_printf(gallivm, " Z: %f %f %f %f\n",
- v[2][0], v[2][1], v[2][2], v[2][3]);
- lp_build_printf(gallivm, " W: %f %f %f %f\n",
- v[3][0], v[3][1], v[3][2], v[3][3]);
+ for (chan = 0; chan < 4; chan++) {
+ if ((mask & (1 << chan)) == 0) {
+        /* This channel is not used. */
+ continue;
+ }
+
+ if (file == TGSI_FILE_CONSTANT) {
+ struct tgsi_full_src_register reg;
+        memset(&reg, 0, sizeof reg);
+ reg.Register.File = file;
+ reg.Register.Index = index;
+ reg.Register.SwizzleX = 0;
+ reg.Register.SwizzleY = 1;
+ reg.Register.SwizzleZ = 2;
+ reg.Register.SwizzleW = 3;
+
+        res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
+ if (!res) {
+ continue;
+ }
+ } else if (file == TGSI_FILE_INPUT) {
+ res = bld->inputs[index][chan];
+ if (!res) {
+ continue;
+ }
+ } else if (file == TGSI_FILE_TEMPORARY) {
+ reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
+ assert(reg_ptr);
+ res = LLVMBuildLoad(builder, reg_ptr, "");
+ } else if (file == TGSI_FILE_OUTPUT) {
+ reg_ptr = lp_get_output_ptr(bld, index, chan);
+ assert(reg_ptr);
+ res = LLVMBuildLoad(builder, reg_ptr, "");
+ } else {
+ assert(0);
+ continue;
+ }
+
+ emit_dump_reg(gallivm, file, index, chan, res);
+ }
}
}
const unsigned last = decl->Range.Last;
unsigned idx, i;
- for (idx = first; idx <= last; ++idx) {
- assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
- switch (decl->Declaration.File) {
- case TGSI_FILE_TEMPORARY:
- assert(idx < LP_MAX_TGSI_TEMPS);
- if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
+ assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
+
+ switch (decl->Declaration.File) {
+ case TGSI_FILE_TEMPORARY:
+ if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
+ assert(last < LP_MAX_INLINED_TEMPS);
+ for (idx = first; idx <= last; ++idx) {
for (i = 0; i < TGSI_NUM_CHANNELS; i++)
bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
}
- break;
+ }
+ break;
- case TGSI_FILE_OUTPUT:
- if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
+ case TGSI_FILE_OUTPUT:
+ if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
+ for (idx = first; idx <= last; ++idx) {
for (i = 0; i < TGSI_NUM_CHANNELS; i++)
bld->outputs[idx][i] = lp_build_alloca(gallivm,
vec_type, "output");
}
- break;
+ }
+ break;
- case TGSI_FILE_ADDRESS:
- /* ADDR registers are only allocated with an integer LLVM IR type,
- * as they are guaranteed to always have integers.
- * XXX: Not sure if this exception is worthwhile (or the whole idea of
- * an ADDR register for that matter).
- */
+ case TGSI_FILE_ADDRESS:
+ /* ADDR registers are only allocated with an integer LLVM IR type,
+ * as they are guaranteed to always have integers.
+ * XXX: Not sure if this exception is worthwhile (or the whole idea of
+ * an ADDR register for that matter).
+ */
+ assert(last < LP_MAX_TGSI_ADDRS);
+ for (idx = first; idx <= last; ++idx) {
assert(idx < LP_MAX_TGSI_ADDRS);
for (i = 0; i < TGSI_NUM_CHANNELS; i++)
bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
- break;
-
- case TGSI_FILE_PREDICATE:
- assert(idx < LP_MAX_TGSI_PREDS);
- for (i = 0; i < TGSI_NUM_CHANNELS; i++)
- bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
- "predicate");
- break;
+ }
+ break;
- case TGSI_FILE_SAMPLER_VIEW:
- /*
- * The target stored here MUST match whatever there actually
- * is in the set sampler views (what about return type?).
- */
- assert(idx < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+ case TGSI_FILE_SAMPLER_VIEW:
+ /*
+ * The target stored here MUST match whatever there actually
+ * is in the set sampler views (what about return type?).
+ */
+ assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
+ for (idx = first; idx <= last; ++idx) {
bld->sv[idx] = decl->SamplerView;
- break;
-
- default:
- /* don't need to declare other vars */
- break;
}
+ break;
+
+ case TGSI_FILE_CONSTANT:
+ {
+ /*
+ * We could trivially fetch the per-buffer pointer when fetching the
+ * constant, relying on llvm to figure out it's always the same pointer
+ * anyway. However, doing so results in a huge (more than factor of 10)
+ * slowdown in llvm compilation times for some (but not all) shaders
+ * (more specifically, the IR optimization spends way more time in
+ * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
+ */
+ unsigned idx2D = decl->Dim.Index2D;
+ LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
+ assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
+ bld->consts[idx2D] =
+ lp_build_array_get(gallivm, bld->consts_ptr, index2D);
+ bld->consts_sizes[idx2D] =
+ lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
+ }
+ break;
+ case TGSI_FILE_BUFFER:
+ {
+ unsigned idx = decl->Range.First;
+ LLVMValueRef index = lp_build_const_int32(gallivm, idx);
+ assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
+ bld->ssbos[idx] =
+ lp_build_array_get(gallivm, bld->ssbo_ptr, index);
+ bld->ssbo_sizes[idx] =
+ lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
+
+ }
+ break;
+ default:
+ /* don't need to declare other vars */
+ break;
}
}
{
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
-
- /* simply copy the immediate values into the next immediates[] slot */
+ LLVMValueRef imms[4];
unsigned i;
const uint size = imm->Immediate.NrTokens - 1;
assert(size <= 4);
- assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
for( i = 0; i < size; ++i )
- bld->immediates[bld->num_immediates][i] =
- lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
+ imms[i] =
+ lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
break;
+ case TGSI_IMM_FLOAT64:
+ case TGSI_IMM_UINT64:
+ case TGSI_IMM_INT64:
case TGSI_IMM_UINT32:
for( i = 0; i < size; ++i ) {
LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
- bld->immediates[bld->num_immediates][i] =
- LLVMConstBitCast(tmp, bld_base->base.vec_type);
+ imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
}
break;
case TGSI_IMM_INT32:
for( i = 0; i < size; ++i ) {
LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
- bld->immediates[bld->num_immediates][i] =
- LLVMConstBitCast(tmp, bld_base->base.vec_type);
+ imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
}
-
+
break;
}
for( i = size; i < 4; ++i )
- bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
+ imms[i] = bld_base->base.undef;
- if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
+ if (bld->use_immediates_array) {
unsigned index = bld->num_immediates;
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef gep[2];
+ gep[0] = lp_build_const_int32(gallivm, 0);
+
+ assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
for (i = 0; i < 4; ++i ) {
- LLVMValueRef lindex = lp_build_const_int32(
- bld->bld_base.base.gallivm, index * 4 + i);
+ gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
- bld->imms_array, &lindex, 1, "");
- LLVMBuildStore(builder,
- bld->immediates[index][i],
- imm_ptr);
+ bld->imms_array, gep, 2, "");
+ LLVMBuildStore(builder, imms[i], imm_ptr);
+ }
+ } else {
+ /* simply copy the immediate values into the next immediates[] slot */
+ unsigned i;
+ assert(imm->Immediate.NrTokens - 1 <= 4);
+ assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
+
+ for(i = 0; i < 4; ++i )
+ bld->immediates[bld->num_immediates][i] = imms[i];
+
+ if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
+ unsigned index = bld->num_immediates;
+ struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef gep[2];
+ gep[0] = lp_build_const_int32(gallivm, 0);
+ for (i = 0; i < 4; ++i ) {
+ gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
+ LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
+ bld->imms_array, gep, 2, "");
+ LLVMBuildStore(builder,
+ bld->immediates[index][i],
+ imm_ptr);
+ }
}
}
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- emit_fetch_deriv(bld, emit_data->args[0], NULL,
- &emit_data->output[emit_data->chan], NULL);
+ emit_fetch_deriv(bld, emit_data->args[0], NULL,
+ &emit_data->output[emit_data->chan], NULL);
+}
+
+static void
+ddy_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
+ &emit_data->output[emit_data->chan]);
+}
+
+static void
+kill_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ emit_kill(bld, bld_base->pc - 1);
+}
+
+static void
+kill_if_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
+}
+
+static void
+tex_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
+ emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
+}
+
+static void
+tex2_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
+ emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}
static void
-ddy_emit(
+txb_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
- &emit_data->output[emit_data->chan]);
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
+ emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}
static void
-kill_emit(
+txb2_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- emit_kill(bld, bld_base->pc - 1);
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
+ emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}
static void
-kill_if_emit(
+txd_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
+ emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
}
static void
-tex_emit(
+txl_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
+ emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}
static void
-txb_emit(
+txl2_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
- emit_data->output);
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
+ emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}
static void
-txd_emit(
+txp_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
- emit_data->output);
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
+ emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}
static void
-txl_emit(
+tg4_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
- emit_data->output);
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
+ emit_data->output, 2, LP_SAMPLER_OP_GATHER);
}
static void
-txp_emit(
+lodq_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
struct lp_build_emit_data * emit_data)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
- emit_data->output);
+ emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
+ emit_data->output, 1, LP_SAMPLER_OP_LODQ);
}
static void
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
- FALSE, emit_data->output);
+ FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
static void
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
- FALSE, emit_data->output);
+ FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
static void
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
- TRUE, emit_data->output);
+ TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
static void
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
- TRUE, emit_data->output);
+ TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
static void
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
- FALSE, emit_data->output);
+ FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
}
static void
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
- FALSE, emit_data->output);
+ FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output);
+}
+
+static void
+gather4_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
+ FALSE, LP_SAMPLER_OP_GATHER, emit_data->output);
}
static void
emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
}
-static LLVMValueRef
-mask_to_one_vec(struct lp_build_tgsi_context *bld_base)
+static void
+lod_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
+ FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
+}
+
+static void
+load_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
- LLVMValueRef one_vec = bld_base->int_bld.one;
- struct lp_exec_mask *exec_mask = &bld->exec_mask;
+ const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
+ unsigned buf = bufreg->Register.Index;
+ assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+
+ if (0) {
+ /* for indirect support with ARB_gpu_shader5 */
+ } else {
+ LLVMValueRef index;
+ LLVMValueRef scalar, scalar_ptr;
+ unsigned chan_index;
+
+ index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
+ index = lp_build_shr_imm(uint_bld, index, 2);
+
+ scalar_ptr = bld->ssbos[buf];
+
+ LLVMValueRef ssbo_limit;
+
+ ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
+ ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
+
+ TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
+ LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
+
+ LLVMValueRef exec_mask = mask_vec(bld_base);
+ LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
+ exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
+
+ LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
+ struct lp_build_loop_state loop_state;
+ lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+ struct lp_build_if_state ifthen;
+ LLVMValueRef cond, temp_res;
+
+ loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
+ loop_state.counter, "");
+
+ cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+ cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
+
+ lp_build_if(&ifthen, gallivm, cond);
+ scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);
+
+ temp_res = LLVMBuildLoad(builder, result, "");
+ temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
+ LLVMBuildStore(builder, temp_res, result);
+ lp_build_else(&ifthen);
+ temp_res = LLVMBuildLoad(builder, result, "");
+ temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
+ LLVMBuildStore(builder, temp_res, result);
+ lp_build_endif(&ifthen);
+ lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+ NULL, LLVMIntUGE);
+ emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
+ }
+ }
+}
+
+static void
+store_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ struct gallivm_state * gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
+ unsigned buf = bufreg->Register.Index;
+ assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+
+ if (0) {
+
+ } else {
+ LLVMValueRef index; /* index into the const buffer */
+ LLVMValueRef scalar_ptr;
+ LLVMValueRef value;
+ unsigned chan_index;
+
+ index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
+ index = lp_build_shr_imm(uint_bld, index, 2);
+
+ scalar_ptr = bld->ssbos[buf];
+
+ LLVMValueRef ssbo_limit;
+
+ ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
+ ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
+
+ TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
+ LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
+
+ value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
+
+ LLVMValueRef exec_mask = mask_vec(bld_base);
+ LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
+ exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
+
+ struct lp_build_loop_state loop_state;
+ lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+ LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
+ loop_state.counter, "");
+ value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
+
+ struct lp_build_if_state ifthen;
+ LLVMValueRef cond;
+
+ loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
+ loop_state.counter, "");
+
+ cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+ cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
+ lp_build_if(&ifthen, gallivm, cond);
+
+ lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
+
+ lp_build_endif(&ifthen);
+ lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+ NULL, LLVMIntUGE);
+ }
+ }
+}
+
+static void
+resq_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
+
+ unsigned buf = bufreg->Register.Index;
+ assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+
+ LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
+
+ emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
+}
+
+static void
+atomic_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ struct gallivm_state * gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
+
+ assert(bufreg->Register.File == TGSI_FILE_BUFFER);
+ unsigned buf = bufreg->Register.Index;
+
+ LLVMAtomicRMWBinOp op;
+ switch (emit_data->inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ATOMUADD:
+ op = LLVMAtomicRMWBinOpAdd;
+ break;
+ case TGSI_OPCODE_ATOMXCHG:
+ op = LLVMAtomicRMWBinOpXchg;
+ break;
+ case TGSI_OPCODE_ATOMAND:
+ op = LLVMAtomicRMWBinOpAnd;
+ break;
+ case TGSI_OPCODE_ATOMOR:
+ op = LLVMAtomicRMWBinOpOr;
+ break;
+ case TGSI_OPCODE_ATOMXOR:
+ op = LLVMAtomicRMWBinOpXor;
+ break;
+ case TGSI_OPCODE_ATOMUMIN:
+ op = LLVMAtomicRMWBinOpUMin;
+ break;
+ case TGSI_OPCODE_ATOMUMAX:
+ op = LLVMAtomicRMWBinOpUMax;
+ break;
+ case TGSI_OPCODE_ATOMIMIN:
+ op = LLVMAtomicRMWBinOpMin;
+ break;
+ case TGSI_OPCODE_ATOMIMAX:
+ op = LLVMAtomicRMWBinOpMax;
+ break;
+ case TGSI_OPCODE_ATOMCAS:
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ if (0) {
+ } else {
+ LLVMValueRef index; /* index into the const buffer */
+ LLVMValueRef scalar, scalar_ptr;
+ LLVMValueRef value;
+
+ index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
+ value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
+
+ index = lp_build_shr_imm(uint_bld, index, 2);
+ index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
+
+ scalar_ptr = bld->ssbos[buf];
+
+ LLVMValueRef atom_res = lp_build_alloca(gallivm,
+ uint_bld->vec_type, "");
+
+ LLVMValueRef ssbo_limit;
+ ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
+ ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
+
+ LLVMValueRef exec_mask = mask_vec(bld_base);
+ LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
+ exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
+
+ struct lp_build_loop_state loop_state;
+ lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+ LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
+ loop_state.counter, "");
+ value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
+
+ index = LLVMBuildExtractElement(gallivm->builder, index,
+ loop_state.counter, "");
+
+ scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
+ &index, 1, "");
- if (exec_mask->has_mask) {
- one_vec = LLVMBuildAnd(builder, one_vec, exec_mask->exec_mask, "");
+ struct lp_build_if_state ifthen;
+ LLVMValueRef cond, temp_res;
+
+ cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+ cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
+ lp_build_if(&ifthen, gallivm, cond);
+
+ if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
+ LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
+ LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
+ loop_state.counter, "");
+ cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
+ scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
+ cas_src_ptr,
+ LLVMAtomicOrderingSequentiallyConsistent,
+ LLVMAtomicOrderingSequentiallyConsistent,
+ false);
+ scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
+ } else {
+ scalar = LLVMBuildAtomicRMW(builder, op,
+ scalar_ptr, value_ptr,
+ LLVMAtomicOrderingSequentiallyConsistent,
+ false);
+ }
+ temp_res = LLVMBuildLoad(builder, atom_res, "");
+ temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
+ LLVMBuildStore(builder, temp_res, atom_res);
+ lp_build_else(&ifthen);
+ temp_res = LLVMBuildLoad(builder, atom_res, "");
+ temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
+ LLVMBuildStore(builder, temp_res, atom_res);
+ lp_build_endif(&ifthen);
+
+ lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+ NULL, LLVMIntUGE);
+ emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
}
- one_vec = LLVMBuildAnd(builder, one_vec,
- lp_build_mask_value(bld->mask), "");
- return one_vec;
}
static void
LLVMValueRef mask)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-
LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
-
- current_vec = LLVMBuildAdd(builder, current_vec, mask, "");
-
+
+ current_vec = LLVMBuildSub(builder, current_vec, mask, "");
+
LLVMBuildStore(builder, current_vec, ptr);
}
LLVMValueRef mask)
{
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-
LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
- LLVMValueRef full_mask = lp_build_cmp(&bld_base->uint_bld,
- PIPE_FUNC_NOTEQUAL,
- mask,
- bld_base->uint_bld.zero);
current_vec = lp_build_select(&bld_base->uint_bld,
- full_mask,
+ mask,
bld_base->uint_bld.zero,
current_vec);
-
+
LLVMBuildStore(builder, current_vec, ptr);
}
LLVMValueRef total_emitted_vertices_vec)
{
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
- struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
- LLVMValueRef max_mask = lp_build_cmp(uint_bld, PIPE_FUNC_LESS,
+ struct lp_build_context *int_bld = &bld->bld_base.int_bld;
+ LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
total_emitted_vertices_vec,
bld->max_output_vertices_vec);
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
if (bld->gs_iface->emit_vertex) {
- LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
+ LLVMValueRef mask = mask_vec(bld_base);
LLVMValueRef total_emitted_vertices_vec =
LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
- masked_ones = clamp_mask_to_max_output_vertices(bld, masked_ones,
- total_emitted_vertices_vec);
+ mask = clamp_mask_to_max_output_vertices(bld, mask,
+ total_emitted_vertices_vec);
gather_outputs(bld);
bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
bld->outputs,
total_emitted_vertices_vec);
increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
- masked_ones);
+ mask);
increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
- masked_ones);
+ mask);
#if DUMP_GS_EMITS
lp_build_print_value(bld->bld_base.base.gallivm,
" +++ emit vertex masked ones = ",
- masked_ones);
+ mask);
lp_build_print_value(bld->bld_base.base.gallivm,
" +++ emit vertex emitted = ",
total_emitted_vertices_vec);
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
- LLVMValueRef masked_ones)
+ LLVMValueRef mask)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
if (bld->gs_iface->end_primitive) {
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
LLVMValueRef emitted_vertices_vec =
LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
LLVMValueRef emitted_prims_vec =
LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
-
+
+ LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
+ emitted_vertices_vec,
+ uint_bld->zero);
+ /* We need to combine the current execution mask with the mask
+ telling us which, if any, execution slots actually have
+ unemitted primitives, this way we make sure that end_primitives
+ executes only on the paths that have unflushed vertices */
+ mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
+
bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
emitted_vertices_vec,
emitted_prims_vec);
#if DUMP_GS_EMITS
lp_build_print_value(bld->bld_base.base.gallivm,
" +++ end prim masked ones = ",
- masked_ones);
+ mask);
lp_build_print_value(bld->bld_base.base.gallivm,
" +++ end prim emitted verts1 = ",
emitted_vertices_vec);
bld->emitted_prims_vec_ptr, ""));
#endif
increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
- masked_ones);
+ mask);
clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
- masked_ones);
+ mask);
#if DUMP_GS_EMITS
lp_build_print_value(bld->bld_base.base.gallivm,
" +++ end prim emitted verts2 = ",
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
if (bld->gs_iface->end_primitive) {
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
- struct lp_build_context *uint_bld = &bld_base->uint_bld;
- LLVMValueRef emitted_verts = LLVMBuildLoad(
- builder, bld->emitted_vertices_vec_ptr, "");
- LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
- emitted_verts,
- uint_bld->zero);
- /* We need to combine the current execution mask with the mask
- telling us which, if any, execution slots actually have
- unemitted primitives, this way we make sure that end_primitives
- executes only on the paths that have unflushed vertices */
- masked_ones = LLVMBuildAnd(builder, masked_ones, emitted_mask, "");
-
- end_primitive_masked(bld_base, masked_ones);
+ LLVMValueRef mask = mask_vec(bld_base);
+ end_primitive_masked(bld_base, mask);
}
}
lp_exec_break(&bld->exec_mask, bld_base);
}
-static void
-breakc_emit(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- struct lp_build_context *uint_bld = &bld_base->uint_bld;
- LLVMValueRef unsigned_cond =
- LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
- LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
- unsigned_cond,
- uint_bld->zero);
-
- lp_exec_break_condition(&bld->exec_mask, cond);
-}
-
static void
if_emit(
const struct lp_build_tgsi_action * action,
lp_exec_continue(&bld->exec_mask);
}
-/* XXX: Refactor and move it to lp_bld_tgsi_action.c
- *
- * XXX: What do the comments about xmm registers mean? Maybe they are left over
- * from old code, but there is no garauntee that LLVM will use those registers
- * for this code.
- *
- * XXX: There should be no calls to lp_build_emit_fetch in this function. This
- * should be handled by the emit_data->fetch_args function. */
-static void
-nrm_emit(
- const struct lp_build_tgsi_action * action,
- struct lp_build_tgsi_context * bld_base,
- struct lp_build_emit_data * emit_data)
-{
- LLVMValueRef tmp0, tmp1;
- LLVMValueRef tmp4 = NULL;
- LLVMValueRef tmp5 = NULL;
- LLVMValueRef tmp6 = NULL;
- LLVMValueRef tmp7 = NULL;
- struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
-
- uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
-
- if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
- TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
- TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
- (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
-
- /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
-
- /* xmm4 = src.x */
- /* xmm0 = src.x * src.x */
- tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
- if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
- tmp4 = tmp0;
- }
- tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
-
- /* xmm5 = src.y */
- /* xmm0 = xmm0 + src.y * src.y */
- tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
- if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
- tmp5 = tmp1;
- }
- tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
- tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
-
- /* xmm6 = src.z */
- /* xmm0 = xmm0 + src.z * src.z */
- tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
- if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
- tmp6 = tmp1;
- }
- tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
- tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
-
- if (dims == 4) {
- /* xmm7 = src.w */
- /* xmm0 = xmm0 + src.w * src.w */
- tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
- if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
- tmp7 = tmp1;
- }
- tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
- tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
- }
- /* xmm1 = 1 / sqrt(xmm0) */
- tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
- /* dst.x = xmm1 * src.x */
- if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
- emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
- }
- /* dst.y = xmm1 * src.y */
- if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
- emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
- }
-
- /* dst.z = xmm1 * src.z */
- if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
- emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
- }
- /* dst.w = xmm1 * src.w */
- if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) {
- emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
- }
- }
-
- /* dst.w = 1.0 */
- if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
- emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
- }
-}
-
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state * gallivm = bld_base->base.gallivm;
if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
- LLVMValueRef array_size =
- lp_build_const_int32(gallivm,
- bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
- bld->temps_array = lp_build_array_alloca(gallivm,
- bld_base->base.vec_type, array_size,
- "temp_array");
+ unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
+ bld->temps_array = lp_build_alloca_undef(gallivm,
+ LLVMArrayType(bld_base->base.vec_type, array_size),
+ "temp_array");
}
if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
}
if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
- LLVMValueRef array_size =
- lp_build_const_int32(gallivm,
- bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
- bld->imms_array = lp_build_array_alloca(gallivm,
- bld_base->base.vec_type, array_size,
+ unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
+ bld->imms_array = lp_build_alloca_undef(gallivm,
+ LLVMArrayType(bld_base->base.vec_type, array_size),
"imms_array");
}
LLVMBuildStore(gallivm->builder, uint_bld->zero,
bld->total_emitted_vertices_vec_ptr);
}
+
+ if (DEBUG_EXECUTION) {
+ lp_build_printf(gallivm, "\n");
+ emit_dump_file(bld, TGSI_FILE_CONSTANT);
+ if (!bld->gs_iface)
+ emit_dump_file(bld, TGSI_FILE_INPUT);
+ }
}
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- if (0) {
+ if (DEBUG_EXECUTION) {
/* for debugging */
- emit_dump_temps(bld);
+ if (0) {
+ emit_dump_file(bld, TGSI_FILE_TEMPORARY);
+ }
+ emit_dump_file(bld, TGSI_FILE_OUTPUT);
+ lp_build_printf(bld_base->base.gallivm, "\n");
}
/* If we have indirect addressing in outputs we need to copy our alloca array
LLVMValueRef total_emitted_vertices_vec;
LLVMValueRef emitted_prims_vec;
/* implicit end_primitives, needed in case there are any unflushed
- vertices in the cache */
- end_primitive(NULL, bld_base, NULL);
+ vertices in the cache. Note must not call end_primitive here
+ since the exec_mask is not valid at this point. */
+ end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
total_emitted_vertices_vec =
LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
const struct tgsi_token *tokens,
- struct lp_type type,
- struct lp_build_mask_context *mask,
- LLVMValueRef consts_ptr,
- const struct lp_bld_tgsi_system_values *system_values,
- const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
- LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
- struct lp_build_sampler_soa *sampler,
- const struct tgsi_shader_info *info,
- const struct lp_build_tgsi_gs_iface *gs_iface)
+ const struct lp_build_tgsi_params *params,
+ LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
{
struct lp_build_tgsi_soa_context bld;
-
+ struct lp_type type = params->type;
struct lp_type res_type;
assert(type.length <= LP_MAX_VECTOR_LENGTH);
lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
- bld.mask = mask;
- bld.inputs = inputs;
+ {
+ struct lp_type dbl_type;
+ dbl_type = type;
+ dbl_type.width *= 2;
+ lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
+ }
+ {
+ struct lp_type uint64_type;
+ uint64_type = lp_uint_type(type);
+ uint64_type.width *= 2;
+ lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
+ }
+ {
+ struct lp_type int64_type;
+ int64_type = lp_int_type(type);
+ int64_type.width *= 2;
+ lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
+ }
+ bld.mask = params->mask;
+ bld.inputs = params->inputs;
bld.outputs = outputs;
- bld.consts_ptr = consts_ptr;
- bld.sampler = sampler;
- bld.bld_base.info = info;
- bld.indirect_files = info->indirect_files;
+ bld.consts_ptr = params->consts_ptr;
+ bld.const_sizes_ptr = params->const_sizes_ptr;
+ bld.ssbo_ptr = params->ssbo_ptr;
+ bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
+ bld.sampler = params->sampler;
+ bld.bld_base.info = params->info;
+ bld.indirect_files = params->info->indirect_files;
+ bld.context_ptr = params->context_ptr;
+ bld.thread_data_ptr = params->thread_data_ptr;
+
+ /*
+ * If the number of temporaries is rather large then we just
+ * allocate them as an array right from the start and treat
+ * them like indirect temporaries.
+ */
+ if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
+ bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
+ }
+ /*
+ * For performance reason immediates are always backed in a static
+ * array, but if their number is too great, we have to use just
+ * a dynamically allocated array.
+ */
+ bld.use_immediates_array =
+ (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
+ if (bld.use_immediates_array) {
+ bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
+ }
+
bld.bld_base.soa = TRUE;
+ bld.bld_base.emit_debug = emit_debug;
bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
- bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
- bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
- bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
/* DX10 sampling ops */
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
-
- if (gs_iface) {
+ bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
+
+ bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
+
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
+
+ if (params->gs_iface) {
/* There's no specific value for this because it should always
* be set, but apps using ext_geometry_shader4 quite often
* were forgetting so we're using MAX_VERTEX_VARYING from
* that spec even though we could debug_assert if it's not
* set, but that's a lot uglier. */
- uint max_output_vertices = 32;
- uint i = 0;
+ uint max_output_vertices;
+
/* inputs are always indirect with gs */
bld.indirect_files |= (1 << TGSI_FILE_INPUT);
- bld.gs_iface = gs_iface;
+ bld.gs_iface = params->gs_iface;
bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
- for (i = 0; i < info->num_properties; ++i) {
- if (info->properties[i].name ==
- TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
- max_output_vertices = info->properties[i].data[0];
- }
- }
+ max_output_vertices =
+ params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
+ if (!max_output_vertices)
+ max_output_vertices = 32;
+
bld.max_output_vertices_vec =
- lp_build_const_int_vec(gallivm, bld.bld_base.uint_bld.type,
+ lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
max_output_vertices);
}
lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
- bld.system_values = *system_values;
+ bld.system_values = *params->system_values;
lp_build_tgsi_llvm(&bld.bld_base, tokens);
LLVMDumpModule(module);
}
+ lp_exec_mask_fini(&bld.exec_mask);
}