#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
+#include "util/u_prim.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_info.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
+#include "lp_bld_misc.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
+#include "lp_bld_coro.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_sample.h"
#include "lp_bld_struct.h"
-/* SM 4.0 says that subroutines can nest 32 deep and
- * we need one more for our main function */
-#define LP_MAX_NUM_FUNCS 33
-
#define DUMP_GS_EMITS 0
/*
{
char buf[32];
- util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
- tgsi_file_name(file),
- index, "xyzw"[chan]);
+ snprintf(buf, sizeof buf, " %s[%u].%c = ",
+ tgsi_file_name(file),
+ index, "xyzw"[chan]);
lp_build_print_value(gallivm, buf, value);
}
-/*
- * Return the context for the current function.
- * (always 'main', if shader doesn't do any function calls)
- */
static inline struct function_ctx *
func_ctx(struct lp_exec_mask *mask)
{
}
/*
- * Returns true if we're in a loop.
- * It's global, meaning that it returns true even if there's
- * no loop inside the current function, but we were inside
- * a loop inside another function, from which this one was called.
+ * Combine the execution mask (if there is one) with the current mask.
*/
-static inline boolean
-mask_has_loop(struct lp_exec_mask *mask)
-{
- int i;
- for (i = mask->function_stack_size - 1; i >= 0; --i) {
- const struct function_ctx *ctx = &mask->function_stack[i];
- if (ctx->loop_stack_size > 0)
- return TRUE;
- }
- return FALSE;
-}
-
-/*
- * Returns true if we're inside a switch statement.
- * It's global, meaning that it returns true even if there's
- * no switch in the current function, but we were inside
- * a switch inside another function, from which this one was called.
- */
-static inline boolean
-mask_has_switch(struct lp_exec_mask *mask)
-{
- int i;
- for (i = mask->function_stack_size - 1; i >= 0; --i) {
- const struct function_ctx *ctx = &mask->function_stack[i];
- if (ctx->switch_stack_size > 0)
- return TRUE;
- }
- return FALSE;
-}
-
-/*
- * Returns true if we're inside a conditional.
- * It's global, meaning that it returns true even if there's
- * no conditional in the current function, but we were inside
- * a conditional inside another function, from which this one was called.
- */
-static inline boolean
-mask_has_cond(struct lp_exec_mask *mask)
-{
- int i;
- for (i = mask->function_stack_size - 1; i >= 0; --i) {
- const struct function_ctx *ctx = &mask->function_stack[i];
- if (ctx->cond_stack_size > 0)
- return TRUE;
- }
- return FALSE;
-}
-
-
-/*
- * Initialize a function context at the specified index.
- */
-static void
-lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
-{
- LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
- LLVMBuilderRef builder = mask->bld->gallivm->builder;
- struct function_ctx *ctx = &mask->function_stack[function_idx];
-
- ctx->cond_stack_size = 0;
- ctx->loop_stack_size = 0;
- ctx->switch_stack_size = 0;
-
- if (function_idx == 0) {
- ctx->ret_mask = mask->ret_mask;
- }
-
- ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
- int_type, "looplimiter");
- LLVMBuildStore(
- builder,
- LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
- ctx->loop_limiter);
-}
-
-static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
-{
- mask->bld = bld;
- mask->has_mask = FALSE;
- mask->ret_in_main = FALSE;
- /* For the main function */
- mask->function_stack_size = 1;
-
- mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
- mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
- mask->cond_mask = mask->switch_mask =
- LLVMConstAllOnes(mask->int_vec_type);
-
- mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
- sizeof(mask->function_stack[0]));
- lp_exec_mask_function_init(mask, 0);
-}
-
-static void
-lp_exec_mask_fini(struct lp_exec_mask *mask)
-{
- FREE(mask->function_stack);
-}
-
-static void lp_exec_mask_update(struct lp_exec_mask *mask)
-{
- LLVMBuilderRef builder = mask->bld->gallivm->builder;
- boolean has_loop_mask = mask_has_loop(mask);
- boolean has_cond_mask = mask_has_cond(mask);
- boolean has_switch_mask = mask_has_switch(mask);
- boolean has_ret_mask = mask->function_stack_size > 1 ||
- mask->ret_in_main;
-
- if (has_loop_mask) {
- /*for loops we need to update the entire mask at runtime */
- LLVMValueRef tmp;
- assert(mask->break_mask);
- tmp = LLVMBuildAnd(builder,
- mask->cont_mask,
- mask->break_mask,
- "maskcb");
- mask->exec_mask = LLVMBuildAnd(builder,
- mask->cond_mask,
- tmp,
- "maskfull");
- } else
- mask->exec_mask = mask->cond_mask;
-
- if (has_switch_mask) {
- mask->exec_mask = LLVMBuildAnd(builder,
- mask->exec_mask,
- mask->switch_mask,
- "switchmask");
- }
-
- if (has_ret_mask) {
- mask->exec_mask = LLVMBuildAnd(builder,
- mask->exec_mask,
- mask->ret_mask,
- "callmask");
- }
-
- mask->has_mask = (has_cond_mask ||
- has_loop_mask ||
- has_switch_mask ||
- has_ret_mask);
-}
-
-static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
- LLVMValueRef val)
-{
- LLVMBuilderRef builder = mask->bld->gallivm->builder;
- struct function_ctx *ctx = func_ctx(mask);
-
- if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
- ctx->cond_stack_size++;
- return;
- }
- if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
- assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
- }
- ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
- assert(LLVMTypeOf(val) == mask->int_vec_type);
- mask->cond_mask = LLVMBuildAnd(builder,
- mask->cond_mask,
- val,
- "");
- lp_exec_mask_update(mask);
-}
-
-static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
-{
- LLVMBuilderRef builder = mask->bld->gallivm->builder;
- struct function_ctx *ctx = func_ctx(mask);
- LLVMValueRef prev_mask;
- LLVMValueRef inv_mask;
-
- assert(ctx->cond_stack_size);
- if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
- return;
- prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
- if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
- assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
- }
-
- inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
-
- mask->cond_mask = LLVMBuildAnd(builder,
- inv_mask,
- prev_mask, "");
- lp_exec_mask_update(mask);
-}
-
-static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
-{
- struct function_ctx *ctx = func_ctx(mask);
- assert(ctx->cond_stack_size);
- --ctx->cond_stack_size;
- if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
- return;
- mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
- lp_exec_mask_update(mask);
-}
-
-static void lp_exec_bgnloop(struct lp_exec_mask *mask)
+static LLVMValueRef
+mask_vec(struct lp_build_tgsi_context *bld_base)
{
- LLVMBuilderRef builder = mask->bld->gallivm->builder;
- struct function_ctx *ctx = func_ctx(mask);
-
- if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
- ++ctx->loop_stack_size;
- return;
+ struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+ struct lp_exec_mask *exec_mask = &bld->exec_mask;
+ LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
+ if (!exec_mask->has_mask) {
+ return bld_mask;
}
-
- ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
- ctx->break_type;
- ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;
-
- ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
- ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
- ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
- ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
- ++ctx->loop_stack_size;
-
- ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
- LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
-
- ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");
-
- LLVMBuildBr(builder, ctx->loop_block);
- LLVMPositionBuilderAtEnd(builder, ctx->loop_block);
-
- mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");
-
- lp_exec_mask_update(mask);
+ if (!bld_mask)
+ return exec_mask->exec_mask;
+ return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
+ exec_mask->exec_mask, "");
}
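+
+/*
+ * TGSI-level BRK: peek at the next instruction to see whether the break
+ * is unconditional (immediately followed by ENDSWITCH or CASE), then
+ * defer to the generic lp_exec_break() helper.
+ */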
-static void lp_exec_break(struct lp_exec_mask *mask,
+static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
struct lp_build_tgsi_context * bld_base)
{
- LLVMBuilderRef builder = mask->bld->gallivm->builder;
- struct function_ctx *ctx = func_ctx(mask);
-
- if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
- LLVMValueRef exec_mask = LLVMBuildNot(builder,
- mask->exec_mask,
- "break");
-
- mask->break_mask = LLVMBuildAnd(builder,
- mask->break_mask,
- exec_mask, "break_full");
- }
- else {
- unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
- boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
- opcode == TGSI_OPCODE_CASE);
-
-
- if (ctx->switch_in_default) {
- /*
- * stop default execution but only if this is an unconditional switch.
- * (The condition here is not perfect since dead code after break is
- * allowed but should be sufficient since false negatives are just
- * unoptimized - so we don't have to pre-evaluate that).
- */
- if(break_always && ctx->switch_pc) {
- bld_base->pc = ctx->switch_pc;
- return;
- }
- }
-
- if (break_always) {
- mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
- }
- else {
- LLVMValueRef exec_mask = LLVMBuildNot(builder,
- mask->exec_mask,
- "break");
- mask->switch_mask = LLVMBuildAnd(builder,
- mask->switch_mask,
- exec_mask, "break_switch");
- }
- }
-
- lp_exec_mask_update(mask);
-}
-
-static void lp_exec_continue(struct lp_exec_mask *mask)
-{
- LLVMBuilderRef builder = mask->bld->gallivm->builder;
- LLVMValueRef exec_mask = LLVMBuildNot(builder,
- mask->exec_mask,
- "");
-
- mask->cont_mask = LLVMBuildAnd(builder,
- mask->cont_mask,
- exec_mask, "");
-
- lp_exec_mask_update(mask);
-}
-
-
-static void lp_exec_endloop(struct gallivm_state *gallivm,
- struct lp_exec_mask *mask)
-{
- LLVMBuilderRef builder = mask->bld->gallivm->builder;
- struct function_ctx *ctx = func_ctx(mask);
- LLVMBasicBlockRef endloop;
- LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
- LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
- mask->bld->type.width *
- mask->bld->type.length);
- LLVMValueRef i1cond, i2cond, icond, limiter;
-
- assert(mask->break_mask);
-
-
- assert(ctx->loop_stack_size);
- if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
- --ctx->loop_stack_size;
- return;
- }
-
- /*
- * Restore the cont_mask, but don't pop
- */
- mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
- lp_exec_mask_update(mask);
-
- /*
- * Unlike the continue mask, the break_mask must be preserved across loop
- * iterations
- */
- LLVMBuildStore(builder, mask->break_mask, ctx->break_var);
-
- /* Decrement the loop limiter */
- limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");
-
- limiter = LLVMBuildSub(
- builder,
- limiter,
- LLVMConstInt(int_type, 1, false),
- "");
-
- LLVMBuildStore(builder, limiter, ctx->loop_limiter);
-
- /* i1cond = (mask != 0) */
- i1cond = LLVMBuildICmp(
- builder,
- LLVMIntNE,
- LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
- LLVMConstNull(reg_type), "i1cond");
-
- /* i2cond = (looplimiter > 0) */
- i2cond = LLVMBuildICmp(
- builder,
- LLVMIntSGT,
- limiter,
- LLVMConstNull(int_type), "i2cond");
-
- /* if( i1cond && i2cond ) */
- icond = LLVMBuildAnd(builder, i1cond, i2cond, "");
-
- endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");
-
- LLVMBuildCondBr(builder,
- icond, ctx->loop_block, endloop);
-
- LLVMPositionBuilderAtEnd(builder, endloop);
-
- assert(ctx->loop_stack_size);
- --ctx->loop_stack_size;
- mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
- mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
- ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
- ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
- ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
- ctx->switch_stack_size];
-
- lp_exec_mask_update(mask);
+ enum tgsi_opcode opcode =
+ bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
+ bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
+ opcode == TGSI_OPCODE_CASE);
+ lp_exec_break(mask, &bld_base->pc, break_always);
}
static void lp_exec_switch(struct lp_exec_mask *mask,
}
while (pc != ~0u && pc < bld_base->num_instructions) {
- unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
+ enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
switch (opcode) {
case TGSI_OPCODE_CASE:
if (curr_switch_stack == ctx->switch_stack_size) {
}
curr_switch_stack--;
break;
+ default:
+ ; /* nothing */
}
pc++;
}
* which just gets rid of all case statements appearing together with
* default (or could do switch analysis at switch start time instead).
*/
- unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
+ enum tgsi_opcode opcode =
+ bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
opcode != TGSI_OPCODE_SWITCH);
/*
}
-/* stores val into an address pointed to by dst_ptr.
- * mask->exec_mask is used to figure out which bits of val
- * should be stored into the address
- * (0 means don't store this bit, 1 means do store).
- */
-static void lp_exec_mask_store(struct lp_exec_mask *mask,
- struct lp_build_context *bld_store,
- LLVMValueRef val,
- LLVMValueRef dst_ptr)
-{
- LLVMBuilderRef builder = mask->bld->gallivm->builder;
- LLVMValueRef exec_mask = mask->has_mask ? mask->exec_mask : NULL;
-
- assert(lp_check_value(bld_store->type, val));
- assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
- assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));
-
- if (exec_mask) {
- LLVMValueRef res, dst;
-
- dst = LLVMBuildLoad(builder, dst_ptr, "");
- res = lp_build_select(bld_store, exec_mask, val, dst);
- LLVMBuildStore(builder, res, dst_ptr);
- } else
- LLVMBuildStore(builder, val, dst_ptr);
-}
-
static void lp_exec_mask_call(struct lp_exec_mask *mask,
int func,
int *pc)
if (bld->indirect_files & (1 << file)) {
LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
- return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
+ if (LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
+ LLVMValueRef gep[2];
+ gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
+ gep[1] = lindex;
+ return LLVMBuildGEP(builder, var_of_array, gep, 2, "");
+ } else {
+ return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
+ }
}
else {
assert(index <= bld->bld_base.info->file_max[file]);
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
unsigned reg_file, unsigned reg_index,
- const struct tgsi_ind_register *indirect_reg)
+ const struct tgsi_ind_register *indirect_reg,
+ int index_limit)
{
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
* larger than the declared size but smaller than the buffer size.
*/
if (reg_file != TGSI_FILE_CONSTANT) {
+ assert(index_limit >= 0);
max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
- uint_bld->type,
- bld->bld_base.info->file_max[reg_file]);
+ uint_bld->type, index_limit);
assert(!uint_bld->type.sign);
index = lp_build_min(uint_bld, index, max_index);
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register * reg,
enum tgsi_opcode_type stype,
- unsigned swizzle)
+ unsigned swizzle_in)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMValueRef consts_ptr;
LLVMValueRef num_consts;
LLVMValueRef res;
+ unsigned swizzle = swizzle_in & 0xffff;
/* XXX: Handle fetching xyzw components as a vector */
assert(swizzle != ~0u);
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
-                                       &reg->Indirect);
+                                       &reg->Indirect,
+ bld->bld_base.info->file_max[reg->Register.File]);
/* All fetches are from the same constant buffer, so
* we need to propagate the size to a vector to do a
if (tgsi_type_is_64bit(stype)) {
LLVMValueRef swizzle_vec2;
- swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
+ swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
}
scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
&index, 1, "");
- if (stype == TGSI_TYPE_DOUBLE) {
- LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
- scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
- bld_broad = &bld_base->dbl_bld;
- } else if (stype == TGSI_TYPE_UNSIGNED64) {
- LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
- scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
- bld_broad = &bld_base->uint64_bld;
- } else if (stype == TGSI_TYPE_SIGNED64) {
- LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
- scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
- bld_broad = &bld_base->int64_bld;
+
+ if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
+
+ LLVMValueRef scalar2, scalar2_ptr;
+ LLVMValueRef shuffles[2];
+ index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
+
+ scalar2_ptr = LLVMBuildGEP(builder, consts_ptr,
+ &index, 1, "");
+
+ scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+ scalar2 = LLVMBuildLoad(builder, scalar2_ptr, "");
+ shuffles[0] = lp_build_const_int32(gallivm, 0);
+ shuffles[1] = lp_build_const_int32(gallivm, 1);
+
+ res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
+ res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
+ res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
+ } else {
+ if (stype == TGSI_TYPE_DOUBLE) {
+ LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
+ scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
+ bld_broad = &bld_base->dbl_bld;
+ } else if (stype == TGSI_TYPE_UNSIGNED64) {
+ LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
+ scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
+ bld_broad = &bld_base->uint64_bld;
+ } else if (stype == TGSI_TYPE_SIGNED64) {
+ LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
+ scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
+ bld_broad = &bld_base->int64_bld;
+ }
+ scalar = LLVMBuildLoad(builder, scalar_ptr, "");
+ res = lp_build_broadcast_scalar(bld_broad, scalar);
}
- scalar = LLVMBuildLoad(builder, scalar_ptr, "");
- res = lp_build_broadcast_scalar(bld_broad, scalar);
+
}
if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
/**
* Fetch 64-bit values from two separate channels.
* 64-bit values are stored split across two channels, like xy and zw.
- * This function creates a set of 16 floats,
+ * This function creates a set of vec_length*2 floats,
* extracts the values from the two channels,
- * puts them in the correct place, then casts to 8 64-bits.
+ * puts them in the correct place, then casts to vec_length 64-bits.
*/
static LLVMValueRef
emit_fetch_64bit(
LLVMValueRef res;
struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
int i;
- LLVMValueRef shuffles[16];
+ LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
int len = bld_base->base.type.length * 2;
- assert(len <= 16);
+ assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
shuffles[i] = lp_build_const_int32(gallivm, i / 2);
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register * reg,
enum tgsi_opcode_type stype,
- unsigned swizzle)
+ unsigned swizzle_in)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef res = NULL;
+ unsigned swizzle = swizzle_in & 0xffff;
if (bld->use_immediates_array || reg->Register.Indirect) {
LLVMValueRef imms_array;
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
-                                       &reg->Indirect);
+                                       &reg->Indirect,
+ bld->bld_base.info->file_max[reg->Register.File]);
/*
* Unlike for other reg classes, adding pixel offsets is unnecessary -
* immediates are stored as full vectors (FIXME??? - might be better
if (tgsi_type_is_64bit(stype))
index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
indirect_index,
- swizzle + 1,
+ swizzle_in >> 16,
FALSE);
/* Gather values from the immediate register array */
res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
} else {
- LLVMValueRef lindex = lp_build_const_int32(gallivm,
- reg->Register.Index * 4 + swizzle);
- LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
- bld->imms_array, &lindex, 1, "");
+ LLVMValueRef gep[2];
+ gep[0] = lp_build_const_int32(gallivm, 0);
+ gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
+ LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
+ bld->imms_array, gep, 2, "");
res = LLVMBuildLoad(builder, imms_ptr, "");
if (tgsi_type_is_64bit(stype)) {
- LLVMValueRef lindex1;
LLVMValueRef imms_ptr2;
LLVMValueRef res2;
-
- lindex1 = lp_build_const_int32(gallivm,
- reg->Register.Index * 4 + swizzle + 1);
+ gep[1] = lp_build_const_int32(gallivm,
+ reg->Register.Index * 4 + (swizzle_in >> 16));
imms_ptr2 = LLVMBuildGEP(builder,
- bld->imms_array, &lindex1, 1, "");
+ bld->imms_array, gep, 2, "");
res2 = LLVMBuildLoad(builder, imms_ptr2, "");
res = emit_fetch_64bit(bld_base, stype, res, res2);
}
else {
res = bld->immediates[reg->Register.Index][swizzle];
if (tgsi_type_is_64bit(stype))
- res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
+ res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
}
if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register * reg,
enum tgsi_opcode_type stype,
- unsigned swizzle)
+ unsigned swizzle_in)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef res;
+ unsigned swizzle = swizzle_in & 0xffff;
if (reg->Register.Indirect) {
LLVMValueRef indirect_index;
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
-                                       &reg->Indirect);
+                                       &reg->Indirect,
+ bld->bld_base.info->file_max[reg->Register.File]);
index_vec = get_soa_array_offsets(&bld_base->uint_bld,
indirect_index,
if (tgsi_type_is_64bit(stype)) {
index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
indirect_index,
- swizzle + 1,
+ swizzle_in >> 16,
TRUE);
}
/* cast inputs_array pointer to float* */
LLVMValueRef res2;
lindex1 = lp_build_const_int32(gallivm,
- reg->Register.Index * 4 + swizzle + 1);
+ reg->Register.Index * 4 + (swizzle_in >> 16));
input_ptr2 = LLVMBuildGEP(builder,
bld->inputs_array, &lindex1, 1, "");
res2 = LLVMBuildLoad(builder, input_ptr2, "");
else {
res = bld->inputs[reg->Register.Index][swizzle];
if (tgsi_type_is_64bit(stype))
- res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
+ res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
}
}
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register * reg,
enum tgsi_opcode_type stype,
- unsigned swizzle)
+ unsigned swizzle_in)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef attrib_index = NULL;
LLVMValueRef vertex_index = NULL;
+ unsigned swizzle = swizzle_in & 0xffff;
LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
LLVMValueRef res;
}
if (reg->Register.Indirect) {
+ /*
+ * XXX: this is possibly not quite the right value, since file_max may be
+ * larger than the max attrib index, due to it being the max of declared
+ * inputs AND the max vertices per prim (which is 6 for tri adj).
+ * It should however be safe to use (since we always allocate
+ * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
+ */
+ int index_limit = info->file_max[reg->Register.File];
attrib_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
-                                       &reg->Indirect);
+                                       &reg->Indirect,
+ index_limit);
} else {
attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
}
if (reg->Dimension.Indirect) {
+ /*
+ * A fixed 6 should do as well (which is what we allocate).
+ */
+ int index_limit = u_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
vertex_index = get_indirect_index(bld,
reg->Register.File,
reg->Dimension.Index,
-                                       &reg->DimIndirect);
+                                       &reg->DimIndirect,
+ index_limit);
} else {
vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
}
- res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
+ res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
reg->Dimension.Indirect,
vertex_index,
reg->Register.Indirect,
assert(res);
if (tgsi_type_is_64bit(stype)) {
- LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1);
+ LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
LLVMValueRef res2;
- res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
+ res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
reg->Dimension.Indirect,
vertex_index,
reg->Register.Indirect,
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register * reg,
enum tgsi_opcode_type stype,
- unsigned swizzle)
+ unsigned swizzle_in)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef res;
+ unsigned swizzle = swizzle_in & 0xffff;
if (reg->Register.Indirect) {
LLVMValueRef indirect_index;
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
-                                       &reg->Indirect);
+                                       &reg->Indirect,
+ bld->bld_base.info->file_max[reg->Register.File]);
index_vec = get_soa_array_offsets(&bld_base->uint_bld,
indirect_index,
if (tgsi_type_is_64bit(stype)) {
index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
indirect_index,
- swizzle + 1,
+ swizzle_in >> 16,
TRUE);
}
if (tgsi_type_is_64bit(stype)) {
LLVMValueRef temp_ptr2, res2;
- temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
+ temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
res2 = LLVMBuildLoad(builder, temp_ptr2, "");
res = emit_fetch_64bit(bld_base, stype, res, res2);
}
struct lp_build_tgsi_context * bld_base,
const struct tgsi_full_src_register * reg,
enum tgsi_opcode_type stype,
- unsigned swizzle)
+ unsigned swizzle_in)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef res;
enum tgsi_opcode_type atype; // Actual type of the value
+ unsigned swizzle = swizzle_in & 0xffff;
assert(!reg->Register.Indirect);
atype = TGSI_TYPE_UNSIGNED;
break;
+ case TGSI_SEMANTIC_BASEINSTANCE:
+ res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
+ atype = TGSI_TYPE_UNSIGNED;
+ break;
+
case TGSI_SEMANTIC_PRIMID:
res = bld->system_values.prim_id;
atype = TGSI_TYPE_UNSIGNED;
atype = TGSI_TYPE_UNSIGNED;
break;
+ case TGSI_SEMANTIC_HELPER_INVOCATION:
+ res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
+ atype = TGSI_TYPE_UNSIGNED;
+ break;
+
+ case TGSI_SEMANTIC_THREAD_ID:
+ res = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, swizzle, "");
+ atype = TGSI_TYPE_UNSIGNED;
+ break;
+
+ case TGSI_SEMANTIC_BLOCK_ID:
+ res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.block_id, lp_build_const_int32(gallivm, swizzle));
+ atype = TGSI_TYPE_UNSIGNED;
+ break;
+
+ case TGSI_SEMANTIC_GRID_SIZE:
+ res = lp_build_extract_broadcast(gallivm, lp_type_int_vec(32, 96), bld_base->uint_bld.type, bld->system_values.grid_size, lp_build_const_int32(gallivm, swizzle));
+ atype = TGSI_TYPE_UNSIGNED;
+ break;
+
+   case TGSI_SEMANTIC_FACE:
+      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
+      atype = TGSI_TYPE_UNSIGNED;
+      break;
+
default:
assert(!"unexpected semantic in emit_fetch_system_value");
res = bld_base->base.zero;
}
/**
- * store an array of 8 64-bit into two arrays of 8 floats
+ * store an array of vec_length 64-bit values into two arrays of vec_length floats
* i.e.
* value is d0, d1, d2, d3 etc.
* each 64-bit has high and low pieces x, y
struct lp_build_context *float_bld = &bld_base->base;
unsigned i;
LLVMValueRef temp, temp2;
- LLVMValueRef shuffles[8];
- LLVMValueRef shuffles2[8];
+ LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
+ LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
for (i = 0; i < bld_base->base.type.length; i++) {
shuffles[i] = lp_build_const_int32(gallivm, i * 2);
indirect_index = get_indirect_index(bld,
reg->Register.File,
reg->Register.Index,
-                                       &reg->Indirect);
+                                       &reg->Indirect,
+ bld->bld_base.info->file_max[reg->Register.File]);
} else {
assert(reg->Register.Index <=
bld_base->info->file_max[reg->Register.File]);
* constant coords maybe).
* There's at least hope for sample opcodes as well as size queries.
*/
- if (reg->Register.File == TGSI_FILE_CONSTANT ||
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ ||
+ reg->Register.File == TGSI_FILE_CONSTANT ||
reg->Register.File == TGSI_FILE_IMMEDIATE) {
lod_property = LP_SAMPLER_LOD_SCALAR;
}
else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
- if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
+ if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
}
else {
/* Note lod and especially projected are illegal in a LOT of cases */
if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
- if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
- inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
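+      /* TEX_LZ always samples at level zero. */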
+ if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
+ lod = bld->bld_base.base.zero;
+ } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
+ inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
/* note that shadow cube array with bias/explicit lod does not exist */
lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
}
lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
}
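+   /*
+    * For gather the component to fetch is encoded in the sampler
+    * source's first swizzle element.
+    */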
+ if (sampler_op == LP_SAMPLER_OP_GATHER) {
+ uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
+ sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
+ }
if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
oow = lp_build_rcp(&bld->bld_base.base, oow);
* cases exist in practice.
*/
if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
- if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
+ if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
}
else {
* cases exist in practice.
*/
if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
- if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
+ if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
}
else {
/* always have lod except for buffers and msaa targets ? */
if (target != TGSI_TEXTURE_BUFFER &&
target != TGSI_TEXTURE_2D_MSAA &&
- target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
+ target != TGSI_TEXTURE_2D_ARRAY_MSAA &&
+ inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) {
sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
unsigned i;
for (i = 0; i < 5; i++) {
- unsigned opcode;
+ enum tgsi_opcode opcode;
if (pc + i >= bld->bld_base.info->num_instructions)
return TRUE;
bld->consts_sizes[idx2D] =
lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
}
- break;
+ break;
+ case TGSI_FILE_BUFFER:
+ {
+ unsigned idx = decl->Range.First;
+ LLVMValueRef index = lp_build_const_int32(gallivm, idx);
+ assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
+ bld->ssbos[idx] =
+ lp_build_array_get(gallivm, bld->ssbo_ptr, index);
+ bld->ssbo_sizes[idx] =
+ lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, index);
+ }
+ break;
+ case TGSI_FILE_MEMORY:
+ break;
default:
/* don't need to declare other vars */
break;
unsigned index = bld->num_immediates;
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef gep[2];
+ gep[0] = lp_build_const_int32(gallivm, 0);
assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
for (i = 0; i < 4; ++i ) {
- LLVMValueRef lindex = lp_build_const_int32(
- bld->bld_base.base.gallivm, index * 4 + i);
+ gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
- bld->imms_array, &lindex, 1, "");
+ bld->imms_array, gep, 2, "");
LLVMBuildStore(builder, imms[i], imm_ptr);
}
} else {
unsigned index = bld->num_immediates;
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef gep[2];
+ gep[0] = lp_build_const_int32(gallivm, 0);
for (i = 0; i < 4; ++i ) {
- LLVMValueRef lindex = lp_build_const_int32(
- bld->bld_base.base.gallivm, index * 4 + i);
+ gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
- bld->imms_array, &lindex, 1, "");
+ bld->imms_array, gep, 2, "");
LLVMBuildStore(builder,
bld->immediates[index][i],
imm_ptr);
emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
}
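+
+/*
+ * LODQ: reuse the regular sampling path with LP_SAMPLER_OP_LODQ.
+ */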
-static LLVMValueRef
-mask_vec(struct lp_build_tgsi_context *bld_base)
+static void
+lod_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+
+ emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
+ FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
+}
+
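+/*
+ * Map a TGSI texture target to its coordinate dimension count and, for
+ * array targets, the coordinate slot carrying the layer index.
+ */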
+static void target_to_dims_layer(unsigned target,
+ unsigned *dims,
+ unsigned *layer_coord)
+{
+ *layer_coord = 0;
+ switch (target) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_BUFFER:
+ *dims = 1;
+ break;
+ case TGSI_TEXTURE_1D_ARRAY:
+ *layer_coord = 1;
+ *dims = 1;
+ break;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ *dims = 2;
+ break;
+ case TGSI_TEXTURE_2D_ARRAY:
+ *layer_coord = 2;
+ *dims = 2;
+ break;
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ case TGSI_TEXTURE_CUBE_ARRAY:
+ *dims = 3;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+}
+
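+/*
+ * Image load: fetch the coordinates (plus layer index for array
+ * targets), fill in lp_img_params and hand the operation to the image
+ * backend's emit_op.
+ */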
+static void
+img_load_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ struct lp_img_params params;
+ LLVMValueRef coords[5];
+ LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
+ unsigned dims;
+ unsigned target = emit_data->inst->Memory.Texture;
+ unsigned layer_coord;
+
+ target_to_dims_layer(target, &dims, &layer_coord);
+
+ for (unsigned i = 0; i < dims; i++) {
+ coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
+ }
+ for (unsigned i = dims; i < 5; i++) {
+ coords[i] = coord_undef;
+ }
+ if (layer_coord)
+ coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
+
+   memset(&params, 0, sizeof(params));
+
+ params.type = bld->bld_base.base.type;
+ params.context_ptr = bld->context_ptr;
+ params.thread_data_ptr = bld->thread_data_ptr;
+ params.coords = coords;
+ params.outdata = emit_data->output;
+ params.target = tgsi_to_pipe_tex_target(target);
+ params.image_index = emit_data->inst->Src[0].Register.Index;
+ params.img_op = LP_IMG_LOAD;
+ bld->image->emit_op(bld->image,
+ bld->bld_base.base.gallivm,
+                       &params);
+}
+
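+/*
+ * LOAD: images go through img_load_emit and bindable constant buffers
+ * through a masked gather; SSBO and shared-memory loads loop over the
+ * vector lanes and read only lanes enabled in the exec mask (and, for
+ * SSBOs, within the buffer size); disabled lanes yield zero.
+ */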
+static void
+load_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ struct gallivm_state * gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
- struct lp_exec_mask *exec_mask = &bld->exec_mask;
+ const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
+ unsigned buf = bufreg->Register.Index;
+ assert(bufreg->Register.File == TGSI_FILE_BUFFER ||
+ bufreg->Register.File == TGSI_FILE_IMAGE ||
+ bufreg->Register.File == TGSI_FILE_MEMORY ||
+ bufreg->Register.File == TGSI_FILE_CONSTBUF);
+ bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
- if (!exec_mask->has_mask) {
- return lp_build_mask_value(bld->mask);
+ if (bufreg->Register.File == TGSI_FILE_IMAGE) {
+ img_load_emit(action, bld_base, emit_data);
+ } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) {
+ LLVMValueRef consts_ptr = bld->consts[buf];
+ LLVMValueRef num_consts = bld->consts_sizes[buf];
+
+ LLVMValueRef indirect_index;
+ LLVMValueRef overflow_mask;
+
+ indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0);
+ indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4);
+
+ /* All fetches are from the same constant buffer, so
+ * we need to propagate the size to a vector to do a
+ * vector comparison */
+ num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
+
+ /* Gather values from the constant buffer */
+ unsigned chan_index;
+ TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
+ /* Construct a boolean vector telling us which channels
+ * overflow the bound constant buffer */
+ overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
+ indirect_index, num_consts);
+
+ /* index_vec = indirect_index * 4 */
+ LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
+ index_vec = lp_build_add(uint_bld, index_vec,
+ lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
+
+ emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL);
+ }
+ } else if (0) {
+ /* for indirect support with ARB_gpu_shader5 */
+ } else {
+ LLVMValueRef index;
+ LLVMValueRef scalar, scalar_ptr;
+ unsigned chan_index;
+
+ index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
+ index = lp_build_shr_imm(uint_bld, index, 2);
+
+ scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
+
+ LLVMValueRef ssbo_limit;
+
+ if (!is_shared) {
+ ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
+ ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
+ }
+
+ TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
+ LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
+
+ LLVMValueRef exec_mask = mask_vec(bld_base);
+ if (!is_shared) {
+ LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
+ exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
+ }
+
+ LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
+ struct lp_build_loop_state loop_state;
+ lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+ struct lp_build_if_state ifthen;
+ LLVMValueRef cond, temp_res;
+
+ loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
+ loop_state.counter, "");
+
+ cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+ cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
+
+ lp_build_if(&ifthen, gallivm, cond);
+ scalar = lp_build_pointer_get(builder, scalar_ptr, loop_index);
+
+ temp_res = LLVMBuildLoad(builder, result, "");
+ temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
+ LLVMBuildStore(builder, temp_res, result);
+ lp_build_else(&ifthen);
+ temp_res = LLVMBuildLoad(builder, result, "");
+ temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
+ LLVMBuildStore(builder, temp_res, result);
+ lp_build_endif(&ifthen);
+ lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+ NULL, LLVMIntUGE);
+ emit_data->output[chan_index] = LLVMBuildLoad(gallivm->builder, result, "");
+ }
}
- return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
- exec_mask->exec_mask, "");
+}
+
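+/*
+ * Image store: like img_load_emit, but sources the data to write from
+ * src 1 and passes the current exec mask so only active lanes store.
+ */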
+static void
+img_store_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ struct lp_img_params params;
+ LLVMValueRef coords[5];
+ LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
+ unsigned dims;
+ unsigned target = emit_data->inst->Memory.Texture;
+ unsigned layer_coord;
+
+ target_to_dims_layer(target, &dims, &layer_coord);
+ for (unsigned i = 0; i < dims; i++) {
+ coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
+ }
+ for (unsigned i = dims; i < 5; i++) {
+ coords[i] = coord_undef;
+ }
+ if (layer_coord)
+ coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
+   memset(&params, 0, sizeof(params));
+
+ params.type = bld->bld_base.base.type;
+ params.context_ptr = bld->context_ptr;
+ params.thread_data_ptr = bld->thread_data_ptr;
+ params.coords = coords;
+ params.outdata = NULL;
+ params.exec_mask = mask_vec(bld_base);
+ params.target = tgsi_to_pipe_tex_target(target);
+ params.image_index = emit_data->inst->Dst[0].Register.Index;
+ params.img_op = LP_IMG_STORE;
+ for (unsigned i = 0; i < 4; i++)
+ params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
+
+ bld->image->emit_op(bld->image,
+ bld->bld_base.base.gallivm,
+                       &params);
+}
+
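+/*
+ * STORE: image stores go through img_store_emit; SSBO and shared-memory
+ * stores loop over the vector lanes, writing each lane's scalar under a
+ * per-lane conditional so inactive or out-of-bounds lanes don't write.
+ */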
+static void
+store_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ struct gallivm_state * gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
+ unsigned buf = bufreg->Register.Index;
+ assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
+ bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
+
+ if (bufreg->Register.File == TGSI_FILE_IMAGE) {
+ img_store_emit(action, bld_base, emit_data);
+ } else if (0) {
+
+ } else {
+      LLVMValueRef index; /* index into the buffer */
+ LLVMValueRef scalar_ptr;
+ LLVMValueRef value;
+ unsigned chan_index;
+
+ index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
+ index = lp_build_shr_imm(uint_bld, index, 2);
+
+ scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];
+
+ LLVMValueRef ssbo_limit;
+
+ if (!is_shared) {
+ ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
+ ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
+ }
+
+ TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
+ LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));
+
+ value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);
+
+ LLVMValueRef exec_mask = mask_vec(bld_base);
+ if (!is_shared) {
+ LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
+ exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
+ }
+
+ struct lp_build_loop_state loop_state;
+ lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+ LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
+ loop_state.counter, "");
+ value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
+
+ struct lp_build_if_state ifthen;
+ LLVMValueRef cond;
+
+ loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
+ loop_state.counter, "");
+
+ cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+ cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
+ lp_build_if(&ifthen, gallivm, cond);
+
+ lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);
+
+ lp_build_endif(&ifthen);
+ lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+ NULL, LLVMIntUGE);
+ }
+ }
+}
+
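+/*
+ * RESQ: image resources use the image backend's size query; buffers
+ * simply broadcast the stored SSBO size.
+ */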
+static void
+resq_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
+
+ unsigned buf = bufreg->Register.Index;
+ assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
+
+ if (bufreg->Register.File == TGSI_FILE_IMAGE) {
+ unsigned target = emit_data->inst->Memory.Texture;
+ struct lp_sampler_size_query_params params = { 0 };
+ params.int_type = bld->bld_base.int_bld.type;
+ params.texture_unit = buf;
+ params.target = tgsi_to_pipe_tex_target(target);
+ params.context_ptr = bld->context_ptr;
+ params.sizes_out = emit_data->output;
+
+ bld->image->emit_size_query(bld->image,
+ bld->bld_base.base.gallivm,
+                               &params);
+ } else {
+ LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
+
+ emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
+ }
+}
+
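+/*
+ * Image atomics: fetch coordinates and data operands into lp_img_params
+ * and let the image backend emit the atomic; ATOMCAS fetches its second
+ * data operand into indata2.
+ */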
+static void
+img_atomic_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data,
+ LLVMAtomicRMWBinOp op)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ struct lp_img_params params;
+ LLVMValueRef coords[5];
+ LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
+ unsigned dims;
+ unsigned layer_coord;
+ unsigned target = emit_data->inst->Memory.Texture;
+
+ target_to_dims_layer(target, &dims, &layer_coord);
+
+ for (unsigned i = 0; i < dims; i++) {
+ coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
+ }
+ for (unsigned i = dims; i < 5; i++) {
+ coords[i] = coord_undef;
+ }
+ if (layer_coord)
+ coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
+   memset(&params, 0, sizeof(params));
+
+ params.type = bld->bld_base.base.type;
+ params.context_ptr = bld->context_ptr;
+ params.thread_data_ptr = bld->thread_data_ptr;
+ params.exec_mask = mask_vec(bld_base);
+ params.image_index = emit_data->inst->Src[0].Register.Index;
+ params.coords = coords;
+ params.target = tgsi_to_pipe_tex_target(target);
+ params.op = op;
+ params.outdata = emit_data->output;
+ params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
+
+ for (unsigned i = 0; i < 4; i++)
+ params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
+ if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
+ for (unsigned i = 0; i < 4; i++)
+ params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
+ }
+ bld->image->emit_op(bld->image,
+ bld->bld_base.base.gallivm,
+                       &params);
+}
+
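+/*
+ * ATOM*: map the TGSI atomic opcode onto the LLVM atomicrmw operation
+ * (ATOMCAS becomes a cmpxchg), then loop over the vector lanes and
+ * perform the scalar atomic only for lanes enabled in the exec mask;
+ * inactive or out-of-bounds lanes yield zero.
+ */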
+static void
+atomic_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ struct gallivm_state * gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
+
+ assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
+ unsigned buf = bufreg->Register.Index;
+ bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
+
+ LLVMAtomicRMWBinOp op;
+ switch (emit_data->inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ATOMUADD:
+ op = LLVMAtomicRMWBinOpAdd;
+ break;
+ case TGSI_OPCODE_ATOMXCHG:
+ op = LLVMAtomicRMWBinOpXchg;
+ break;
+ case TGSI_OPCODE_ATOMAND:
+ op = LLVMAtomicRMWBinOpAnd;
+ break;
+ case TGSI_OPCODE_ATOMOR:
+ op = LLVMAtomicRMWBinOpOr;
+ break;
+ case TGSI_OPCODE_ATOMXOR:
+ op = LLVMAtomicRMWBinOpXor;
+ break;
+ case TGSI_OPCODE_ATOMUMIN:
+ op = LLVMAtomicRMWBinOpUMin;
+ break;
+ case TGSI_OPCODE_ATOMUMAX:
+ op = LLVMAtomicRMWBinOpUMax;
+ break;
+ case TGSI_OPCODE_ATOMIMIN:
+ op = LLVMAtomicRMWBinOpMin;
+ break;
+ case TGSI_OPCODE_ATOMIMAX:
+ op = LLVMAtomicRMWBinOpMax;
+ break;
+ case TGSI_OPCODE_ATOMCAS:
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ if (bufreg->Register.File == TGSI_FILE_IMAGE) {
+ img_atomic_emit(action, bld_base, emit_data, op);
+ } else if (0) {
+ } else {
+      LLVMValueRef index; /* index into the buffer */
+ LLVMValueRef scalar, scalar_ptr;
+ LLVMValueRef value;
+
+ index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
+ value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);
+
+ index = lp_build_shr_imm(uint_bld, index, 2);
+
+ if (!is_shared) {
+ index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
+ scalar_ptr = bld->ssbos[buf];
+ } else
+ scalar_ptr = bld->shared_ptr;
+
+ LLVMValueRef atom_res = lp_build_alloca(gallivm,
+ uint_bld->vec_type, "");
+
+ LLVMValueRef ssbo_limit;
+ if (!is_shared) {
+ ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
+ ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
+ }
+
+ LLVMValueRef exec_mask = mask_vec(bld_base);
+
+ if (!is_shared) {
+ LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
+ exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
+ }
+
+ struct lp_build_loop_state loop_state;
+ lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+ LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
+ loop_state.counter, "");
+ value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");
+
+ index = LLVMBuildExtractElement(gallivm->builder, index,
+ loop_state.counter, "");
+
+ scalar_ptr = LLVMBuildGEP(builder, scalar_ptr,
+ &index, 1, "");
+
+ struct lp_build_if_state ifthen;
+ LLVMValueRef cond, temp_res;
+
+ cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+ cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
+ lp_build_if(&ifthen, gallivm, cond);
+
+ if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
+ LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
+ LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
+ loop_state.counter, "");
+ cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
+ scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
+ cas_src_ptr,
+ LLVMAtomicOrderingSequentiallyConsistent,
+ LLVMAtomicOrderingSequentiallyConsistent,
+ false);
+ scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
+ } else {
+ scalar = LLVMBuildAtomicRMW(builder, op,
+ scalar_ptr, value_ptr,
+ LLVMAtomicOrderingSequentiallyConsistent,
+ false);
+ }
+ temp_res = LLVMBuildLoad(builder, atom_res, "");
+ temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
+ LLVMBuildStore(builder, temp_res, atom_res);
+ lp_build_else(&ifthen);
+ temp_res = LLVMBuildLoad(builder, atom_res, "");
+ temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
+ LLVMBuildStore(builder, temp_res, atom_res);
+ lp_build_endif(&ifthen);
+
+ lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+ NULL, LLVMIntUGE);
+ emit_data->output[emit_data->chan] = LLVMBuildLoad(gallivm->builder, atom_res, "");
+ }
+}
+
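+/*
+ * BARRIER: suspend the shader's coroutine; execution continues at the
+ * resume block when the caller next resumes it (expected to be once the
+ * whole work-group has reached the barrier).
+ */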
+static void
+barrier_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+ struct gallivm_state * gallivm = bld_base->base.gallivm;
+
+ LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
+
+ lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
+ LLVMPositionBuilderAtEnd(gallivm->builder, resume);
+}
+
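+/*
+ * MEMBAR: lower all TGSI memory barrier variants to a single
+ * sequentially consistent LLVM fence.
+ */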
+static void
+membar_emit(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
}
static void
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
if (bld->gs_iface->emit_vertex) {
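+      /* The stream ID operand is assumed to be immediate 0; its swizzle
+       * selects the channel. */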
+ uint32_t imms_idx = emit_data->inst->Src[0].Register.SwizzleX;
+ LLVMValueRef stream_id = bld->immediates[0][imms_idx];
LLVMValueRef mask = mask_vec(bld_base);
LLVMValueRef total_emitted_vertices_vec =
LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
mask = clamp_mask_to_max_output_vertices(bld, mask,
total_emitted_vertices_vec);
gather_outputs(bld);
- bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
+ bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
bld->outputs,
- total_emitted_vertices_vec);
+ total_emitted_vertices_vec,
+ stream_id);
increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
mask);
increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
LLVMValueRef emitted_prims_vec =
LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
-
+ LLVMValueRef total_emitted_vertices_vec =
+ LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
emitted_vertices_vec,
uint_bld->zero);
executes only on the paths that have unflushed vertices */
mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
- bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
+ bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
+ total_emitted_vertices_vec,
emitted_vertices_vec,
- emitted_prims_vec);
+ emitted_prims_vec,
+ mask_vec(bld_base));
#if DUMP_GS_EMITS
lp_build_print_value(bld->bld_base.base.gallivm,
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- lp_exec_break(&bld->exec_mask, bld_base);
+ lp_exec_tgsi_break(&bld->exec_mask, bld_base);
}
static void
{
struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
- lp_exec_bgnloop(&bld->exec_mask);
+ lp_exec_bgnloop(&bld->exec_mask, true);
}
static void
struct gallivm_state * gallivm = bld_base->base.gallivm;
if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
- LLVMValueRef array_size =
- lp_build_const_int32(gallivm,
- bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
- bld->temps_array = lp_build_array_alloca(gallivm,
- bld_base->base.vec_type, array_size,
- "temp_array");
+ unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
+ bld->temps_array = lp_build_alloca_undef(gallivm,
+ LLVMArrayType(bld_base->base.vec_type, array_size),
+ "temp_array");
}
if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
}
if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
- LLVMValueRef array_size =
- lp_build_const_int32(gallivm,
- bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
- bld->imms_array = lp_build_array_alloca(gallivm,
- bld_base->base.vec_type, array_size,
+ unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
+ bld->imms_array = lp_build_alloca_undef(gallivm,
+ LLVMArrayType(bld_base->base.vec_type, array_size),
"imms_array");
}
LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
bld->gs_iface->gs_epilogue(bld->gs_iface,
- &bld->bld_base,
total_emitted_vertices_vec,
emitted_prims_vec);
} else {
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
const struct tgsi_token *tokens,
- struct lp_type type,
- struct lp_build_mask_context *mask,
- LLVMValueRef consts_ptr,
- LLVMValueRef const_sizes_ptr,
- const struct lp_bld_tgsi_system_values *system_values,
- const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
- LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
- LLVMValueRef context_ptr,
- LLVMValueRef thread_data_ptr,
- struct lp_build_sampler_soa *sampler,
- const struct tgsi_shader_info *info,
- const struct lp_build_tgsi_gs_iface *gs_iface)
+ const struct lp_build_tgsi_params *params,
+ LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
{
struct lp_build_tgsi_soa_context bld;
-
+ struct lp_type type = params->type;
struct lp_type res_type;
assert(type.length <= LP_MAX_VECTOR_LENGTH);
int64_type.width *= 2;
lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
}
- bld.mask = mask;
- bld.inputs = inputs;
+ bld.mask = params->mask;
+ bld.inputs = params->inputs;
bld.outputs = outputs;
- bld.consts_ptr = consts_ptr;
- bld.const_sizes_ptr = const_sizes_ptr;
- bld.sampler = sampler;
- bld.bld_base.info = info;
- bld.indirect_files = info->indirect_files;
- bld.context_ptr = context_ptr;
- bld.thread_data_ptr = thread_data_ptr;
+ bld.consts_ptr = params->consts_ptr;
+ bld.const_sizes_ptr = params->const_sizes_ptr;
+ bld.ssbo_ptr = params->ssbo_ptr;
+ bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
+ bld.sampler = params->sampler;
+ bld.bld_base.info = params->info;
+ bld.indirect_files = params->info->indirect_files;
+ bld.context_ptr = params->context_ptr;
+ bld.thread_data_ptr = params->thread_data_ptr;
+ bld.image = params->image;
+ bld.shared_ptr = params->shared_ptr;
+ bld.coro = params->coro;
/*
* If the number of temporaries is rather large then we just
* allocate them as an array right from the start and treat
* like indirect temporaries.
*/
- if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
+ if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
}
/*
* a dynamically allocated array.
*/
bld.use_immediates_array =
- (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
+ (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
if (bld.use_immediates_array) {
bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
}
bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
-
- if (gs_iface) {
+ bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;
+
+ bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
+
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;
+
+ bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
+ bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;
+
+ if (params->gs_iface) {
/* There's no specific value for this because it should always
* be set, but apps using ext_geometry_shader4 quite often
* were forgetting so we're using MAX_VERTEX_VARYING from
/* inputs are always indirect with gs */
bld.indirect_files |= (1 << TGSI_FILE_INPUT);
- bld.gs_iface = gs_iface;
+ bld.gs_iface = params->gs_iface;
bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
max_output_vertices =
- info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
+ params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
if (!max_output_vertices)
max_output_vertices = 32;
lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
- bld.system_values = *system_values;
+ bld.system_values = *params->system_values;
lp_build_tgsi_llvm(&bld.bld_base, tokens);