for (i = 0; i < fs_type.length / 4; i++) {
unsigned j = 2 * (i % 2) + (i / 2) * 8;
- bits[4*i + 0] = LLVMConstInt(i32t, 1 << (j + 0), 0);
- bits[4*i + 1] = LLVMConstInt(i32t, 1 << (j + 1), 0);
- bits[4*i + 2] = LLVMConstInt(i32t, 1 << (j + 4), 0);
- bits[4*i + 3] = LLVMConstInt(i32t, 1 << (j + 5), 0);
+ bits[4*i + 0] = LLVMConstInt(i32t, 1ULL << (j + 0), 0);
+ bits[4*i + 1] = LLVMConstInt(i32t, 1ULL << (j + 1), 0);
+ bits[4*i + 2] = LLVMConstInt(i32t, 1ULL << (j + 4), 0);
+ bits[4*i + 3] = LLVMConstInt(i32t, 1ULL << (j + 5), 0);
}
mask = LLVMBuildAnd(builder, mask, LLVMConstVector(bits, fs_type.length), "");
unsigned chan;
if (format_expands_to_float_soa(format_desc)) {
- /* just make this a 32bit uint */
+ /* just make this a uint with width of block */
type->floating = false;
type->fixed = false;
type->sign = false;
type->norm = false;
- type->width = 32;
+ type->width = format_desc->block.bits;
type->length = 1;
return;
}
* This is pretty suboptimal for this case; blending in SoA would be much
* better, since conversion gets us SoA values so need to convert back.
*/
- assert(src_type.width == 32);
+ assert(src_type.width == 32 || src_type.width == 16);
assert(dst_type.floating);
assert(dst_type.width == 32);
assert(dst_type.length % 4 == 0);
assert(num_srcs % 4 == 0);
+ if (src_type.width == 16) {
+ /* expand 4x16bit values to 4x32bit */
+ struct lp_type type32x4 = src_type;
+ LLVMTypeRef ltype32x4;
+ unsigned num_fetch = dst_type.length == 8 ? num_srcs / 2 : num_srcs / 4;
+ type32x4.width = 32;
+ ltype32x4 = lp_build_vec_type(gallivm, type32x4);
+ for (i = 0; i < num_fetch; i++) {
+ src[i] = LLVMBuildZExt(builder, src[i], ltype32x4, "");
+ }
+ src_type.width = 32;
+ }
for (i = 0; i < 4; i++) {
tmpsrc[i] = src[i];
}
assert(src_type.floating);
assert(src_type.width == 32);
assert(src_type.length % 4 == 0);
- assert(dst_type.width == 32);
+ assert(dst_type.width == 32 || dst_type.width == 16);
for (i = 0; i < num_srcs / 4; i++) {
LLVMValueRef tmpsoa[4], tmpdst;
src[i] = tmpdst;
}
}
+ if (dst_type.width == 16) {
+ struct lp_type type16x8 = dst_type;
+ struct lp_type type32x4 = dst_type;
+ LLVMTypeRef ltype16x4, ltypei64, ltypei128;
+ unsigned num_fetch = src_type.length == 8 ? num_srcs / 2 : num_srcs / 4;
+ type16x8.length = 8;
+ type32x4.width = 32;
+ ltypei128 = LLVMIntTypeInContext(gallivm->context, 128);
+ ltypei64 = LLVMIntTypeInContext(gallivm->context, 64);
+ ltype16x4 = lp_build_vec_type(gallivm, dst_type);
+ /* We could do vector truncation but it doesn't generate very good code */
+ for (i = 0; i < num_fetch; i++) {
+ src[i] = lp_build_pack2(gallivm, type32x4, type16x8,
+ src[i], lp_build_zero(gallivm, type32x4));
+ src[i] = LLVMBuildBitCast(builder, src[i], ltypei128, "");
+ src[i] = LLVMBuildTrunc(builder, src[i], ltypei64, "");
+ src[i] = LLVMBuildBitCast(builder, src[i], ltype16x4, "");
+ }
+ }
return;
}
partial_mask |= !variant->opaque;
i32_zero = lp_build_const_int32(gallivm, 0);
-#if HAVE_LLVM < 0x0302
- /*
- * undef triggers a crash in LLVMBuildTrunc in convert_from_blend_type in some
- * cases (seen with r10g10b10a2, 128bit wide vectors) (only used for 1d case).
- */
- undef_src_val = lp_build_zero(gallivm, fs_type);
-#else
undef_src_val = lp_build_undef(gallivm, fs_type);
-#endif
row_type.length = fs_type.length;
vector_width = dst_type.floating ? lp_native_vector_width : lp_integer_vector_width;
struct gallivm_state *gallivm = variant->gallivm;
const struct lp_fragment_shader_variant_key *key = &variant->key;
struct lp_shader_input inputs[PIPE_MAX_SHADER_INPUTS];
- char func_name[256];
+ char func_name[64];
struct lp_type fs_type;
struct lp_type blend_type;
LLVMTypeRef fs_elem_type;
}
/* check if writes to cbuf[0] are to be copied to all cbufs */
- cbuf0_write_all = FALSE;
- for (i = 0;i < shader->info.base.num_properties; i++) {
- if (shader->info.base.properties[i].name ==
- TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS) {
- cbuf0_write_all = TRUE;
- break;
- }
- }
+ cbuf0_write_all =
+ shader->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS];
/* TODO: actually pick these based on the fs and color buffer
* characteristics. */
blend_vec_type = lp_build_vec_type(gallivm, blend_type);
- util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
- shader->no, variant->no, partial_mask ? "partial" : "whole");
+ util_snprintf(func_name, sizeof(func_name), "fs%u_variant%u_%s",
+ shader->no, variant->no, partial_mask ? "partial" : "whole");
arg_types[0] = variant->jit_context_ptr_type; /* context */
arg_types[1] = int32_type; /* x */
LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type,
num_loop, "mask_store");
LLVMValueRef color_store[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS];
+ boolean pixel_center_integer =
+ shader->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER];
/*
* The shader input interpolation info is not explicitly baked in the
gallivm,
shader->info.base.num_inputs,
inputs,
- shader->info.base.pixel_center_integer,
+ pixel_center_integer,
builder, fs_type,
a0_ptr, dadx_ptr, dady_ptr,
x, y);
LLVMBuildRetVoid(builder);
gallivm_verify_function(gallivm, function);
-
- variant->nr_instrs += lp_build_count_instructions(function);
}
struct lp_fragment_shader_variant *variant;
const struct util_format_description *cbuf0_format_desc;
boolean fullcolormask;
+ char module_name[64];
variant = CALLOC_STRUCT(lp_fragment_shader_variant);
if(!variant)
return NULL;
- variant->gallivm = gallivm_create();
+ util_snprintf(module_name, sizeof(module_name), "fs%u_variant%u",
+ shader->no, shader->variants_created);
+
+ variant->gallivm = gallivm_create(module_name, lp->context);
if (!variant->gallivm) {
FREE(variant);
return NULL;
gallivm_compile_module(variant->gallivm);
+ variant->nr_instrs += lp_build_count_ir_module(variant->gallivm->module);
+
if (variant->function[RAST_EDGE_TEST]) {
variant->jit_function[RAST_EDGE_TEST] = (lp_jit_frag_func)
gallivm_jit_function(variant->gallivm,
variant->jit_function[RAST_WHOLE] = variant->jit_function[RAST_EDGE_TEST];
}
+ gallivm_free_ir(variant->gallivm);
+
return variant;
}
llvmpipe_remove_shader_variant(struct llvmpipe_context *lp,
struct lp_fragment_shader_variant *variant)
{
- unsigned i;
-
if (gallivm_debug & GALLIVM_DEBUG_IR) {
debug_printf("llvmpipe: del fs #%u var #%u v created #%u v cached"
" #%u v total cached #%u\n",
lp->nr_fs_variants);
}
- /* free all the variant's JIT'd functions */
- for (i = 0; i < Elements(variant->function); i++) {
- if (variant->function[i]) {
- gallivm_free_function(variant->gallivm,
- variant->function[i],
- variant->jit_function[i]);
- }
- }
-
gallivm_destroy(variant->gallivm);
/* remove from shader's list */
LP_COUNT_ADD(llvm_compile_time, dt);
LP_COUNT_ADD(nr_llvm_compiles, 2); /* emit vs. omit in/out test */
- llvmpipe_variant_count++;
-
/* Put the new variant into the list */
if (variant) {
insert_at_head(&shader->variants, &variant->list_item_local);