#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_intr.h"
+#include "lp_bld_arit.h"
#include "lp_bld_conv.h"
LLVMValueRef packed = NULL;
if(src_type.width == 32) {
- /* FIXME: we only have a packed signed intrinsic */
+#if 0
+ if(dst_type.sign)
+ packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", tmp_vec_type, lo, hi);
+ else {
+ /* XXX: PACKUSDW intrinsic is actually the only one with a consistent signature */
+ packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", new_vec_type, lo, hi);
+ }
+#else
packed = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", tmp_vec_type, lo, hi);
+#endif
}
else if(src_type.width == 16) {
if(dst_type.sign)
* Clamp if necessary
*/
- if(!tmp_type.norm && dst_type.norm) {
- /* FIXME */
+ if(tmp_type.sign != dst_type.sign || tmp_type.norm != dst_type.norm) {
+ struct lp_build_context bld;
+ lp_build_context_init(&bld, builder, tmp_type);
+
+ if(tmp_type.sign && !dst_type.sign)
+ for(i = 0; i < num_tmps; ++i)
+ tmp[i] = lp_build_max(&bld, tmp[i], bld.zero);
+
+ if(!tmp_type.norm && dst_type.norm)
+ for(i = 0; i < num_tmps; ++i)
+ tmp[i] = lp_build_min(&bld, tmp[i], bld.one);
}
/*
* Truncate or expand bit width
*/
- assert(!tmp_type.floating);
+ assert(!tmp_type.floating || tmp_type.width == dst_type.width);
if(tmp_type.width > dst_type.width) {
assert(num_dsts == 1);
/* FIXME: compensate different offsets too */
if(src_shift < dst_shift) {
- LLVMValueRef shift = lp_build_int_const_uni(tmp_type, src_shift - dst_shift);
+ LLVMValueRef shift = lp_build_int_const_uni(tmp_type, dst_shift - src_shift);
for(i = 0; i < num_tmps; ++i)
tmp[i] = LLVMBuildShl(builder, tmp[i], shift, "");
}
fprintf(fp, " dst_type=");
dump_type(fp, dst_type);
+ fprintf(fp, " ...\n");
fflush(fp);
}
provider = LLVMCreateModuleProviderForExistingModule(module);
if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
- dump_conv_types(stderr, src_type, dst_type);
- fprintf(stderr, "\n");
+ if(verbose < 1)
+ dump_conv_types(stderr, src_type, dst_type);
fprintf(stderr, "%s\n", error);
LLVMDisposeMessage(error);
abort();
}
if (!success) {
- dump_conv_types(stderr, src_type, dst_type);
- fprintf(stderr, "\n");
+ if(verbose < 1)
+ dump_conv_types(stderr, src_type, dst_type);
fprintf(stderr, "MISMATCH\n");
for(j = 0; j < num_srcs; ++j) {
fprintf(stderr, "\n");
}
+#if 0
+ fprintf(stderr, " Ref: ", j);
for(j = 0; j < src_type.length*num_srcs; ++j)
fprintf(stderr, " %f", fref[j]);
fprintf(stderr, "\n");
+#endif
for(j = 0; j < num_dsts; ++j) {
fprintf(stderr, " Dst%u: ", j);
}
- if(verbose >= 1) {
- fprintf(stdout, " cycles=%.1f", cycles_avg);
- }
-
- if(verbose >= 1) {
- fprintf(stdout, " result=%s\n", success ? "pass" : "fail");
- fflush(stdout);
- }
-
if(fp)
write_tsv_row(fp, src_type, dst_type, cycles_avg, success);
if (!success) {
- LLVMDumpModule(module);
- LLVMWriteBitcodeToFile(module, "conv.bc");
- fprintf(stderr, "conv.bc written\n");
- abort();
+ static boolean firsttime = TRUE;
+ if(firsttime) {
+ if(verbose < 2)
+ LLVMDumpModule(module);
+ LLVMWriteBitcodeToFile(module, "conv.bc");
+ fprintf(stderr, "conv.bc written\n");
+ fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n");
+ firsttime = FALSE;
+ //abort();
+ }
}
LLVMFreeMachineCodeForFunction(engine, func);
const union lp_type conv_types[] = {
/* float, fixed, sign, norm, width, len */
- {{ TRUE, FALSE, TRUE, TRUE, 32, 4 }}, /* f32 x 4 */
- {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, /* u8n x 16 */
+
+ {{ TRUE, FALSE, TRUE, TRUE, 32, 4 }}, /* signed normalized float32 x 4 */
+ {{ TRUE, FALSE, TRUE, FALSE, 32, 4 }}, /* signed float32 x 4 */
+ {{ TRUE, FALSE, FALSE, TRUE, 32, 4 }}, /* unsigned normalized float32 x 4 */
+ {{ TRUE, FALSE, FALSE, FALSE, 32, 4 }}, /* unsigned float32 x 4 */
+
+ {{ FALSE, FALSE, TRUE, TRUE, 32, 4 }}, /* signed normalized int32 x 4 */
+ {{ FALSE, FALSE, TRUE, FALSE, 32, 4 }}, /* signed int32 x 4 */
+ {{ FALSE, FALSE, FALSE, TRUE, 32, 4 }}, /* unsigned normalized int32 x 4 */
+ {{ FALSE, FALSE, FALSE, FALSE, 32, 4 }}, /* unsigned int32 x 4 */
+
+ {{ FALSE, FALSE, TRUE, TRUE, 16, 8 }}, /* signed normalized int16 x 8 */
+ {{ FALSE, FALSE, TRUE, FALSE, 16, 8 }}, /* signed int16 x 8 */
+ {{ FALSE, FALSE, FALSE, TRUE, 16, 8 }}, /* unsigned normalized int16 x 8 */
+ {{ FALSE, FALSE, FALSE, FALSE, 16, 8 }}, /* unsigned int16 x 8 */
+
+ {{ FALSE, FALSE, TRUE, TRUE, 8, 16 }}, /* signed normalized int8 x 16 */
+ {{ FALSE, FALSE, TRUE, FALSE, 8, 16 }}, /* signed int8 x 16 */
+ {{ FALSE, FALSE, FALSE, TRUE, 8, 16 }}, /* unsigned normalized int8 x 16 */
+ {{ FALSE, FALSE, FALSE, FALSE, 8, 16 }}, /* unsigned int8 x 16 */
};
if(src_type == dst_type)
continue;
+ if(src_type->norm != dst_type->norm)
+ continue;
+
if(!test_one(verbose, fp, *src_type, *dst_type))
success = FALSE;
do {
dst_type = &conv_types[random() % num_types];
- } while (src_type == dst_type);
+ } while (src_type == dst_type || src_type->norm != dst_type->norm);
if(!test_one(verbose, fp, *src_type, *dst_type))
success = FALSE;
dump_type(FILE *fp,
union lp_type type)
{
- fprintf(fp, "%s%u%sx%u",
- type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
+ fprintf(fp, "%s%s%u%sx%u",
+ type.sign ? (type.floating || type.fixed ? "" : "s") : "u",
+ type.floating ? "f" : (type.fixed ? "h" : "i"),
type.width,
type.norm ? "n" : "",
type.length);
void
-write_elem(union lp_type type, void *dst, unsigned index, double src)
+write_elem(union lp_type type, void *dst, unsigned index, double value)
{
- double scale = lp_const_scale(type);
- double value = scale*src;
assert(index < type.length);
+ if(!type.sign && value < 0.0)
+ value = 0.0;
+ if(type.norm && value > 1.0)
+ value = 1.0;
if (type.floating) {
switch(type.width) {
case 32:
}
}
else {
+ double scale = lp_const_scale(type);
+ value = round(value*scale);
if(type.sign) {
switch(type.width) {
case 8:
- *((int8_t *)dst + index) = (int8_t)round(value);
+ *((int8_t *)dst + index) = (int8_t)value;
break;
case 16:
- *((int16_t *)dst + index) = (int16_t)round(value);
+ *((int16_t *)dst + index) = (int16_t)value;
break;
case 32:
- *((int32_t *)dst + index) = (int32_t)round(value);
+ *((int32_t *)dst + index) = (int32_t)value;
break;
case 64:
- *((int64_t *)dst + index) = (int32_t)round(value);
+ *((int64_t *)dst + index) = (int32_t)value;
break;
default:
assert(0);
else {
switch(type.width) {
case 8:
- *((uint8_t *)dst + index) = (uint8_t)round(value);
+ *((uint8_t *)dst + index) = (uint8_t)value;
break;
case 16:
- *((uint16_t *)dst + index) = (uint16_t)round(value);
+ *((uint16_t *)dst + index) = (uint16_t)value;
break;
case 32:
- *((uint32_t *)dst + index) = (uint32_t)round(value);
+ *((uint32_t *)dst + index) = (uint32_t)value;
break;
case 64:
- *((uint64_t *)dst + index) = (uint64_t)round(value);
+ *((uint64_t *)dst + index) = (uint64_t)value;
break;
default:
assert(0);
void
random_elem(union lp_type type, void *dst, unsigned index)
{
+ double value;
assert(index < type.length);
- if (type.floating) {
- double value = (double)random()/(double)RAND_MAX;
- if(!type.norm) {
- value += (double)random();
- if(random() & 1)
- value = -value;
- }
- switch(type.width) {
- case 32:
- *((float *)dst + index) = (float)value;
- break;
- case 64:
- *((double *)dst + index) = value;
- break;
- default:
- assert(0);
- }
- }
- else {
- switch(type.width) {
- case 8:
- *((uint8_t *)dst + index) = (uint8_t)random();
- break;
- case 16:
- *((uint16_t *)dst + index) = (uint16_t)random();
- break;
- case 32:
- *((uint32_t *)dst + index) = (uint32_t)random();
- break;
- case 64:
- *((uint64_t *)dst + index) = (uint64_t)random();
- break;
- default:
- assert(0);
- }
+ value = (double)random()/(double)RAND_MAX;
+ if(!type.norm) {
+ unsigned long long mask;
+ if (type.floating)
+ mask = ~(unsigned long long)0;
+ else if (type.fixed)
+ mask = ((unsigned long long)1 << (type.width / 2)) - 1;
+ else if (type.sign)
+ mask = ((unsigned long long)1 << (type.width - 1)) - 1;
+ else
+ mask = ((unsigned long long)1 << type.width) - 1;
+ value += (double)(mask & random());
}
+ if(!type.sign)
+ if(random() & 1)
+ value = -value;
+ write_elem(type, dst, index, value);
}