srcs[3].u16[ins->swizzle[3][c]]); \
}
/* Apply fxn lane-wise across a 4x8-bit vector. Each destination lane pulls
 * its input lane from every source via that source's swizzle. Expects
 * `dest`, `srcs`, and `ins` to be in scope (mirrors bv2i16 above). */
#define bv4i8(fxn) \
        for (unsigned lane = 0; lane < 4; ++lane) { \
                dest.u8[lane] = fxn(srcs[0].u8[ins->swizzle[0][lane]], \
                                srcs[1].u8[ins->swizzle[1][lane]], \
                                srcs[2].u8[ins->swizzle[2][lane]], \
                                srcs[3].u8[ins->swizzle[3][lane]]); \
        }
+
/* Scalar 32-bit wrappers: apply fxn to all four sources as one unswizzled
 * 32-bit operation. Fix: srcs[3] previously read `.i32` while the other
 * three sources read `.u32` — bit-identical through the union, but
 * inconsistent; all four now use `.u32` to match. */
#define bf32(fxn) dest.f32 = fxn(srcs[0].f32, srcs[1].f32, srcs[2].f32, srcs[3].f32)
#define bi32(fxn) dest.i32 = fxn(srcs[0].u32, srcs[1].u32, srcs[2].u32, srcs[3].u32)
bv2i16(fxn16); \
break; \
} else if (ins->dest_type == nir_type_int8 || ins->dest_type == nir_type_uint8) { \
- unreachable("TODO: 8-bit"); \
+ bv4i8(fxn8); \
+ break; \
}
#define bpoly(name) \
bint(bit_i64 ## name, bit_i32 ## name, bit_i16 ## name, bit_i8 ## name); \
unreachable("Invalid type");
/* Emit a pair of helpers, bit_f64<name> and bit_f32<name>, with separate
 * expressions per precision so the 32-bit path can use the float-suffixed
 * libm entrypoints (floorf vs floor, etc.). Fix: the generated text no
 * longer ends with a dangling `\` after the final `}` — that continuation
 * spliced the following source line into the macro and only worked because
 * a blank line happened to follow. */
#define bit_make_float_2(name, expr32, expr64) \
        static inline double \
        bit_f64 ## name(double a, double b, double c, double d) \
        { \
                return expr64; \
        } \
        static inline float \
        bit_f32 ## name(float a, float b, float c, float d) \
        { \
                return expr32; \
        }

/* Common case: the same expression is valid at both precisions */
#define bit_make_float(name, expr) \
        bit_make_float_2(name, expr, expr)
+
#define bit_make_int(name, expr) \
static inline int64_t \
bit_i64 ## name (int64_t a, int64_t b, int64_t c, int64_t d) \
bit_make_int(name, expr) \
/* Instantiate the typed arithmetic helpers (bit_i64add, bit_f32fma, ...)
 * consumed by the bpoly/bint/bfloat dispatchers in the interpreter below. */
bit_make_poly(add, a + b);
bit_make_int(sub, a - b);
bit_make_float(fma, (a * b) + c);
bit_make_poly(mov, a);
bit_make_poly(min, MIN2(a, b));
bit_make_poly(max, MAX2(a, b));
/* Rounding helpers for BI_ROUND: float-suffixed libm calls at f32,
 * plain double variants at f64. nearbyint honours the current FP
 * environment rounding mode — presumably round-to-nearest-even here
 * (the default); confirm nothing changes fesetround. */
bit_make_float_2(floor, floorf(a), floor(a));
bit_make_float_2(ceil, ceilf(a), ceil(a));
bit_make_float_2(trunc, truncf(a), trunc(a));
bit_make_float_2(nearbyint, nearbyintf(a), nearbyint(a));
/* Modifiers */
case BIFROST_SAT_SIGNED:
return CLAMP(raw, -1.0, 1.0);
case BIFROST_SAT:
- return CLAMP(raw, 0.0, 1.0);
+ return SATURATE(raw);
default:
return raw;
}
else { return true; }
/* Evaluate comparison `cond` between lane cl of l and lane cr of r,
 * interpreting the operands per ALU type T. 32-bit types ignore the lane
 * indices (the whole word is the lane). NOTE(review): BIT_COND presumably
 * expands to a `return` of the comparison result — confirm against its
 * definition; every branch below relies on that, as this function has no
 * visible return of its own. */
static bool
bit_eval_cond(enum bi_cond cond, bit_t l, bit_t r, nir_alu_type T, unsigned cl, unsigned cr)
{
        if (T == nir_type_float32) {
                BIT_COND(cond, l.f32, r.f32);
        } else if (T == nir_type_float16) {
                /* Widen the selected half-float lanes to float to compare */
                float left = bf(l.f16[cl]);
                float right = bf(r.f16[cr]);
                BIT_COND(cond, left, right);
        } else if (T == nir_type_int32) {
                /* Reinterpret the 32-bit payload as signed */
                int32_t left = l.u32;
                int32_t right = r.u32;
                BIT_COND(cond, left, right);
        } else if (T == nir_type_int16) {
                int16_t left = l.i16[cl];
                int16_t right = r.i16[cr];
                BIT_COND(cond, left, right);
        } else if (T == nir_type_uint32) {
                BIT_COND(cond, l.u32, r.u32);
        } else if (T == nir_type_uint16) {
                BIT_COND(cond, l.u16[cl], r.u16[cr]);
        } else {
                unreachable("Unknown type evaluated");
        }
}
+static unsigned
+bit_cmp(enum bi_cond cond, bit_t l, bit_t r, nir_alu_type T, unsigned cl, unsigned cr, bool d3d)
+{
+ bool v = bit_eval_cond(cond, l, r, T, cl, cr);
+
+ /* Fill for D3D but only up to 32-bit... 64-bit is only partial
+ * (although we probably need a cleverer representation for 64-bit) */
+
+ unsigned sz = MIN2(nir_alu_type_get_type_size(T), 32);
+ unsigned max = (sz == 32) ? (~0) : ((1 << sz) - 1);
+
+ return v ? (d3d ? max : 1) : 0;
+}
+
static float
biti_special(float Q, enum bi_special_op op)
{
bpoly(add);
case BI_BRANCH:
- case BI_CMP:
- case BI_BITWISE:
unreachable("Unsupported op");
+ case BI_CMP: {
+ nir_alu_type T = ins->src_types[0];
+ unsigned sz = nir_alu_type_get_type_size(T);
+
+ if (sz == 32 || sz == 64) {
+ dest.u32 = bit_cmp(ins->cond, srcs[0], srcs[1], T, 0, 0, false);
+ } else if (sz == 16) {
+ for (unsigned c = 0; c < 2; ++c) {
+ dest.u16[c] = bit_cmp(ins->cond, srcs[0], srcs[1],
+ T, ins->swizzle[0][c], ins->swizzle[1][c],
+ false);
+ }
+ } else if (sz == 8) {
+ for (unsigned c = 0; c < 4; ++c) {
+ dest.u8[c] = bit_cmp(ins->cond, srcs[0], srcs[1],
+ T, ins->swizzle[0][c], ins->swizzle[1][c],
+ false);
+ }
+ } else {
+ unreachable("Invalid");
+ }
+
+ break;
+ }
+
+ case BI_BITWISE: {
+ /* Apply inverts first */
+ if (ins->bitwise.src_invert[0])
+ srcs[0].u64 = ~srcs[0].u64;
+
+ if (ins->bitwise.src_invert[1])
+ srcs[1].u64 = ~srcs[1].u64;
+
+ /* TODO: Shifting */
+ assert(srcs[2].u32 == 0);
+
+ if (ins->op.bitwise == BI_BITWISE_AND)
+ dest.u64 = srcs[0].u64 & srcs[1].u64;
+ else if (ins->op.bitwise == BI_BITWISE_OR)
+ dest.u64 = srcs[0].u64 | srcs[1].u64;
+ else if (ins->op.bitwise == BI_BITWISE_XOR)
+ dest.u64 = srcs[0].u64 ^ srcs[1].u64;
+ else
+ unreachable("Unsupported op");
+
+ break;
+ }
+
case BI_CONVERT: {
/* If it exists */
unsigned comp = ins->swizzle[0][1];
dest.i32 = bit_as_int32(ins->src_types[0], srcs[0], comp, ins->roundmode);
else if (ins->dest_type == nir_type_float16) {
dest.u16[0] = bit_as_float16(ins->src_types[0], srcs[0], ins->swizzle[0][0]);
-
- if (ins->src_types[0] == nir_type_float32) {
- /* TODO: Second argument */
- dest.u16[1] = 0;
- } else {
- dest.u16[1] = bit_as_float16(ins->src_types[0], srcs[0], ins->swizzle[0][1]);
- }
+ dest.u16[1] = bit_as_float16(ins->src_types[0], srcs[0], ins->swizzle[0][1]);
} else if (ins->dest_type == nir_type_uint16) {
dest.u16[0] = bit_as_uint16(ins->src_types[0], srcs[0], ins->swizzle[0][0], ins->roundmode);
dest.u16[1] = bit_as_uint16(ins->src_types[0], srcs[0], ins->swizzle[0][1], ins->roundmode);
case BI_CSEL: {
bool direct = ins->cond == BI_COND_ALWAYS;
- bool cond = direct ? srcs[0].u32 :
- bit_eval_cond(ins->csel_cond, srcs[0], srcs[1], ins->src_types[0], 0);
+ unsigned sz = nir_alu_type_get_type_size(ins->src_types[0]);
+
+ if (sz == 32) {
+ bool cond = direct ? srcs[0].u32 :
+ bit_eval_cond(ins->cond, srcs[0], srcs[1], ins->src_types[0], 0, 0);
+
+ dest = cond ? srcs[2] : srcs[3];
+ } else if (sz == 16) {
+ for (unsigned c = 0; c < 2; ++c) {
+ bool cond = direct ? srcs[0].u16[c] :
+ bit_eval_cond(ins->cond, srcs[0], srcs[1], ins->src_types[0], c, c);
+
+ dest.u16[c] = cond ? srcs[2].u16[c] : srcs[3].u16[c];
+ }
+ } else {
+ unreachable("Remaining types todo");
+ }
- dest = cond ? srcs[2] : srcs[3];
break;
}
break;
}
- case BI_ISUB:
- unreachable("Unsupported op");
+
+ case BI_IMATH: {
+ if (ins->op.imath == BI_IMATH_ADD) {
+ bint(bit_i64add, bit_i32add, bit_i16add, bit_i8add);
+ } else if (ins->op.imath == BI_IMATH_SUB) {
+ bint(bit_i64sub, bit_i32sub, bit_i16sub, bit_i8sub);
+ } else {
+ unreachable("Unsupported op");
+ }
+
+ break;
+ }
case BI_MINMAX: {
if (ins->op.minmax == BI_MINMAX_MIN) {
break;
}
- case BI_SHIFT:
- case BI_ROUND:
- unreachable("Unsupported op");
+ case BI_ROUND: {
+ if (ins->roundmode == BIFROST_RTP) {
+ bfloat(bit_f64ceil, bit_f32ceil);
+ } else if (ins->roundmode == BIFROST_RTN) {
+ bfloat(bit_f64floor, bit_f32floor);
+ } else if (ins->roundmode == BIFROST_RTE) {
+ bfloat(bit_f64nearbyint, bit_f32nearbyint);
+ } else if (ins->roundmode == BIFROST_RTZ) {
+ bfloat(bit_f64trunc, bit_f32trunc);
+ } else
+ unreachable("Invalid");
+
+ break;
+ }
/* We only interpret vertex shaders */
case BI_DISCARD: