case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSEQ:
case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_I64SGE:
+ case TGSI_OPCODE_U64SGE:
+ case TGSI_OPCODE_I642F:
+ case TGSI_OPCODE_U642F:
switch (util_bitcount(mask)) {
case 1: return 0x3;
case 2: return 0xf;
case TGSI_OPCODE_SHL:
case TGSI_OPCODE_U2F:
case TGSI_OPCODE_U2D:
+ case TGSI_OPCODE_U2I64:
case TGSI_OPCODE_UADD:
case TGSI_OPCODE_UDIV:
case TGSI_OPCODE_UMOD:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_I2D:
+ case TGSI_OPCODE_I2I64:
case TGSI_OPCODE_IDIV:
case TGSI_OPCODE_IMUL_HI:
case TGSI_OPCODE_IMAX:
case TGSI_OPCODE_D2F:
case TGSI_OPCODE_D2I:
case TGSI_OPCODE_D2U:
+ case TGSI_OPCODE_D2I64:
+ case TGSI_OPCODE_D2U64:
case TGSI_OPCODE_DABS:
case TGSI_OPCODE_DNEG:
case TGSI_OPCODE_DADD:
case TGSI_OPCODE_DFLR:
case TGSI_OPCODE_DROUND:
return nv50_ir::TYPE_F64;
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_U64SGE:
+ case TGSI_OPCODE_U64MIN:
+ case TGSI_OPCODE_U64MAX:
+ case TGSI_OPCODE_U64ADD:
+ case TGSI_OPCODE_U64MUL:
+ case TGSI_OPCODE_U64SHL:
+ case TGSI_OPCODE_U64SHR:
+ case TGSI_OPCODE_U64DIV:
+ case TGSI_OPCODE_U64MOD:
+ case TGSI_OPCODE_U642F:
+ case TGSI_OPCODE_U642D:
+ return nv50_ir::TYPE_U64;
+ case TGSI_OPCODE_I64ABS:
+ case TGSI_OPCODE_I64SSG:
+ case TGSI_OPCODE_I64NEG:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_I64SGE:
+ case TGSI_OPCODE_I64MIN:
+ case TGSI_OPCODE_I64MAX:
+ case TGSI_OPCODE_I64SHR:
+ case TGSI_OPCODE_I64DIV:
+ case TGSI_OPCODE_I64MOD:
+ case TGSI_OPCODE_I642F:
+ case TGSI_OPCODE_I642D:
+ return nv50_ir::TYPE_S64;
default:
return nv50_ir::TYPE_F32;
}
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_I64SGE:
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_U64SGE:
case TGSI_OPCODE_PK2H:
return nv50_ir::TYPE_U32;
case TGSI_OPCODE_I2F:
case TGSI_OPCODE_U2F:
case TGSI_OPCODE_D2F:
+ case TGSI_OPCODE_I642F:
+ case TGSI_OPCODE_U642F:
case TGSI_OPCODE_UP2H:
return nv50_ir::TYPE_F32;
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_U2D:
case TGSI_OPCODE_F2D:
+ case TGSI_OPCODE_I642D:
+ case TGSI_OPCODE_U642D:
return nv50_ir::TYPE_F64;
+ case TGSI_OPCODE_I2I64:
+ case TGSI_OPCODE_U2I64:
+ case TGSI_OPCODE_F2I64:
+ case TGSI_OPCODE_D2I64:
+ return nv50_ir::TYPE_S64;
+ case TGSI_OPCODE_F2U64:
+ case TGSI_OPCODE_D2U64:
+ return nv50_ir::TYPE_U64;
default:
return inferSrcType();
}
case TGSI_OPCODE_USLT:
case TGSI_OPCODE_FSLT:
case TGSI_OPCODE_DSLT:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_U64SLT:
return CC_LT;
case TGSI_OPCODE_SLE:
return CC_LE;
case TGSI_OPCODE_USGE:
case TGSI_OPCODE_FSGE:
case TGSI_OPCODE_DSGE:
+ case TGSI_OPCODE_I64SGE:
+ case TGSI_OPCODE_U64SGE:
return CC_GE;
case TGSI_OPCODE_SGT:
return CC_GT;
case TGSI_OPCODE_USEQ:
case TGSI_OPCODE_FSEQ:
case TGSI_OPCODE_DSEQ:
+ case TGSI_OPCODE_U64SEQ:
return CC_EQ;
case TGSI_OPCODE_SNE:
case TGSI_OPCODE_FSNE:
case TGSI_OPCODE_DSNE:
+ case TGSI_OPCODE_U64SNE:
return CC_NEU;
case TGSI_OPCODE_USNE:
return CC_NE;
NV50_IR_OPCODE_CASE(DFLR, FLOOR);
NV50_IR_OPCODE_CASE(DROUND, CVT);
+ NV50_IR_OPCODE_CASE(U64SEQ, SET);
+ NV50_IR_OPCODE_CASE(U64SNE, SET);
+ NV50_IR_OPCODE_CASE(U64SLT, SET);
+ NV50_IR_OPCODE_CASE(U64SGE, SET);
+ NV50_IR_OPCODE_CASE(I64SLT, SET);
+ NV50_IR_OPCODE_CASE(I64SGE, SET);
+ NV50_IR_OPCODE_CASE(I2I64, CVT);
+ NV50_IR_OPCODE_CASE(U2I64, CVT);
+ NV50_IR_OPCODE_CASE(F2I64, CVT);
+ NV50_IR_OPCODE_CASE(F2U64, CVT);
+ NV50_IR_OPCODE_CASE(D2I64, CVT);
+ NV50_IR_OPCODE_CASE(D2U64, CVT);
+ NV50_IR_OPCODE_CASE(I642F, CVT);
+ NV50_IR_OPCODE_CASE(U642F, CVT);
+ NV50_IR_OPCODE_CASE(I642D, CVT);
+ NV50_IR_OPCODE_CASE(U642D, CVT);
+
+ NV50_IR_OPCODE_CASE(I64MIN, MIN);
+ NV50_IR_OPCODE_CASE(U64MIN, MIN);
+ NV50_IR_OPCODE_CASE(I64MAX, MAX);
+ NV50_IR_OPCODE_CASE(U64MAX, MAX);
+ NV50_IR_OPCODE_CASE(I64ABS, ABS);
+ NV50_IR_OPCODE_CASE(I64NEG, NEG);
+ NV50_IR_OPCODE_CASE(U64ADD, ADD);
+ NV50_IR_OPCODE_CASE(U64MUL, MUL);
+ NV50_IR_OPCODE_CASE(U64SHL, SHL);
+ NV50_IR_OPCODE_CASE(I64SHR, SHR);
+ NV50_IR_OPCODE_CASE(U64SHR, SHR);
+
NV50_IR_OPCODE_CASE(IMUL_HI, MUL);
NV50_IR_OPCODE_CASE(UMUL_HI, MUL);
case TGSI_OPCODE_INTERP_OFFSET:
handleINTERP(dst0);
break;
+ case TGSI_OPCODE_I642F:
+ case TGSI_OPCODE_U642F:
case TGSI_OPCODE_D2I:
case TGSI_OPCODE_D2U:
case TGSI_OPCODE_D2F: {
}
break;
}
+ case TGSI_OPCODE_I2I64: // 32-bit signed int -> 64-bit int; each result occupies the channel pair (c, c + 1)
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ dst0[c] = fetchSrc(0, c / 2); // low word: source component c / 2, reused unchanged
+ mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(NULL, 31)); // high word: low word shifted right by 31 as S32 (presumably arithmetic, i.e. sign fill)
+ c++; // step over the high-word channel; presumably the macro advances c once more on its own
+ }
+ break;
+ case TGSI_OPCODE_U2I64: // 32-bit unsigned int -> 64-bit int; each result occupies the channel pair (c, c + 1)
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ dst0[c] = fetchSrc(0, c / 2); // low word: source component c / 2, reused unchanged
+ dst0[c + 1] = zero; // high word: the `zero` value defined outside this view (presumably constant 0)
+ c++; // step over the high-word channel; presumably the macro advances c once more on its own
+ }
+ break;
+ case TGSI_OPCODE_F2I64: // 32-bit source -> 64-bit result conversions share one lowering
+ case TGSI_OPCODE_F2U64:
case TGSI_OPCODE_I2D:
case TGSI_OPCODE_U2D:
case TGSI_OPCODE_F2D:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
Value *dreg = getSSA(8); // 8-byte temporary that receives the converted value
- mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2));
+ Instruction *cvt = mkCvt(OP_CVT, dstTy, dreg, srcTy, fetchSrc(0, c / 2)); // keep the instruction so its rounding mode can be set below
+ if (!isFloatType(dstTy))
+ cvt->rnd = ROUND_Z; // non-float destination types get ROUND_Z (presumably round-toward-zero)
mkSplit(&dst0[c], 4, dreg); // split the 8-byte value into 4-byte pieces starting at dst0[c]
c++; // step over the second channel of the pair
}
break;
+ case TGSI_OPCODE_D2I64: // 64-bit source -> 64-bit result conversions share one lowering
+ case TGSI_OPCODE_D2U64:
+ case TGSI_OPCODE_I642D:
+ case TGSI_OPCODE_U642D:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8); // 8-byte temporary for the merged source
+ Value *dst = getSSA(8), *tmp[2];
+ tmp[0] = fetchSrc(0, c); // the two 32-bit source components c and c + 1
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); // combine both components into one 64-bit value
+ Instruction *cvt = mkCvt(OP_CVT, dstTy, dst, srcTy, src0);
+ if (!isFloatType(dstTy))
+ cvt->rnd = ROUND_Z; // non-float destination types get ROUND_Z (presumably round-toward-zero)
+ mkSplit(&dst0[c], 4, dst); // split the 8-byte result into 4-byte pieces starting at dst0[c]
+ c++; // step over the second channel of the pair
+ }
+ break;
+ case TGSI_OPCODE_I64NEG: // 64-bit integer negate, emitted as (zero - src)
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8); // 8-byte temporary for the merged source
+ Value *dst = getSSA(8), *tmp[2];
+ tmp[0] = fetchSrc(0, c); // the two 32-bit source components c and c + 1
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); // combine both components into one 64-bit value
+ mkOp2(OP_SUB, dstTy, dst, zero, src0); // dst = zero - src0 in dstTy
+ mkSplit(&dst0[c], 4, dst); // split the 8-byte result into 4-byte pieces starting at dst0[c]
+ c++; // step over the second channel of the pair
+ }
+ break;
+ case TGSI_OPCODE_I64ABS: // 64-bit integer absolute value: build the negation, then select per 32-bit half
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8); // 8-byte temporary for the merged source
+ Value *neg = getSSA(8), *srcComp[2], *negComp[2];
+ srcComp[0] = fetchSrc(0, c); // the two 32-bit source components c and c + 1
+ srcComp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, srcComp[0], srcComp[1]); // combine both components into one 64-bit value
+ mkOp2(OP_SUB, dstTy, neg, zero, src0); // neg = zero - src0
+ mkSplit(negComp, 4, neg); // negComp[] receives the 4-byte pieces of the negated value
+ mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c], TYPE_S32, // select for channel c, keyed on srcComp[1] (second source component)
+ negComp[0], srcComp[0], srcComp[1]); // presumably picks negComp[0] when srcComp[1] < 0, else srcComp[0] - confirm SLCT operand order
+ mkCmp(OP_SLCT, CC_LT, TYPE_S32, dst0[c + 1], TYPE_S32, // same selection for channel c + 1
+ negComp[1], srcComp[1], srcComp[1]);
+ c++; // step over the second channel of the pair
+ }
+ break;
case TGSI_OPCODE_DABS:
case TGSI_OPCODE_DNEG:
case TGSI_OPCODE_DRCP:
c++;
}
break;
+ case TGSI_OPCODE_U64SEQ:
+ case TGSI_OPCODE_U64SNE:
+ case TGSI_OPCODE_U64SLT:
+ case TGSI_OPCODE_U64SGE:
+ case TGSI_OPCODE_I64SLT:
+ case TGSI_OPCODE_I64SGE:
case TGSI_OPCODE_DSLT:
case TGSI_OPCODE_DSGE:
case TGSI_OPCODE_DSEQ:
}
break;
}
+ case TGSI_OPCODE_U64MIN: // 64-bit min/max emitted as two 32-bit ops linked through a flags value
+ case TGSI_OPCODE_U64MAX:
+ case TGSI_OPCODE_I64MIN:
+ case TGSI_OPCODE_I64MAX: {
+ dstTy = isSignedIntType(dstTy) ? TYPE_S32 : TYPE_U32; // 32-bit type of matching signedness, used for the c + 1 half
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ Value *flag = getSSA(1, FILE_FLAGS); // flags-file value connecting the two halves
+ src0 = fetchSrc(0, c + 1); // operands for channel c + 1 are processed first
+ src1 = fetchSrc(1, c + 1);
+ geni = mkOp2(op, dstTy, dst0[c + 1], src0, src1);
+ geni->subOp = NV50_IR_SUBOP_MINMAX_HIGH;
+ geni->setFlagsDef(1, flag); // this op defines `flag` as its def #1
+
+ src0 = fetchSrc(0, c); // operands for channel c
+ src1 = fetchSrc(1, c);
+ geni = mkOp2(op, TYPE_U32, dst0[c], src0, src1); // always TYPE_U32 here, independent of signedness
+ geni->subOp = NV50_IR_SUBOP_MINMAX_LOW;
+ geni->setFlagsSrc(2, flag); // this op reads `flag` as its source #2
+
+ c++; // step over the second channel of the pair
+ }
+ break;
+ }
+ case TGSI_OPCODE_U64SHL: // 64-bit shifts: 64-bit value from source 0, shift count from one component of source 1
+ case TGSI_OPCODE_I64SHR:
+ case TGSI_OPCODE_U64SHR:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8); // 8-byte temporary for the merged value to shift
+ Value *dst = getSSA(8), *tmp[2];
+ tmp[0] = fetchSrc(0, c); // the two 32-bit source components c and c + 1
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]); // combine both components into one 64-bit value
+ src1 = fetchSrc(1, c / 2); // shift count: a single component (index c / 2) of source 1
+ mkOp2(op, dstTy, dst, src0, src1);
+ mkSplit(&dst0[c], 4, dst); // split the 8-byte result into 4-byte pieces starting at dst0[c]
+ c++; // step over the second channel of the pair
+ }
+ break;
+ case TGSI_OPCODE_U64ADD:
+ case TGSI_OPCODE_U64MUL:
case TGSI_OPCODE_DADD:
case TGSI_OPCODE_DMUL:
case TGSI_OPCODE_DDIV:
mkSplit(&dst0[c], 4, dst);
c++;
}
+ // Terminate the preceding case here: without this break its loop would fall
+ // through into the I64SSG lowering below and have its results overwritten.
+ break;
+ // I64SSG: sign of a 64-bit signed integer. The result is built in the channel
+ // pair (c, c + 1): channel c gets (src < 0) - (src > 0) computed on the SET
+ // results, and channel c + 1 gets that value shifted right by 31 as S32.
+ case TGSI_OPCODE_I64SSG:
+ FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+ src0 = getSSA(8); // 8-byte temporary for the merged source
+ Value *tmp[2];
+ tmp[0] = fetchSrc(0, c);
+ tmp[1] = fetchSrc(0, c + 1);
+ mkOp2(OP_MERGE, TYPE_U64, src0, tmp[0], tmp[1]);
+
+ val0 = getScratch();
+ val1 = getScratch();
+ // NOTE(review): `zero` is compared against a 64-bit operand here; confirm
+ // that the value defined outside this view is acceptable as an S64 source.
+ mkCmp(OP_SET, CC_GT, TYPE_U32, val0, TYPE_S64, src0, zero); // val0 = (src > 0)
+ mkCmp(OP_SET, CC_LT, TYPE_U32, val1, TYPE_S64, src0, zero); // val1 = (src < 0)
+ mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0);
+ // Second word: first word shifted right by 31 with a signed type.
+ mkOp2(OP_SHR, TYPE_S32, dst0[c + 1], dst0[c], loadImm(NULL, 31));
+ c++; // step over the second channel of the pair
+ }
break;
default:
ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode());