/*
 * Copyright 2011 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
24 #include "codegen/nv50_ir.h"
25 #include "codegen/nv50_ir_build_util.h"
27 #include "codegen/nv50_ir_target_nvc0.h"
28 #include "codegen/nv50_ir_lowering_gm107.h"
// Pack four 2-bit per-lane quad operations (QOP_* codes) into the 8-bit
// subOp of an OP_QUADOP instruction; the first argument lands in the top
// two bits.
#define QUADOP(q, r, s, t)            \
   ((QOP_##q << 6) | (QOP_##r << 4) | \
    (QOP_##s << 2) | (QOP_##t << 0))
45 GM107LoweringPass::handleManualTXD(TexInstruction
*i
)
47 static const uint8_t qOps
[4][2] =
49 { QUADOP(MOV2
, ADD
, MOV2
, ADD
), QUADOP(MOV2
, MOV2
, ADD
, ADD
) }, // l0
50 { QUADOP(SUBR
, MOV2
, SUBR
, MOV2
), QUADOP(MOV2
, MOV2
, ADD
, ADD
) }, // l1
51 { QUADOP(MOV2
, ADD
, MOV2
, ADD
), QUADOP(SUBR
, SUBR
, MOV2
, MOV2
) }, // l2
52 { QUADOP(SUBR
, MOV2
, SUBR
, MOV2
), QUADOP(SUBR
, SUBR
, MOV2
, MOV2
) }, // l3
57 Instruction
*tex
, *add
;
58 Value
*zero
= bld
.loadImm(bld
.getSSA(), 0);
60 const int dim
= i
->tex
.target
.getDim();
61 const int array
= i
->tex
.target
.isArray();
63 i
->op
= OP_TEX
; // no need to clone dPdx/dPdy later
65 for (c
= 0; c
< dim
; ++c
)
66 crd
[c
] = bld
.getScratch();
67 tmp
= bld
.getScratch();
69 for (l
= 0; l
< 4; ++l
) {
70 // mov coordinates from lane l to all lanes
71 bld
.mkOp(OP_QUADON
, TYPE_NONE
, NULL
);
72 for (c
= 0; c
< dim
; ++c
) {
73 bld
.mkOp2(OP_SHFL
, TYPE_F32
, crd
[c
], i
->getSrc(c
+ array
), bld
.mkImm(l
));
74 add
= bld
.mkOp2(OP_QUADOP
, TYPE_F32
, crd
[c
], crd
[c
], zero
);
76 add
->lanes
= 1; /* abused for .ndv */
79 // add dPdx from lane l to lanes dx
80 for (c
= 0; c
< dim
; ++c
) {
81 bld
.mkOp2(OP_SHFL
, TYPE_F32
, tmp
, i
->dPdx
[c
].get(), bld
.mkImm(l
));
82 add
= bld
.mkOp2(OP_QUADOP
, TYPE_F32
, crd
[c
], tmp
, crd
[c
]);
83 add
->subOp
= qOps
[l
][0];
84 add
->lanes
= 1; /* abused for .ndv */
87 // add dPdy from lane l to lanes dy
88 for (c
= 0; c
< dim
; ++c
) {
89 bld
.mkOp2(OP_SHFL
, TYPE_F32
, tmp
, i
->dPdy
[c
].get(), bld
.mkImm(l
));
90 add
= bld
.mkOp2(OP_QUADOP
, TYPE_F32
, crd
[c
], tmp
, crd
[c
]);
91 add
->subOp
= qOps
[l
][1];
92 add
->lanes
= 1; /* abused for .ndv */
96 bld
.insert(tex
= cloneForward(func
, i
));
97 for (c
= 0; c
< dim
; ++c
)
98 tex
->setSrc(c
+ array
, crd
[c
]);
99 bld
.mkOp(OP_QUADPOP
, TYPE_NONE
, NULL
);
102 for (c
= 0; i
->defExists(c
); ++c
) {
104 def
[c
][l
] = bld
.getSSA();
105 mov
= bld
.mkMov(def
[c
][l
], tex
->getDef(c
));
111 for (c
= 0; i
->defExists(c
); ++c
) {
112 Instruction
*u
= bld
.mkOp(OP_UNION
, TYPE_U32
, i
->getDef(c
));
113 for (l
= 0; l
< 4; ++l
)
114 u
->setSrc(l
, def
[c
][l
]);
122 GM107LoweringPass::handleDFDX(Instruction
*insn
)
125 int qop
= 0, xid
= 0;
129 qop
= QUADOP(SUB
, SUBR
, SUB
, SUBR
);
133 qop
= QUADOP(SUB
, SUB
, SUBR
, SUBR
);
137 assert(!"invalid dfdx opcode");
141 shfl
= bld
.mkOp2(OP_SHFL
, TYPE_F32
, bld
.getScratch(),
142 insn
->getSrc(0), bld
.mkImm(xid
));
143 shfl
->subOp
= NV50_IR_SUBOP_SHFL_BFLY
;
144 insn
->op
= OP_QUADOP
;
146 insn
->lanes
= 0; /* abused for !.ndv */
147 insn
->setSrc(1, insn
->getSrc(0));
148 insn
->setSrc(0, shfl
->getDef(0));
153 GM107LoweringPass::handlePFETCH(Instruction
*i
)
155 Value
*tmp0
= bld
.getScratch();
156 Value
*tmp1
= bld
.getScratch();
157 Value
*tmp2
= bld
.getScratch();
158 bld
.mkOp1(OP_RDSV
, TYPE_U32
, tmp0
, bld
.mkSysVal(SV_INVOCATION_INFO
, 0));
159 bld
.mkOp2(OP_SHR
, TYPE_U32
, tmp1
, tmp0
, bld
.mkImm(16));
160 bld
.mkOp2(OP_AND
, TYPE_U32
, tmp0
, tmp0
, bld
.mkImm(0xff));
161 bld
.mkOp2(OP_AND
, TYPE_U32
, tmp1
, tmp1
, bld
.mkImm(0xff));
163 bld
.mkOp2(OP_ADD
, TYPE_U32
, tmp2
, i
->getSrc(0), i
->getSrc(1));
165 bld
.mkOp1(OP_MOV
, TYPE_U32
, tmp2
, i
->getSrc(0));
166 bld
.mkOp3(OP_MAD
, TYPE_U32
, tmp0
, tmp0
, tmp1
, tmp2
);
173 GM107LoweringPass::handlePOPCNT(Instruction
*i
)
175 Value
*tmp
= bld
.mkOp2v(OP_AND
, i
->sType
, bld
.getScratch(),
176 i
->getSrc(0), i
->getSrc(1));
//
// - add quadop dance for texturing
// - put FP outputs in GPRs
// - convert instruction sequences
//
188 GM107LoweringPass::visit(Instruction
*i
)
190 bld
.setPosition(i
, false);
192 if (i
->cc
!= CC_ALWAYS
)
197 return handlePFETCH(i
);
200 return handleDFDX(i
);
202 return handlePOPCNT(i
);
204 return NVC0LoweringPass::visit(i
);
208 } // namespace nv50_ir