2 * Copyright 2011 Christoph Bumiller
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
24 #include "codegen/nv50_ir.h"
25 #include "codegen/nv50_ir_build_util.h"
27 #include "codegen/nv50_ir_target_nvc0.h"
28 #include "codegen/nv50_ir_lowering_gm107.h"
// Builds one quad-op selector value from four QOP_* names: the constants
// QOP_<q>, QOP_<r>, QOP_<s> and QOP_<t> are shifted left by 6, 4, 2 and 0
// bits respectively and OR-ed together. The QOP_* constants themselves are
// defined outside this excerpt (presumably 2-bit codes -- TODO confirm).
40 #define QUADOP(q, r, s, t) \
41 ((QOP_##q << 6) | (QOP_##r << 4) | \
42 (QOP_##s << 2) | (QOP_##t << 0))
// Lowers a texture instruction with explicit derivatives (i->dPdx / i->dPdy)
// into four plain OP_TEX fetches, one per lane index l in [0, 4).
// For each lane the coordinates of lane l are broadcast with OP_SHFL, the
// derivatives are folded into them with OP_QUADOP, a clone of the texture
// instruction is issued on the adjusted coordinates, and its results are
// copied aside. After the loop the four per-lane copies of every result are
// merged into the original destination with OP_UNION.
// NOTE(review): the declarations of crd, tmp, def, l, c and mov, the closing
// braces and the return statement are not visible in this excerpt.
45 GM107LoweringPass::handleManualTXD(TexInstruction
*i
)
// Quad-op selectors indexed by lane: column 0 is used as subOp when adding
// dPdx, column 1 when adding dPdy (see the subOp assignments below).
47 static const uint8_t qOps
[4][2] =
49 { QUADOP(MOV2
, ADD
, MOV2
, ADD
), QUADOP(MOV2
, MOV2
, ADD
, ADD
) }, // l0
50 { QUADOP(SUBR
, MOV2
, SUBR
, MOV2
), QUADOP(MOV2
, MOV2
, ADD
, ADD
) }, // l1
51 { QUADOP(MOV2
, ADD
, MOV2
, ADD
), QUADOP(SUBR
, SUBR
, MOV2
, MOV2
) }, // l2
52 { QUADOP(SUBR
, MOV2
, SUBR
, MOV2
), QUADOP(SUBR
, SUBR
, MOV2
, MOV2
) }, // l3
57 Instruction
*tex
, *add
;
// Immediate 0, used as the second operand of the coordinate QUADOP below.
58 Value
*zero
= bld
.loadImm(bld
.getSSA(), 0);
// Number of coordinate components, taken from the texture target.
60 const int dim
= i
->tex
.target
.getDim();
62 i
->op
= OP_TEX
; // no need to clone dPdx/dPdy later
// One scratch value per coordinate component, plus one shared temporary.
64 for (c
= 0; c
< dim
; ++c
)
65 crd
[c
] = bld
.getScratch();
66 tmp
= bld
.getScratch();
// One iteration per lane; everything between OP_QUADON and OP_QUADPOP
// belongs to that lane's fetch.
68 for (l
= 0; l
< 4; ++l
) {
69 // mov coordinates from lane l to all lanes
70 bld
.mkOp(OP_QUADON
, TYPE_NONE
, NULL
);
71 for (c
= 0; c
< dim
; ++c
) {
72 bld
.mkOp2(OP_SHFL
, TYPE_F32
, crd
[c
], i
->getSrc(c
), bld
.mkImm(l
));
73 add
= bld
.mkOp2(OP_QUADOP
, TYPE_F32
, crd
[c
], crd
[c
], zero
);
75 add
->lanes
= 1; /* abused for .ndv */
78 // add dPdx from lane l to lanes dx
79 for (c
= 0; c
< dim
; ++c
) {
80 bld
.mkOp2(OP_SHFL
, TYPE_F32
, tmp
, i
->dPdx
[c
].get(), bld
.mkImm(l
));
81 add
= bld
.mkOp2(OP_QUADOP
, TYPE_F32
, crd
[c
], tmp
, crd
[c
]);
82 add
->subOp
= qOps
[l
][0];
83 add
->lanes
= 1; /* abused for .ndv */
86 // add dPdy from lane l to lanes dy
87 for (c
= 0; c
< dim
; ++c
) {
88 bld
.mkOp2(OP_SHFL
, TYPE_F32
, tmp
, i
->dPdy
[c
].get(), bld
.mkImm(l
));
89 add
= bld
.mkOp2(OP_QUADOP
, TYPE_F32
, crd
[c
], tmp
, crd
[c
]);
90 add
->subOp
= qOps
[l
][1];
91 add
->lanes
= 1; /* abused for .ndv */
// Insert a clone of the texture instruction and point its first dim
// sources at the adjusted coordinates.
95 bld
.insert(tex
= cloneForward(func
, i
));
96 for (c
= 0; c
< dim
; ++c
)
97 tex
->setSrc(c
, crd
[c
]);
98 bld
.mkOp(OP_QUADPOP
, TYPE_NONE
, NULL
);
// Copy every result of the clone into a fresh SSA value, stored per
// (component c, lane l).
101 for (c
= 0; i
->defExists(c
); ++c
) {
103 def
[c
][l
] = bld
.getSSA();
104 mov
= bld
.mkMov(def
[c
][l
], tex
->getDef(c
));
// Merge the four per-lane copies of each component into the original
// destination of i through a 4-source OP_UNION.
110 for (c
= 0; i
->defExists(c
); ++c
) {
111 Instruction
*u
= bld
.mkOp(OP_UNION
, TYPE_U32
, i
->getDef(c
));
112 for (l
= 0; l
< 4; ++l
)
113 u
->setSrc(l
, def
[c
][l
]);
// Rewrites a derivative instruction in place as OP_SHFL + OP_QUADOP.
// Two QUADOP patterns are prepared (SUB,SUBR,SUB,SUBR and SUB,SUB,SUBR,SUBR);
// an unexpected opcode trips the "invalid dfdx opcode" assertion.
// NOTE(review): the switch/case labels that pick between the two patterns,
// the assignments to xid, the use of qop and the return statement are not
// visible in this excerpt.
121 GM107LoweringPass::handleDFDX(Instruction
*insn
)
124 int qop
= 0, xid
= 0;
128 qop
= QUADOP(SUB
, SUBR
, SUB
, SUBR
);
132 qop
= QUADOP(SUB
, SUB
, SUBR
, SUBR
);
136 assert(!"invalid dfdx opcode");
// Shuffle source 0 into a scratch value, using xid as the immediate operand
// and the BFLY shuffle sub-operation.
140 shfl
= bld
.mkOp2(OP_SHFL
, TYPE_F32
, bld
.getScratch(),
141 insn
->getSrc(0), bld
.mkImm(xid
));
142 shfl
->subOp
= NV50_IR_SUBOP_SHFL_BFLY
;
// Turn insn itself into the quad operation.
143 insn
->op
= OP_QUADOP
;
145 insn
->lanes
= 0; /* abused for !.ndv */
// The original source 0 moves to slot 1; the shuffled value takes slot 0.
146 insn
->setSrc(1, insn
->getSrc(0));
147 insn
->setSrc(0, shfl
->getDef(0));
// Emits the arithmetic for an OP_PFETCH lowering from the SV_INVOCATION_INFO
// system value:
//   tmp0 = info & 0xff
//   tmp1 = (info >> 16) & 0xff
//   tmp2 = immediate taken from source 0 (reg.data.u32)
//   tmp0 = OP_MAD(tmp0, tmp1, tmp2)   (multiply-add)
// NOTE(review): how the final tmp0 is attached to i, and the return
// statement, are not visible in this excerpt.
152 GM107LoweringPass::handlePFETCH(Instruction
*i
)
154 Value
*tmp0
= bld
.getScratch();
155 Value
*tmp1
= bld
.getScratch();
156 Value
*tmp2
= bld
.getScratch();
// Read the invocation-info system value into tmp0.
157 bld
.mkOp1(OP_RDSV
, TYPE_U32
, tmp0
, bld
.mkSysVal(SV_INVOCATION_INFO
, 0));
// tmp1 takes the value shifted right by 16; both fields are then masked to
// their low 8 bits.
158 bld
.mkOp2(OP_SHR
, TYPE_U32
, tmp1
, tmp0
, bld
.mkImm(16));
159 bld
.mkOp2(OP_AND
, TYPE_U32
, tmp0
, tmp0
, bld
.mkImm(0xff));
160 bld
.mkOp2(OP_AND
, TYPE_U32
, tmp1
, tmp1
, bld
.mkImm(0xff));
// Materialise source 0's 32-bit immediate payload in tmp2.
161 bld
.mkOp1(OP_MOV
, TYPE_U32
, tmp2
, bld
.mkImm(i
->getSrc(0)->reg
.data
.u32
));
162 bld
.mkOp3(OP_MAD
, TYPE_U32
, tmp0
, tmp0
, tmp1
, tmp2
);
// Emits an OP_AND of source 0 and source 1 (typed as i->sType) into a scratch
// value and keeps the result in tmp.
// NOTE(review): how tmp is fed back into i, and the return statement, are not
// visible in this excerpt.
169 GM107LoweringPass::handlePOPCNT(Instruction
*i
)
171 Value
*tmp
= bld
.mkOp2v(OP_AND
, i
->sType
, bld
.getScratch(),
172 i
->getSrc(0), i
->getSrc(1));
179 // - add quadop dance for texturing
180 // - put FP outputs in GPRs
181 // - convert instruction sequences
// Per-instruction entry point of the lowering pass. Positions the builder
// relative to i, then hands the instruction to the matching handle*() helper
// or patches it inline.
// NOTE(review): the switch statement and its case labels are missing from
// this excerpt, so which opcode reaches which handler cannot be confirmed
// here; the statement guarded by the cc check is not visible either.
184 GM107LoweringPass::visit(Instruction
*i
)
186 bld
.setPosition(i
, false);
// Instructions whose condition code is not CC_ALWAYS get extra treatment.
188 if (i
->cc
!= CC_ALWAYS
)
// Texture-family handlers; each receives i viewed as a TexInstruction.
197 return handleTEX(i
->asTex());
199 return handleTXD(i
->asTex());
201 return handleTXLQ(i
->asTex());
203 return handleTXQ(i
->asTex());
// Emit OP_PREEX2 from source 0 into i's own destination, then make that
// destination the new source 0 of i.
205 bld
.mkOp1(OP_PREEX2
, TYPE_F32
, i
->getDef(0), i
->getSrc(0));
206 i
->setSrc(0, i
->getDef(0));
215 return handleSQRT(i
);
217 return handleEXPORT(i
);
219 return handlePFETCH(i
);
224 return handleRDSV(i
);
226 return handleWRSV(i
);
// Source 0 in FILE_SHADER_INPUT: compute programs are redirected to
// FILE_MEMORY_CONST with file index 0; geometry programs with an indirect
// address get that address shifted left by 4 first.
228 if (i
->src(0).getFile() == FILE_SHADER_INPUT
) {
229 if (prog
->getType() == Program::TYPE_COMPUTE
) {
230 i
->getSrc(0)->reg
.file
= FILE_MEMORY_CONST
;
231 i
->getSrc(0)->reg
.fileIndex
= 0;
233 if (prog
->getType() == Program::TYPE_GEOMETRY
&&
234 i
->src(0).isIndirect(0)) {
235 // XXX: this assumes vec4 units
236 Value
*ptr
= bld
.mkOp2v(OP_SHL
, TYPE_U32
, bld
.getSSA(),
237 i
->getIndirect(0, 0), bld
.mkImm(4));
238 i
->setIndirect(0, 0, ptr
);
241 assert(prog
->getType() != Program::TYPE_FRAGMENT
); // INTERP
// cctl records whether source 0 lives in global memory; it is passed on to
// handleCasExch().
247 const bool cctl
= i
->src(0).getFile() == FILE_MEMORY_GLOBAL
;
249 handleCasExch(i
, cctl
);
258 handleSurfaceOpNVE4(i
->asTex());
273 } // namespace nv50_ir