2 * Copyright 2011 Christoph Bumiller
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
24 #include "codegen/nv50_ir_target_gm107.h"
25 #include "codegen/nv50_ir_lowering_gm107.h"
// Factory for the GM107 target: allocates a TargetGM107 for `chipset` with a
// plain `new` and hands it back through a Target pointer.
// NOTE(review): the owner responsible for deleting the returned object is not
// visible here — confirm against the callers.
29 Target
*getTargetGM107(unsigned int chipset
)
31 return new TargetGM107(chipset
);
34 // BUILTINS / LIBRARY FUNCTIONS:
36 // laziness -> everything is simply hardcoded for the time being
38 #include "lib/gm107.asm.h"
// Expose the builtin code blob through two out-parameters.
//   code: receives the address of the first element of gm107_builtin_code,
//         cast to `const uint32_t *`.
//   size: receives sizeof(gm107_builtin_code), i.e. a size in bytes rather
//         than a count of 32-bit words.
// NOTE(review): gm107_builtin_code is presumably provided by the included
// lib/gm107.asm.h — confirm.
41 TargetGM107::getBuiltinCode(const uint32_t **code
, uint32_t *size
) const
43 *code
= (const uint32_t *)&gm107_builtin_code
[0];
44 *size
= sizeof(gm107_builtin_code
);
// Return the gm107_builtin_offsets entry for the builtin index `builtin`.
// The index is only validated by assert() against NVC0_BUILTIN_COUNT, so the
// upper bound is unchecked when assertions are compiled out, and negative
// values are never rejected; callers must pass a valid index.
48 TargetGM107::getBuiltinOffset(int builtin
) const
50 assert(builtin
< NVC0_BUILTIN_COUNT
);
51 return gm107_builtin_offsets
[builtin
];
// Query whether operation `op` with data type `ty` is supported by this
// target. The only return visible in this listing makes the answer depend on
// the chipset being at least NVISA_GM200_CHIPSET.
// NOTE(review): the op/type combination that return applies to, and every
// other path of this function, are not visible here — confirm against the
// complete source before relying on this summary.
55 TargetGM107::isOpSupported(operation op
, DataType ty
) const
66 return chipset
>= NVISA_GM200_CHIPSET
;
74 // Return true when an instruction supports the reuse flag. When supported, the
75 // hardware will use the operand reuse cache introduced with Maxwell, which
76 // should help reduce bank conflicts by caching values for the subsequent
77 // instructions. Note that the next instructions have to use the same GPR id in
78 // the same operand slot.
80 TargetGM107::isReuseSupported(const Instruction
*insn
) const
// The decision is driven by the operation class of insn->op.
82 const OpClass cl
= getOpClass(insn
->op
);
84 // TODO: double-check!
// Within the bitfield class, OP_INSBF and OP_EXTBF are singled out.
// NOTE(review): the result of this branch is not visible in this listing;
// presumably these are the bitfield ops that allow reuse — confirm.
92 case OPCLASS_BITFIELD
:
93 if (insn
->op
== OP_INSBF
|| insn
->op
== OP_EXTBF
)
102 // Return true when an instruction needs a barrier to be set up because it
103 // doesn't operate at a fixed latency. Variable latency instructions are memory
104 // operations, double precision operations, special function unit operations
105 // and other low throughput instructions.
107 TargetGM107::isBarrierRequired(const Instruction
*insn
) const
109 const OpClass cl
= getOpClass(insn
->op
);
// F64 is tested on both the destination and the source type before the
// per-class handling. NOTE(review): the result of this test is not visible
// here; the header comment suggests double precision implies a barrier.
111 if (insn
->dType
== TYPE_F64
|| insn
->sType
== TYPE_F64
)
// Surface and texture classes share one case body (not visible here).
118 case OPCLASS_SURFACE
:
119 case OPCLASS_TEXTURE
:
137 case OPCLASS_BITFIELD
:
146 case OPCLASS_CONTROL
:
// The answer here is the negation of isCS2RSV() applied to the system-value
// id of source 0. NOTE(review): the op this return belongs to is not
// visible in this listing.
163 return !isCS2RSV(insn
->getSrc(0)->reg
.data
.sv
.sv
);
169 // TODO: IMUL/IMAD require barriers too, use XMAD instead!
// Matches MUL/MAD whose destination type is not a float type.
170 if ((insn
->op
== OP_MUL
|| insn
->op
== OP_MAD
) &&
171 !isFloatType(insn
->dType
))
// Conversions are tested for the case where neither def 0 nor src 0 lives
// in the predicate file.
174 case OPCLASS_CONVERT
:
175 if (insn
->def(0).getFile() != FILE_PREDICATE
&&
176 insn
->src(0).getFile() != FILE_PREDICATE
)
// Dual-issue query for the instruction pair (a, b).
// NOTE(review): only the signature is present in this listing; the result
// for any pair cannot be determined from here — confirm against the full
// source.
186 TargetGM107::canDualIssue(const Instruction
*a
, const Instruction
*b
) const
192 // Return the number of stall counts needed to complete a single instruction.
193 // On Maxwell GPUs, the pipeline depth is 6, but some instructions require
194 // a different number of stall counts, e.g. memory operations.
196 TargetGM107::getLatency(const Instruction
*insn
) const
198 // TODO: better values! This should be good enough for now though.
// Non-F64 destination types are distinguished here (outcome not visible in
// this listing).
237 if (insn
->dType
!= TYPE_F64
)
// 6 stall counts when isCS2RSV() accepts the system value of source 0,
// otherwise 15.
241 return isCS2RSV(insn
->getSrc(0)->reg
.data
.sv
.sv
) ? 6 : 15;
// OP_CVT with def 0 or src 0 in the predicate file is handled separately
// (outcome not visible in this listing).
249 if (insn
->op
== OP_CVT
&& (insn
->def(0).getFile() == FILE_PREDICATE
||
250 insn
->src(0).getFile() == FILE_PREDICATE
))
268 // Use the maximum number of stall counts for other instructions.
272 // Return the operand read latency which is the number of stall counts before
273 // an instruction can read its sources. For memory operations like ATOM, LOAD
274 // and STORE, the memory access has to be indirect.
276 TargetGM107::getReadLatency(const Instruction
*insn
) const
// Tested: neither def 0 nor src 0 is in the predicate file.
// NOTE(review): the enclosing case and the returned value are not visible
// in this listing.
302 if (insn
->def(0).getFile() != FILE_PREDICATE
&&
303 insn
->src(0).getFile() != FILE_PREDICATE
)
// Only indirect accesses through source 0 are classified by memory file:
// shared/const form one group and global/local another (the per-group
// values are not visible in this listing).
309 if (insn
->src(0).isIndirect(0)) {
310 switch (insn
->src(0).getFile()) {
311 case FILE_MEMORY_SHARED
:
312 case FILE_MEMORY_CONST
:
314 case FILE_MEMORY_GLOBAL
:
315 case FILE_MEMORY_LOCAL
:
// Predicate over system-value semantics: true only for SV_CLOCK.
// NOTE(review): the name suggests "system value read via CS2R"; that meaning
// is not established by this code — confirm.
334 TargetGM107::isCS2RSV(SVSemantic sv
) const
336 return sv
== SV_CLOCK
;
// Run the legalization/lowering pass that matches the given codegen stage on
// `prog` and return that pass's run() result:
//   CG_STAGE_PRE_SSA -> GM107LoweringPass (constructed with prog)
//   CG_STAGE_POST_RA -> NVC0LegalizePostRA (constructed with prog)
//   CG_STAGE_SSA     -> GM107LegalizeSSA (default-constructed)
// Every pass is invoked as run(prog, false, true).
// NOTE(review): the result for any other stage is not visible in this
// listing — confirm.
340 TargetGM107::runLegalizePass(Program
*prog
, CGStage stage
) const
342 if (stage
== CG_STAGE_PRE_SSA
) {
343 GM107LoweringPass
pass(prog
);
344 return pass
.run(prog
, false, true);
// Post register allocation: uses the NVC0 pass class rather than a
// GM107-specific one.
346 if (stage
== CG_STAGE_POST_RA
) {
347 NVC0LegalizePostRA
pass(prog
);
348 return pass
.run(prog
, false, true);
350 if (stage
== CG_STAGE_SSA
) {
351 GM107LegalizeSSA pass
;
352 return pass
.run(prog
, false, true);
// Obtain a code emitter for the given program type by forwarding to
// createCodeEmitterGM107(type) and returning its result unchanged.
// NOTE(review): ownership of the returned emitter is not visible here —
// confirm against createCodeEmitterGM107 and the callers.
358 TargetGM107::getCodeEmitter(Program::Type type
)
360 return createCodeEmitterGM107(type
);
363 } // namespace nv50_ir