2 * Copyright 2011 Christoph Bumiller
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
24 #include "codegen/nv50_ir_target_gm107.h"
25 #include "codegen/nv50_ir_lowering_gm107.h"
29 Target
*getTargetGM107(unsigned int chipset
)
31 return new TargetGM107(chipset
);
// BUILTINS / LIBRARY FUNCTIONS:
// laziness -> will just hardcode everything for the time being
38 #include "lib/gm107.asm.h"
41 TargetGM107::getBuiltinCode(const uint32_t **code
, uint32_t *size
) const
43 *code
= (const uint32_t *)&gm107_builtin_code
[0];
44 *size
= sizeof(gm107_builtin_code
);
48 TargetGM107::getBuiltinOffset(int builtin
) const
50 assert(builtin
< NVC0_BUILTIN_COUNT
);
51 return gm107_builtin_offsets
[builtin
];
// Per its name, a predicate on whether operation @op with data type @ty
// is supported by this target. NOTE(review): the return type, braces and
// the entire body of this function are missing from this extraction;
// only the signature survives. Restore from the upstream file before
// building.
55 TargetGM107::isOpSupported(operation op
, DataType ty
) const
71 // Return true when an instruction supports the reuse flag. When supported, the
72 // hardware will use the operand reuse cache introduced since Maxwell, which
73 // should try to reduce bank conflicts by caching values for the subsequent
74 // instructions. Note that the next instructions have to use the same GPR id in
75 // the same operand slot.
// NOTE(review): the return type, braces and most of the opclass switch of
// this function were lost in extraction; only fragments survive below.
77 TargetGM107::isReuseSupported(const Instruction
*insn
) const
// Classify the instruction once so the (missing) switch can dispatch on it.
79 const OpClass cl
= getOpClass(insn
->op
);
81 // TODO: double-check!
// Fragment of the switch: within OPCLASS_BITFIELD, INSBF/EXTBF are singled
// out — the branch taken for them is not visible in this chunk.
89 case OPCLASS_BITFIELD
:
90 if (insn
->op
== OP_INSBF
|| insn
->op
== OP_EXTBF
)
99 // Return true when an instruction requires to set up a barrier because it
100 // doesn't operate at a fixed latency. Variable latency instructions are memory
101 // operations, double precision operations, special function unit operations
102 // and other low throughput instructions.
// NOTE(review): the return type, braces, most switch cases and the result
// of each visible branch are missing from this extraction.
104 TargetGM107::isBarrierRequired(const Instruction
*insn
) const
106 const OpClass cl
= getOpClass(insn
->op
);
// F64 on either source or destination type is checked first, matching the
// "double precision operations" case of the header comment above (the
// taken branch is not visible here).
108 if (insn
->dType
== TYPE_F64
|| insn
->sType
== TYPE_F64
)
// Fragments of the opclass switch follow; the per-case results were lost.
115 case OPCLASS_SURFACE
:
116 case OPCLASS_TEXTURE
:
133 case OPCLASS_BITFIELD
:
142 case OPCLASS_CONTROL
:
164 // TODO: IMUL/IMAD require barriers too, use of XMAD instead!
// Integer (non-float dType) MUL/MAD is singled out here, per the TODO above.
165 if ((insn
->op
== OP_MUL
|| insn
->op
== OP_MAD
) &&
166 !isFloatType(insn
->dType
))
// Conversions are special-cased: the condition below only fires when
// neither def(0) nor src(0) lives in the predicate file.
169 case OPCLASS_CONVERT
:
170 if (insn
->def(0).getFile() != FILE_PREDICATE
&&
171 insn
->src(0).getFile() != FILE_PREDICATE
)
// Per its name, decides whether instructions @a and @b may be dual-issued.
// NOTE(review): the return type, braces and the entire body of this
// function are missing from this extraction; only the signature survives.
181 TargetGM107::canDualIssue(const Instruction
*a
, const Instruction
*b
) const
187 // Return the number of stall counts needed to complete a single instruction.
188 // On Maxwell GPUs, the pipeline depth is 6, but some instructions require
189 // different number of stall counts like memory operations.
// NOTE(review): the return type, braces, the op switch and the concrete
// stall-count values returned are missing from this extraction; only a
// few condition fragments remain below.
191 TargetGM107::getLatency(const Instruction
*insn
) const
193 // TODO: better values! This should be good enough for now though.
// Fragment: non-F64 destination types are distinguished here (result lost).
232 if (insn
->dType
!= TYPE_F64
)
// Fragment: CVT to or from the predicate file is special-cased.
242 if (insn
->op
== OP_CVT
&& (insn
->def(0).getFile() == FILE_PREDICATE
||
243 insn
->src(0).getFile() == FILE_PREDICATE
))
260 // Use the maximum number of stall counts for other instructions.
264 // Return the operand read latency which is the number of stall counts before
265 // an instruction can read its sources. For memory operations like ATOM, LOAD
266 // and STORE, the memory access has to be indirect.
// NOTE(review): the return type, braces, the enclosing op switch and the
// latency values returned are missing from this extraction.
268 TargetGM107::getReadLatency(const Instruction
*insn
) const
// Fragment: the inverse of the predicate-file CVT special case seen in
// getLatency() — fires only when neither operand is a predicate.
293 if (insn
->def(0).getFile() != FILE_PREDICATE
&&
294 insn
->src(0).getFile() != FILE_PREDICATE
)
// Per the header comment above, memory operations are only affected when
// the access is indirect; the result then depends on the memory file,
// with shared/const grouped separately from global/local below.
300 if (insn
->src(0).isIndirect(0)) {
301 switch (insn
->src(0).getFile()) {
302 case FILE_MEMORY_SHARED
:
303 case FILE_MEMORY_CONST
:
305 case FILE_MEMORY_GLOBAL
:
306 case FILE_MEMORY_LOCAL
:
325 TargetGM107::runLegalizePass(Program
*prog
, CGStage stage
) const
327 if (stage
== CG_STAGE_PRE_SSA
) {
328 GM107LoweringPass
pass(prog
);
329 return pass
.run(prog
, false, true);
331 if (stage
== CG_STAGE_POST_RA
) {
332 NVC0LegalizePostRA
pass(prog
);
333 return pass
.run(prog
, false, true);
335 if (stage
== CG_STAGE_SSA
) {
336 GM107LegalizeSSA pass
;
337 return pass
.run(prog
, false, true);
343 TargetGM107::getCodeEmitter(Program::Type type
)
345 return createCodeEmitterGM107(type
);
348 } // namespace nv50_ir