2 * Copyright 2011 Christoph Bumiller
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
24 #include "codegen/nv50_ir_target_gm107.h"
25 #include "codegen/nv50_ir_lowering_gm107.h"
// Factory for the GM107 code generation target: allocates a TargetGM107 for
// the given chipset with `new` and returns it through a Target pointer.
// NOTE(review): the caller presumably takes ownership of the returned
// object — confirm against the call site.
29 Target
*getTargetGM107(unsigned int chipset
)
31 return new TargetGM107(chipset
);
34 // BUILTINS / LIBRARY FUNCTIONS:
36 // laziness -> will just hardcode everything for the time being
38 #include "lib/gm107.asm.h"
// Hands out the hardcoded builtin code blob through two out-parameters:
//   *code is pointed at the first element of gm107_builtin_code,
//   *size receives sizeof(gm107_builtin_code), i.e. a byte count rather than
//         a count of 32-bit words.
// Both pointers are dereferenced unconditionally, so neither may be null.
41 TargetGM107::getBuiltinCode(const uint32_t **code
, uint32_t *size
) const
43 *code
= (const uint32_t *)&gm107_builtin_code
[0];
44 *size
= sizeof(gm107_builtin_code
);
// Returns the gm107_builtin_offsets entry for the builtin with index
// `builtin`.
// The index is only guarded by an assert against NVC0_BUILTIN_COUNT, so it is
// unchecked when asserts are compiled out, and a negative index is never
// rejected.
48 TargetGM107::getBuiltinOffset(int builtin
) const
50 assert(builtin
< NVC0_BUILTIN_COUNT
);
51 return gm107_builtin_offsets
[builtin
];
55 TargetGM107::isOpSupported(operation op
, DataType ty
) const
71 // Return true when an instruction supports the reuse flag. When supported, the
72 // hardware will use the operand reuse cache introduced with Maxwell, which
73 // should reduce bank conflicts by caching values for the subsequent
74 // instructions. Note that those following instructions have to use the same
75 // GPR id in the same operand slot.
77 TargetGM107::isReuseSupported(const Instruction
*insn
) const
// The decision is keyed on the operation class of the instruction's opcode.
79 const OpClass cl
= getOpClass(insn
->op
);
81 // TODO: double-check!
// Within the bitfield class, INSBF and EXTBF are tested for explicitly and so
// are treated differently from the other opcodes of that class.
89 case OPCLASS_BITFIELD
:
90 if (insn
->op
== OP_INSBF
|| insn
->op
== OP_EXTBF
)
99 // Return true when an instruction requires a barrier to be set up because it
100 // doesn't operate at a fixed latency. Variable latency instructions are memory
101 // operations, double precision operations, special function unit operations
102 // and other low throughput instructions.
104 TargetGM107::isBarrierRequired(const Instruction
*insn
) const
// The per-class checks further down are keyed on the operation class.
106 const OpClass cl
= getOpClass(insn
->op
);
// Double precision is detected from either the destination or the source type.
108 if (insn
->dType
== TYPE_F64
|| insn
->sType
== TYPE_F64
)
115 case OPCLASS_SURFACE
:
116 case OPCLASS_TEXTURE
:
133 case OPCLASS_BITFIELD
:
142 case OPCLASS_CONTROL
:
// The result is the negation of isCS2RSV() applied to the system value
// semantic of the first source.
159 return !isCS2RSV(insn
->getSrc(0)->reg
.data
.sv
.sv
);
165 // TODO: IMUL/IMAD require barriers too, use XMAD instead!
// Matches MUL/MAD whose destination type is not a float type (the integer
// forms the TODO refers to).
166 if ((insn
->op
== OP_MUL
|| insn
->op
== OP_MAD
) &&
167 !isFloatType(insn
->dType
))
// Matches conversions where neither the first destination nor the first
// source lives in the predicate register file.
170 case OPCLASS_CONVERT
:
171 if (insn
->def(0).getFile() != FILE_PREDICATE
&&
172 insn
->src(0).getFile() != FILE_PREDICATE
)
182 TargetGM107::canDualIssue(const Instruction
*a
, const Instruction
*b
) const
188 // Return the number of stall counts needed to complete a single instruction.
189 // On Maxwell GPUs, the pipeline depth is 6, but some instructions, such as
190 // memory operations, require a different number of stall counts.
192 TargetGM107::getLatency(const Instruction
*insn
) const
194 // TODO: better values! This should be good enough for now though.
// Tests for a destination type other than F64.
233 if (insn
->dType
!= TYPE_F64
)
// 6 stall counts when isCS2RSV() accepts the system value semantic of the
// first source, 15 otherwise.
237 return isCS2RSV(insn
->getSrc(0)->reg
.data
.sv
.sv
) ? 6 : 15;
// Matches CVT instructions whose first destination or first source lives in
// the predicate register file.
245 if (insn
->op
== OP_CVT
&& (insn
->def(0).getFile() == FILE_PREDICATE
||
246 insn
->src(0).getFile() == FILE_PREDICATE
))
263 // Use the maximum number of stall counts for other instructions.
267 // Return the operand read latency, which is the number of stall counts before
268 // an instruction can read its sources. For memory operations like ATOM, LOAD
269 // and STORE, the memory access has to be indirect.
271 TargetGM107::getReadLatency(const Instruction
*insn
) const
// Matches instructions where neither the first destination nor the first
// source lives in the predicate register file.
296 if (insn
->def(0).getFile() != FILE_PREDICATE
&&
297 insn
->src(0).getFile() != FILE_PREDICATE
)
// Only an indirect first source is considered here; the latency then depends
// on the memory file being addressed. Shared/const and global/local are
// listed as two separate case groups.
303 if (insn
->src(0).isIndirect(0)) {
304 switch (insn
->src(0).getFile()) {
305 case FILE_MEMORY_SHARED
:
306 case FILE_MEMORY_CONST
:
308 case FILE_MEMORY_GLOBAL
:
309 case FILE_MEMORY_LOCAL
:
// Predicate on a system value semantic: true only for SV_CLOCK, false for
// every other value.
// NOTE(review): the name suggests "system values readable via CS2R" —
// confirm against the code emitter.
328 TargetGM107::isCS2RSV(SVSemantic sv
) const
330 return sv
== SV_CLOCK
;
// Runs the pass that belongs to the given code generation stage over `prog`:
//   CG_STAGE_PRE_SSA -> GM107LoweringPass   (constructed with prog)
//   CG_STAGE_POST_RA -> NVC0LegalizePostRA  (constructed with prog)
//   CG_STAGE_SSA     -> GM107LegalizeSSA    (default-constructed)
// Each branch returns the result of pass.run(prog, false, true) directly.
334 TargetGM107::runLegalizePass(Program
*prog
, CGStage stage
) const
336 if (stage
== CG_STAGE_PRE_SSA
) {
337 GM107LoweringPass
pass(prog
);
338 return pass
.run(prog
, false, true);
340 if (stage
== CG_STAGE_POST_RA
) {
341 NVC0LegalizePostRA
pass(prog
);
342 return pass
.run(prog
, false, true);
344 if (stage
== CG_STAGE_SSA
) {
345 GM107LegalizeSSA pass
;
346 return pass
.run(prog
, false, true);
// Returns the code emitter for the given program type by forwarding to
// createCodeEmitterGM107(). Unlike the query methods of this class, it is
// not const-qualified.
352 TargetGM107::getCodeEmitter(Program::Type type
)
354 return createCodeEmitterGM107(type
);
357 } // namespace nv50_ir