2 * Copyright 2011 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include "codegen/nv50_ir_target_nv50.h"
// Factory entry point: allocates a TargetNV50 for `chipset` with `new` and
// returns it through the base-class pointer type. Nothing in this function
// releases the object - presumably the caller owns it; confirm at the call site.
27 Target
*getTargetNV50(unsigned int chipset
)
29 return new TargetNV50(chipset
);
// Constructor. The base Target is built with the fixed flags
// (true, true, false); what each flag means is defined by Target's own
// constructor, which is not visible here.
// NOTE(review): only part of the constructor body is visible in this view.
32 TargetNV50::TargetNV50(unsigned int card
) : Target(true, true, false)
// Set every sysvalLocation slot to ~0, SV_LAST included (the bound is <=).
// NOTE(review): ~0 looks like an "unassigned" marker - parseDriverInfo()
// treats an SV_POSITION value >= 0x200 as "not assigned by driver"; confirm.
37 for (unsigned int i
= 0; i
<= SV_LAST
; ++i
)
38 sysvalLocation
[i
] = ~0;
44 // BUILTINS / LIBRARY FUNCTIONS:
// Table of 32-bit builtin code words. The initialiser is not visible in this
// view.
// NOTE(review): the nvc0_ prefix inside an NV50 target file looks like a
// leftover name - confirm before renaming.
47 static const uint32_t nvc0_builtin_code
[] =
// One 16-bit entry per builtin (NV50_BUILTIN_COUNT entries). Judging by the
// name these are offsets into the code table above - TODO confirm; the
// initialiser is not visible in this view.
51 static const uint16_t nvc0_builtin_offsets
[NV50_BUILTIN_COUNT
] =
// Takes two out-parameters, `code` (pointer to a pointer to 32-bit words) and
// `size`. Presumably reports the builtin code table and its size through
// them - the body is not visible in this view, so confirm before relying on it.
57 TargetNV50::getBuiltinCode(const uint32_t **code
, uint32_t *size
) const
// Looks up something for the builtin with index `builtin`; presumably its
// entry in the builtin offset table - the body is not visible in this view,
// so confirm (including how out-of-range indices are handled).
64 TargetNV50::getBuiltinOffset(int builtin
) const
// Bit-field members of the per-operation capability record used by the
// _initProps table. For every field except mSat, initOpInfo() tests bit s
// of the mask to decide whether source operand s (s = 0..2) gets the
// corresponding capability. How mSat is tested is not visible in this view.
// mNeg: sources that accept NV50_IR_MOD_NEG.
72 unsigned int mNeg
: 4;
// mAbs: sources that accept NV50_IR_MOD_ABS.
73 unsigned int mAbs
: 4;
// mNot: sources that accept NV50_IR_MOD_NOT.
74 unsigned int mNot
: 4;
// mSat: saturation flag; rows that set it use 0x8.
75 unsigned int mSat
: 4;
// fConst: sources that may come from FILE_MEMORY_CONST.
76 unsigned int fConst
: 3;
// fShared: sources that may come from FILE_MEMORY_SHARED.
77 unsigned int fShared
: 3;
// fAttrib: sources that may come from FILE_SHADER_INPUT.
78 unsigned int fAttrib
: 3;
// fImm: sources that may come from FILE_IMMEDIATE.
79 unsigned int fImm
: 3;
// Capability table consumed by initOpInfo(). Each row is an operation followed
// by eight masks in the order given by the column comment below. In the
// per-source columns bit s refers to source operand s, e.g. OP_ADD's neg mask
// 0x3 allows negation on sources 0 and 1 and its imm mask 0x2 allows an
// immediate in source 1. Operations without a row keep the defaults that
// initOpInfo() assigns (no source modifiers, GPR-only sources).
82 static const struct opProperties _initProps
[] =
84 // neg abs not sat c[] s[], a[], imm
85 { OP_ADD
, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
86 { OP_SUB
, 0x3, 0x0, 0x0, 0x8, 0x2, 0x1, 0x1, 0x2 },
87 { OP_MUL
, 0x3, 0x0, 0x0, 0x0, 0x2, 0x1, 0x1, 0x2 },
88 { OP_MAX
, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
89 { OP_MIN
, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
90 { OP_MAD
, 0x7, 0x0, 0x0, 0x8, 0x6, 0x1, 0x1, 0x0 }, // special constraint
91 { OP_ABS
, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0 },
92 { OP_NEG
, 0x0, 0x1, 0x0, 0x0, 0x0, 0x1, 0x1, 0x0 },
93 { OP_CVT
, 0x1, 0x1, 0x0, 0x8, 0x0, 0x1, 0x1, 0x0 },
94 { OP_AND
, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 },
95 { OP_OR
, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 },
96 { OP_XOR
, 0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0, 0x2 },
97 { OP_SHL
, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2 },
98 { OP_SHR
, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2 },
99 { OP_SET
, 0x3, 0x3, 0x0, 0x0, 0x2, 0x1, 0x1, 0x0 },
100 { OP_PREEX2
, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
101 { OP_PRESIN
, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
102 { OP_EX2
, 0x0, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0, 0x0 },
103 { OP_LG2
, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
104 { OP_RCP
, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
105 { OP_RSQ
, 0x1, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
106 { OP_DFDX
, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
107 { OP_DFDY
, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 },
// Fills nativeFileMap[] and opInfo[] in three passes:
//   1. every operation gets the same conservative defaults,
//   2. the four static lists below override single flags per operation,
//   3. the _initProps table adds per-source modifier and file permissions.
// NOTE(review): some lines of this function (local declarations, closing
// braces, at least one condition) are not visible in this view.
110 void TargetNV50::initOpInfo()
// Operations whose `commutative` flag is set to true.
114 static const operation commutativeList
[] =
116 OP_ADD
, OP_MUL
, OP_MAD
, OP_FMA
, OP_AND
, OP_OR
, OP_XOR
, OP_MAX
, OP_MIN
,
117 OP_SET_AND
, OP_SET_OR
, OP_SET_XOR
, OP_SET
, OP_SELP
, OP_SLCT
// Operations whose minEncSize is lowered from the default 8 to 4.
119 static const operation shortFormList
[] =
121 OP_MOV
, OP_ADD
, OP_SUB
, OP_MUL
, OP_MAD
, OP_SAD
, OP_RCP
, OP_LINTERP
,
122 OP_PINTERP
, OP_TEX
, OP_TXF
// Operations whose hasDest flag is cleared.
124 static const operation noDestList
[] =
126 OP_STORE
, OP_WRSV
, OP_EXPORT
, OP_BRA
, OP_CALL
, OP_RET
, OP_EXIT
,
127 OP_DISCARD
, OP_CONT
, OP_BREAK
, OP_PRECONT
, OP_PREBREAK
, OP_PRERET
,
128 OP_JOIN
, OP_JOINAT
, OP_BRKPT
, OP_MEMBAR
, OP_EMIT
, OP_RESTART
,
129 OP_QUADON
, OP_QUADPOP
, OP_TEXBAR
, OP_SUSTB
, OP_SUSTP
, OP_SUREDP
,
// Operations whose predicate flag is cleared.
132 static const operation noPredList
[] =
134 OP_CALL
, OP_PREBREAK
, OP_PRERET
, OP_QUADON
, OP_QUADPOP
, OP_JOINAT
,
// Every data file maps to itself, except FILE_PREDICATE, which is redirected
// to FILE_FLAGS right after the loop.
138 for (i
= 0; i
< DATA_FILE_COUNT
; ++i
)
139 nativeFileMap
[i
] = (DataFile
)i
;
140 nativeFileMap
[FILE_PREDICATE
] = FILE_FLAGS
;
// Pass 1: defaults for every operation - F32 source and destination types,
// all immediate bits allowed, source count taken from operationSrcNr.
142 for (i
= 0; i
< OP_LAST
; ++i
) {
143 opInfo
[i
].variants
= NULL
;
144 opInfo
[i
].op
= (operation
)i
;
145 opInfo
[i
].srcTypes
= 1 << (int)TYPE_F32
;
146 opInfo
[i
].dstTypes
= 1 << (int)TYPE_F32
;
147 opInfo
[i
].immdBits
= 0xffffffff;
148 opInfo
[i
].srcNr
= operationSrcNr
[i
];
// Each source starts with no modifiers and may only come from a GPR.
150 for (j
= 0; j
< opInfo
[i
].srcNr
; ++j
) {
151 opInfo
[i
].srcMods
[j
] = 0;
152 opInfo
[i
].srcFiles
[j
] = 1 << (int)FILE_GPR
;
// Destination: no modifiers, GPR only.
154 opInfo
[i
].dstMods
= 0;
155 opInfo
[i
].dstFiles
= 1 << (int)FILE_GPR
;
// Flags derived from the position of the opcode in the operation enum:
// vector for OP_TEX..OP_TEXCSAA, pseudo for everything before OP_MOV,
// flow for OP_BRA..OP_JOIN; predicate is the inverse of pseudo.
157 opInfo
[i
].hasDest
= 1;
158 opInfo
[i
].vector
= (i
>= OP_TEX
&& i
<= OP_TEXCSAA
);
159 opInfo
[i
].commutative
= false; /* set below */
160 opInfo
[i
].pseudo
= (i
< OP_MOV
);
161 opInfo
[i
].predicate
= !opInfo
[i
].pseudo
;
162 opInfo
[i
].flow
= (i
>= OP_BRA
&& i
<= OP_JOIN
);
163 opInfo
[i
].minEncSize
= 8; /* set below */
// Pass 2: list-driven overrides of single flags.
165 for (i
= 0; i
< ARRAY_SIZE(commutativeList
); ++i
)
166 opInfo
[commutativeList
[i
]].commutative
= true;
167 for (i
= 0; i
< ARRAY_SIZE(shortFormList
); ++i
)
168 opInfo
[shortFormList
[i
]].minEncSize
= 4;
169 for (i
= 0; i
< ARRAY_SIZE(noDestList
); ++i
)
170 opInfo
[noDestList
[i
]].hasDest
= 0;
171 for (i
= 0; i
< ARRAY_SIZE(noPredList
); ++i
)
172 opInfo
[noPredList
[i
]].predicate
= 0;
// Pass 3: apply the _initProps rows. For each of the first three source
// slots, bit s of a mask enables the matching modifier or source file.
174 for (i
= 0; i
< ARRAY_SIZE(_initProps
); ++i
) {
175 const struct opProperties
*prop
= &_initProps
[i
];
177 for (int s
= 0; s
< 3; ++s
) {
178 if (prop
->mNeg
& (1 << s
))
179 opInfo
[prop
->op
].srcMods
[s
] |= NV50_IR_MOD_NEG
;
180 if (prop
->mAbs
& (1 << s
))
181 opInfo
[prop
->op
].srcMods
[s
] |= NV50_IR_MOD_ABS
;
182 if (prop
->mNot
& (1 << s
))
183 opInfo
[prop
->op
].srcMods
[s
] |= NV50_IR_MOD_NOT
;
184 if (prop
->fConst
& (1 << s
))
185 opInfo
[prop
->op
].srcFiles
[s
] |= 1 << (int)FILE_MEMORY_CONST
;
186 if (prop
->fShared
& (1 << s
))
187 opInfo
[prop
->op
].srcFiles
[s
] |= 1 << (int)FILE_MEMORY_SHARED
;
188 if (prop
->fAttrib
& (1 << s
))
189 opInfo
[prop
->op
].srcFiles
[s
] |= 1 << (int)FILE_SHADER_INPUT
;
190 if (prop
->fImm
& (1 << s
))
191 opInfo
[prop
->op
].srcFiles
[s
] |= 1 << (int)FILE_IMMEDIATE
;
// Marks the destination as saturable.
// NOTE(review): the condition guarding this assignment (presumably a test of
// prop->mSat) is not visible in this view - confirm.
194 opInfo
[prop
->op
].dstMods
= NV50_IR_MOD_SAT
;
// OP_MUL has 0x0 in the sat column of _initProps, yet gets saturation here.
// NOTE(review): any condition guarding this assignment is not visible in this
// view - confirm whether it is unconditional.
198 opInfo
[OP_MUL
].dstMods
= NV50_IR_MOD_SAT
;
// Returns a size for the given data file, one case per file. The unit is only
// stated for FILE_GPR (16-bit units, see the ** footnote); for the other
// files the unit is not stated here. FILE_NULL, FILE_PREDICATE and
// FILE_IMMEDIATE report 0; initOpInfo() maps FILE_PREDICATE to FILE_FLAGS.
// The assert fires for a file with no case of its own - presumably from a
// default branch whose label is not visible in this view.
202 TargetNV50::getFileSize(DataFile file
) const
205 case FILE_NULL
: return 0;
206 case FILE_GPR
: return 254; // in 16-bit units **
207 case FILE_PREDICATE
: return 0;
208 case FILE_FLAGS
: return 4;
209 case FILE_ADDRESS
: return 4;
210 case FILE_IMMEDIATE
: return 0;
211 case FILE_MEMORY_CONST
: return 65536;
212 case FILE_SHADER_INPUT
: return 0x200;
213 case FILE_SHADER_OUTPUT
: return 0x200;
214 case FILE_MEMORY_BUFFER
: return 0xffffffff;
215 case FILE_MEMORY_GLOBAL
: return 0xffffffff;
// 16 << 10 = 16384 and 48 << 10 = 49152.
216 case FILE_MEMORY_SHARED
: return 16 << 10;
217 case FILE_MEMORY_LOCAL
: return 48 << 10;
218 case FILE_SYSTEM_VALUE
: return 16;
220 assert(!"invalid file");
223 // ** only first 128 units encodable for 16-bit regs
// Returns a per-file unit value. FILE_GPR and FILE_ADDRESS share one result
// and FILE_SYSTEM_VALUE has its own; the returned values and the fallback for
// all other files are not visible in this view.
227 TargetNV50::getFileUnit(DataFile file
) const
229 if (file
== FILE_GPR
|| file
== FILE_ADDRESS
)
231 if (file
== FILE_SYSTEM_VALUE
)
// Maps a system-value symbol to an address, dispatching on
// sym->reg.data.sv.sv. `shaderFile` only influences the SV_PRIMITIVE_ID case
// in the visible code.
// NOTE(review): several case labels and statements of this switch are not
// visible in this view; comments below describe only what can be seen.
237 TargetNV50::getSVAddress(DataFile shaderFile
, const Symbol
*sym
) const
239 switch (sym
->reg
.data
.sv
.sv
) {
// Start from the location recorded for this system value, then visit each
// component below sv.index and test its bit in wposMask. What is done for a
// set bit, and the case label this belongs to, are not visible here.
244 uint32_t addr
= sysvalLocation
[sym
->reg
.data
.sv
.sv
];
245 for (int c
= 0; c
< sym
->reg
.data
.sv
.index
; ++c
)
246 if (wposMask
& (1 << c
))
// Primitive ID: fixed address 0x18 when read as a shader input, otherwise the
// recorded location.
250 case SV_PRIMITIVE_ID
:
251 return shaderFile
== FILE_SHADER_INPUT
? 0x18 :
252 sysvalLocation
[sym
->reg
.data
.sv
.sv
];
// Three system values use a fixed base plus 2 per component index; the case
// labels selecting each formula are not visible in this view.
254 return 0x8 + 2 * sym
->reg
.data
.sv
.index
;
256 return 0xc + 2 * sym
->reg
.data
.sv
.index
;
258 return 0x2 + 2 * sym
->reg
.data
.sv
.index
;
260 case SV_COMBINED_TID
:
263 return 0; /* sample position is handled differently */
// Remaining system values use the recorded location - presumably the default
// branch; its label is not visible here.
265 return sysvalLocation
[sym
->reg
.data
.sv
.sv
];
// Checks whether source slot `s` of instruction `i` can take the operand of
// the load `ld` (ld's source 0) directly.
// NOTE(review): most `return` statements and several declarations of this
// function are not visible in this view, so the outcome of many of the tests
// below cannot be read off here; the comments describe only the visible
// conditions.
269 // long: rrr, arr, rcr, acr, rrc, arc, gcr, grr
270 // short: rr, ar, rc, gr
273 TargetNV50::insnCanLoad(const Instruction
*i
, int s
,
274 const Instruction
*ld
) const
// sf: the data file the load reads from.
276 DataFile sf
= ld
->src(0).getFile();
278 // immediate 0 can be represented by GPR $r63/$r127
279 if (sf
== FILE_IMMEDIATE
&& ld
->getSrc(0)->reg
.data
.u64
== 0)
280 return (!i
->isPseudo() &&
282 i
->op
!= OP_EXPORT
&& i
->op
!= OP_STORE
);
// A non-zero immediate combined with a predicate source or a flags definition
// on `i` is special-cased (outcome not visible).
284 if (sf
== FILE_IMMEDIATE
&& (i
->predSrc
>= 0 || i
->flagsDef
>= 0))
// Slot index beyond the operation's source count.
286 if (s
>= opInfo
[i
->op
].srcNr
)
// The srcFiles mask built by initOpInfo() must permit file sf in slot s.
288 if (!(opInfo
[i
->op
].srcFiles
[s
] & (1 << (int)sf
)))
// Third source while source 1 is not a GPR.
290 if (s
== 2 && i
->src(1).getFile() != FILE_GPR
)
293 // NOTE: don't rely on flagsDef
294 if (sf
== FILE_IMMEDIATE
)
295 for (int d
= 0; i
->defExists(d
); ++d
)
296 if (i
->def(d
).getFile() == FILE_FLAGS
)
// Build `mode` with two bits per source slot, using sf for slot s and the
// current file for every other slot: 1 for shared memory / shader input,
// 2 for constant memory, 3 for a further case whose label is not visible.
// How `mode` is then checked is also not visible in this view.
301 for (int z
= 0; z
< Target::operationSrcNr
[i
->op
]; ++z
) {
302 DataFile zf
= (z
== s
) ? sf
: i
->src(z
).getFile();
306 case FILE_MEMORY_SHARED
:
307 case FILE_SHADER_INPUT
:
308 mode
|= 1 << (z
* 2);
310 case FILE_MEMORY_CONST
:
311 mode
|= 2 << (z
* 2);
314 mode
|= 3 << (z
* 2);
330 // Shader inputs get transformed to p[] in geometry shaders, and those
331 // aren't allowed to be used at the same time as c[].
332 if (ld
->bb
->getProgram()->getType() == Program::TYPE_GEOMETRY
)
336 if (ld
->bb
->getProgram()->getType() != Program::TYPE_GEOMETRY
)
// Integer MUL/MAD: extra restrictions on indirect, immediate and (for the
// MUL_HIGH sub-operation) constant-memory operands.
345 if ((i
->op
== OP_MUL
|| i
->op
== OP_MAD
) && !isFloatType(i
->dType
)) {
346 // 32-bit MUL will be split into 16-bit MULs
347 if (ld
->src(0).isIndirect(0))
349 if (sf
== FILE_IMMEDIATE
)
351 if (i
->subOp
== NV50_IR_SUBOP_MUL_HIGH
&& sf
== FILE_MEMORY_CONST
)
// ldSize: byte size of the load's data type, used by the range checks below.
355 ldSize
= typeSizeof(ld
->dType
);
358 if (sf
== FILE_IMMEDIATE
)
362 // Check if memory access is encodable:
364 if (ldSize
< 4 && sf
== FILE_SHADER_INPUT
) // no < 4-byte aligned a[] access
// Offset compared against 127 * ldSize, the same bound insnCanLoadOffset()
// uses with the source's size.
366 if (ld
->getSrc(0)->reg
.data
.offset
> (int32_t)(127 * ldSize
))
// Indirectly addressed load: scan the sources of `i` for one that is already
// indirect (what happens on a match is not visible), then decide by program
// type and file.
369 if (ld
->src(0).isIndirect(0)) {
370 for (int z
= 0; i
->srcExists(z
); ++z
)
371 if (i
->src(z
).isIndirect(0))
374 // s[] access only possible in CP, $aX always applies
375 if (sf
== FILE_MEMORY_SHARED
)
377 if (!ld
->bb
) // can't check type ...
379 Program::Type pt
= ld
->bb
->getProgram()->getType();
381 // $aX applies to c[] only in VP, FP, GP if p[] is not accessed
382 if (pt
== Program::TYPE_COMPUTE
)
// Geometry programs: a constant-memory load is accepted only if slot s of `i`
// is not currently a shader input; any other file must be a shader input.
384 if (pt
== Program::TYPE_GEOMETRY
) {
385 if (sf
== FILE_MEMORY_CONST
)
386 return i
->src(s
).getFile() != FILE_SHADER_INPUT
;
387 return sf
== FILE_SHADER_INPUT
;
// Program types that did not return above: only constant memory qualifies.
389 return sf
== FILE_MEMORY_CONST
;
// Checks whether adding `offset` to the existing offset of source `s` of
// instruction `i` stays within range. The final test accepts totals between 0
// and 127 times the source's reg.size, inclusive.
// NOTE(review): the results returned for a non-indirect source and for
// OP_LOAD / OP_STORE are not visible in this view.
395 TargetNV50::insnCanLoadOffset(const Instruction
*i
, int s
, int offset
) const
397 if (!i
->src(s
).isIndirect(0))
// From here on `offset` is the combined offset: the requested delta plus the
// offset already stored in the source.
399 offset
+= i
->src(s
).get()->reg
.data
.offset
;
400 if (i
->op
== OP_LOAD
|| i
->op
== OP_STORE
) {
401 // There are some restrictions in theory, but in practice they're never
402 // going to be hit. When we enable shared/global memory, this will
403 // become more important.
406 return offset
>= 0 && offset
<= (int32_t)(127 * i
->src(s
).get()->reg
.size
);
// Reports whether data type `ty` can be accessed in data file `file`.
// Types wider than 4 bytes are supported only for local, global and buffer
// memory.
// NOTE(review): the result for TYPE_B96 / TYPE_NONE and the final result for
// types of 4 bytes or less are not visible in this view.
410 TargetNV50::isAccessSupported(DataFile file
, DataType ty
) const
412 if (ty
== TYPE_B96
|| ty
== TYPE_NONE
)
414 if (typeSizeof(ty
) > 4)
415 return (file
== FILE_MEMORY_LOCAL
) || (file
== FILE_MEMORY_GLOBAL
) ||
416 (file
== FILE_MEMORY_BUFFER
);
// Reports whether operation `op` with data type `ty` is supported on this
// chipset.
// NOTE(review): the switch statement and nearly all of its case labels are not
// visible in this view, so it cannot be told here which operations the
// individual return statements below belong to.
421 TargetNV50::isOpSupported(operation op
, DataType ty
) const
// F64 on chipsets below 0xa0 is special-cased (outcome not visible).
423 if (ty
== TYPE_F64
&& chipset
< 0xa0)
// Some operations require chipset 0xa0 or newer.
428 return chipset
>= 0xa0;
// Others require 0xa3 or newer, excluding 0xaa and 0xac.
430 return chipset
>= 0xa3 && chipset
!= 0xaa && chipset
!= 0xac;
443 case OP_EXIT
: // want exit modifier instead (on NOP if required)
// Type-dependent answers: one group is S32-only, another accepts only
// non-float types.
449 return ty
== TYPE_S32
;
451 return !isFloatType(ty
);
// Reports whether modifier `mod` may be applied to source `s` of `insn`.
// The general rule is the last statement: every bit of `mod` must be present
// in the srcMods mask that initOpInfo() built for that operation and slot.
// NOTE(review): the special handling of non-float instructions is only
// partly visible in this view (its switch and case labels are missing).
458 TargetNV50::isModSupported(const Instruction
*insn
, int s
, Modifier mod
) const
460 if (!isFloatType(insn
->dType
)) {
// Looks at the negate modifier of the *other* source (slot 0 when s is
// non-zero, slot 1 when s is 0); the consequence is not visible here.
473 if (insn
->src(s
? 0 : 1).mod
.neg())
// Supported only while source 1 is not negated.
478 return insn
->src(1).mod
.neg() ? false : true;
481 if (insn
->sType
!= TYPE_F32
)
// Slots at or beyond the operation's source count, or beyond the third
// source, are special-cased (outcome not visible).
488 if (s
>= opInfo
[insn
->op
].srcNr
|| s
>= 3)
490 return (mod
& Modifier(opInfo
[insn
->op
].srcMods
[s
])) == mod
;
// Reports whether `insn` may be given a predicate. When neither early test
// applies, the answer is the operation's `predicate` flag from opInfo.
// `pred` is not used by the visible code.
// NOTE(review): the results of the two early tests are not visible in this
// view - presumably both reject; confirm.
494 TargetNV50::mayPredicate(const Instruction
*insn
, const Value
*pred
) const
// Already predicated, or already reading flags.
496 if (insn
->getPredicate() || insn
->flagsSrc
>= 0)
// Any immediate source.
498 for (int s
= 0; insn
->srcExists(s
); ++s
)
499 if (insn
->src(s
).getFile() == FILE_IMMEDIATE
)
501 return opInfo
[insn
->op
].predicate
;
// Reports whether the result of `insn` may be saturated. The general answer
// is whether NV50_IR_MOD_SAT is set in the operation's dstMods, as filled in
// by initOpInfo().
// NOTE(review): the results for OP_CVT and for non-F32 destination types are
// not visible in this view.
505 TargetNV50::isSatSupported(const Instruction
*insn
) const
507 if (insn
->op
== OP_CVT
)
509 if (insn
->dType
!= TYPE_F32
)
511 return opInfo
[insn
->op
].dstMods
& NV50_IR_MOD_SAT
;
// Returns a latency estimate for instruction `i`. An OP_LOAD from local,
// global or buffer memory reports 100, which the inline note marks as lower
// than the real 400 to 800.
// NOTE(review): the values for other load sources and for non-load
// instructions are not visible in this view.
514 int TargetNV50::getLatency(const Instruction
*i
) const
516 // TODO: tune these values
517 if (i
->op
== OP_LOAD
) {
518 switch (i
->src(0).getFile()) {
519 case FILE_MEMORY_LOCAL
:
520 case FILE_MEMORY_GLOBAL
:
521 case FILE_MEMORY_BUFFER
:
522 return 100; // really 400 to 800
530 // These are "inverse" throughput values, i.e. the number of cycles required
531 // to issue a specific instruction for a full warp (32 threads).
533 // Assuming we have more than 1 warp in flight, a higher issue latency results
534 // in a lower result latency since the MP will have spent more time with other
// warps.
536 // This also helps to determine the number of cycles between instructions in
// a single warp.
// Returns an issue-cost figure for instruction `i`, chosen by destination
// type: one branch for F32, one for U32/S32 and one for F64.
// NOTE(review): the values returned inside each branch, and the fallback for
// other types, are not visible in this view.
539 int TargetNV50::getThroughput(const Instruction
*i
) const
541 // TODO: tune these values
542 if (i
->dType
== TYPE_F32
) {
556 if (i
->dType
== TYPE_U32
|| i
->dType
== TYPE_S32
) {
559 if (i
->dType
== TYPE_F64
) {
// Records where one driver-described varying lives.
//   locs  - table indexed by SV_* value; the entry matching the varying's
//           semantic is set to the varying's address.
//   masks - optional (may be NULL); masks[0] receives var->mask when the
//           varying is the position semantic.
//   var   - varying description supplied by the driver.
// Only the six semantics listed below are recorded.
// NOTE(review): the switch header is not visible in this view; it presumably
// switches on var->sn, the field tested after the switch - confirm.
567 recordLocation(uint16_t *locs
, uint8_t *masks
,
568 const struct nv50_ir_varying
*var
)
// Address: first slot of the varying times 4.
570 uint16_t addr
= var
->slot
[0] * 4;
573 case TGSI_SEMANTIC_POSITION
: locs
[SV_POSITION
] = addr
; break;
574 case TGSI_SEMANTIC_INSTANCEID
: locs
[SV_INSTANCE_ID
] = addr
; break;
575 case TGSI_SEMANTIC_VERTEXID
: locs
[SV_VERTEX_ID
] = addr
; break;
576 case TGSI_SEMANTIC_PRIMID
: locs
[SV_PRIMITIVE_ID
] = addr
; break;
577 case TGSI_SEMANTIC_LAYER
: locs
[SV_LAYER
] = addr
; break;
578 case TGSI_SEMANTIC_VIEWPORT_INDEX
: locs
[SV_VIEWPORT_INDEX
] = addr
; break;
// Position additionally reports its component mask when the caller asked
// for it.
582 if (var
->sn
== TGSI_SEMANTIC_POSITION
&& masks
)
583 masks
[0] = var
->mask
;
// Pulls system-value locations out of the driver's program description and
// then hands `info` on to Target::parseDriverInfo().
// Outputs, inputs and system values are all fed through recordLocation() into
// the same sysvalLocation table, in that order, so a later entry with the
// same semantic overwrites an earlier one. Only the input pass captures the
// position mask (into wposMask).
587 TargetNV50::parseDriverInfo(const struct nv50_ir_prog_info
*info
)
590 for (i
= 0; i
< info
->numOutputs
; ++i
)
591 recordLocation(sysvalLocation
, NULL
, &info
->out
[i
]);
592 for (i
= 0; i
< info
->numInputs
; ++i
)
593 recordLocation(sysvalLocation
, &wposMask
, &info
->in
[i
]);
594 for (i
= 0; i
< info
->numSysVals
; ++i
)
595 recordLocation(sysvalLocation
, NULL
, &info
->sv
[i
]);
// A position location of 0x200 or more means the driver did not assign one
// (the constructor fills the table with ~0); fall back to location 0.
// NOTE(review): one statement inside this branch is not visible in this view.
597 if (sysvalLocation
[SV_POSITION
] >= 0x200) {
598 // not assigned by driver, but we need it internally
600 sysvalLocation
[SV_POSITION
] = 0;
603 Target::parseDriverInfo(info
);
606 } // namespace nv50_ir