This effectively limits registers to 32 on Fermi and 64 on Kepler when
1024 threads are used, but allows the full amount to be used with
smaller thread counts.
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
pOne = new_LValue(fn, FILE_PREDICATE);
carry = new_LValue(fn, FILE_FLAGS);
- rZero->reg.data.id = prog->getTarget()->getFileSize(FILE_GPR);
+ rZero->reg.data.id = (prog->getTarget()->getChipset() >= NVISA_GK20A_CHIPSET) ? 255 : 63;
carry->reg.data.id = 0;
pOne->reg.data.id = 7;
unsigned int
TargetNVC0::getFileSize(DataFile file) const
{
+ const unsigned int gprs = (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
+ const unsigned int smregs = (chipset >= NVISA_GK104_CHIPSET) ? 65536 : 32768;
switch (file) {
case FILE_NULL: return 0;
- case FILE_GPR: return (chipset >= NVISA_GK20A_CHIPSET) ? 255 : 63;
+ case FILE_GPR: return MIN2(gprs, smregs / threads);
case FILE_PREDICATE: return 7;
case FILE_FLAGS: return 1;
case FILE_ADDRESS: return 0;