import operator
header = r"""/****************************************************************************
-* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
+* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
'INSERT_ELEMENT': 'VINSERT',
'EXTRACT_ELEMENT': 'VEXTRACT',
'MEM_SET': 'MEMSET',
- 'MEM_CPY': 'MEMCPY',
+ 'MEM_CPY': 'MEMCOPY',
'MEM_MOVE': 'MEMMOVE',
'L_SHR': 'LSHR',
'A_SHR': 'ASHR',
}
intrinsics = [
- ["VGATHERPS", "x86_avx2_gather_d_ps_256", ["src", "pBase", "indices", "mask", "scale"]],
+ ["VGATHERPD", "x86_avx2_gather_d_pd_256", ["src", "pBase", "indices", "mask", "scale"]],
+ ["VGATHERPS", "x86_avx2_gather_d_ps_256", ["src", "pBase", "indices", "mask", "scale"]],
["VGATHERDD", "x86_avx2_gather_d_d_256", ["src", "pBase", "indices", "mask", "scale"]],
- ["VSQRTPS", "x86_avx_sqrt_ps_256", ["a"]],
- ["VRSQRTPS", "x86_avx_rsqrt_ps_256", ["a"]],
- ["VRCPPS", "x86_avx_rcp_ps_256", ["a"]],
- ["VMINPS", "x86_avx_min_ps_256", ["a", "b"]],
- ["VMAXPS", "x86_avx_max_ps_256", ["a", "b"]],
- ["VPMINSD", "x86_avx2_pmins_d", ["a", "b"]],
- ["VPMAXSD", "x86_avx2_pmaxs_d", ["a", "b"]],
- ["VROUND", "x86_avx_round_ps_256", ["a", "rounding"]],
+ ["VSQRTPS", "x86_avx_sqrt_ps_256", ["a"]],
+ ["VRSQRTPS", "x86_avx_rsqrt_ps_256", ["a"]],
+ ["VRCPPS", "x86_avx_rcp_ps_256", ["a"]],
+ ["VMINPS", "x86_avx_min_ps_256", ["a", "b"]],
+ ["VMAXPS", "x86_avx_max_ps_256", ["a", "b"]],
+ ["VROUND", "x86_avx_round_ps_256", ["a", "rounding"]],
["VCMPPS", "x86_avx_cmp_ps_256", ["a", "b", "cmpop"]],
["VBLENDVPS", "x86_avx_blendv_ps_256", ["a", "b", "mask"]],
["BEXTR_32", "x86_bmi_bextr_32", ["src", "control"]],
["VMASKLOADD", "x86_avx2_maskload_d_256", ["src", "mask"]],
["VMASKMOVPS", "x86_avx_maskload_ps_256", ["src", "mask"]],
+ ["VMASKSTOREPS", "x86_avx_maskstore_ps_256", ["src", "mask", "val"]],
["VPSHUFB", "x86_avx2_pshuf_b", ["a", "b"]],
- ["VPMOVSXBD", "x86_avx2_pmovsxbd", ["a"]], # sign extend packed 8bit components
- ["VPMOVSXWD", "x86_avx2_pmovsxwd", ["a"]], # sign extend packed 16bit components
- ["VPERMD", "x86_avx2_permd", ["idx", "a"]],
+ ["VPERMD", "x86_avx2_permd", ["a", "idx"]],
+ ["VPERMPS", "x86_avx2_permps", ["idx", "a"]],
+ ["VCVTPD2PS", "x86_avx_cvt_pd2_ps_256", ["a"]],
["VCVTPH2PS", "x86_vcvtph2ps_256", ["a"]],
["VCVTPS2PH", "x86_vcvtps2ph_256", ["a", "round"]],
["VHSUBPS", "x86_avx_hsub_ps_256", ["a", "b"]],
["VPTESTC", "x86_avx_ptestc_256", ["a", "b"]],
["VPTESTZ", "x86_avx_ptestz_256", ["a", "b"]],
["VFMADDPS", "x86_fma_vfmadd_ps_256", ["a", "b", "c"]],
- ["VCVTTPS2DQ", "x86_avx_cvtt_ps2dq_256", ["a"]],
["VMOVMSKPS", "x86_avx_movmsk_ps_256", ["a"]],
["INTERRUPT", "x86_int", ["a"]],
]
output_lines += [
'#include \"builder.h\"',
- ''
+ '',
+ 'namespace SwrJit',
+ '{',
+ ' using namespace llvm;',
+ '',
]
for func in functions:
first_arg = False
output_lines += [
- '//////////////////////////////////////////////////////////////////////////',
- '%sBuilder::%s(%s)' % (func['return'], name, func['args_nodefs']),
- '{',
- ' return IRB()->%s(%s);' % (func['name'], func_args),
- '}',
+ ' //////////////////////////////////////////////////////////////////////////',
+ ' %sBuilder::%s(%s)' % (func['return'], name, func['args_nodefs']),
+ ' {',
+ ' return IRB()->%s(%s);' % (func['name'], func_args),
+ ' }',
'',
]
-
+ output_lines.append('}')
output_file.write('\n'.join(output_lines) + '\n')
"""
output_lines += [
'#include \"builder.h\"',
- ''
+ '',
+ 'namespace SwrJit',
+ '{',
+ ' using namespace llvm;',
+ '',
]
for inst in intrinsics:
first = False
output_lines += [
- '//////////////////////////////////////////////////////////////////////////',
- 'Value *Builder::%s(%s)' % (inst[0], args),
- '{',
- ' Function *func = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::%s);' % inst[1],
- ' return CALL(func, std::initializer_list<Value*>{%s});' % pass_args,
- '}',
+ ' //////////////////////////////////////////////////////////////////////////',
+ ' Value *Builder::%s(%s)' % (inst[0], args),
+ ' {',
+ ' Function *func = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::%s);' % inst[1],
+ ]
+ if inst[0] == "VPERMD":
+ rev_args = ''
+ first = True
+ for arg in reversed(inst[2]):
+ if not first:
+ rev_args += ', '
+ rev_args += arg
+ first = False
+
+ output_lines += [
+ '#if (HAVE_LLVM == 0x306) && (LLVM_VERSION_PATCH == 0)',
+ ' return CALL(func, std::initializer_list<Value*>{%s});' % rev_args,
+ '#else',
+ ]
+ output_lines += [
+ ' return CALL(func, std::initializer_list<Value*>{%s});' % pass_args,
+ ]
+ if inst[0] == "VPERMD":
+ output_lines += [
+ '#endif',
+ ]
+ output_lines += [
+ ' }',
'',
]
+ output_lines.append('}')
output_file.write('\n'.join(output_lines) + '\n')
"""