(UNSPEC_FIST 66)
(UNSPEC_F2XM1 67)
(UNSPEC_TAN 68)
+ (UNSPEC_FXAM 69)
; x87 Rounding
(UNSPEC_FRNDINT_FLOOR 70)
(UNSPEC_PSHUFB 120)
(UNSPEC_PSIGN 121)
(UNSPEC_PALIGNR 122)
+
+ ; For SSE4A support
+ (UNSPEC_EXTRQI 130)
+ (UNSPEC_EXTRQ 131)
+ (UNSPEC_INSERTQI 132)
+ (UNSPEC_INSERTQ 133)
])
(define_constants
\f
;; Processor type. This attribute must exactly match the processor_type
;; enumeration in i386.h.
-(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,nocona,core2,generic32,generic64"
+(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,
+ nocona,core2,generic32,generic64,amdfam10"
(const (symbol_ref "ix86_tune")))
;; A basic instruction type. Refinements due to arguments to be
incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
icmp,test,ibr,setcc,icmov,
push,pop,call,callv,leave,
- str,
+ str,bitmanip,
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
sselog,sselog1,sseiadd,sseishft,sseimul,
- sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
(const_string "other"))
(cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
(const_string "i387")
(eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul,
- sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv")
+ sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins")
(const_string "sse")
(eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
(const_string "mmx")
;; The (bounding maximum) length of an instruction immediate.
(define_attr "length_immediate" ""
- (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave")
+ (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
+ bitmanip")
(const_int 0)
(eq_attr "unit" "i387,sse,mmx")
(const_int 0)
;; Set when 0f opcode prefix is used.
(define_attr "prefix_0f" ""
(if_then_else
- (ior (eq_attr "type" "imovx,setcc,icmov")
+ (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip")
(eq_attr "unit" "sse,mmx"))
(const_int 1)
(const_int 0)))
(const_string "load")
(and (eq_attr "type"
"!alu1,negnot,ishift1,
- imov,imovx,icmp,test,
+ imov,imovx,icmp,test,bitmanip,
fmov,fcmp,fsgn,
sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1,
mmx,mmxmov,mmxcmp,mmxcvt")
"sahf"
[(set_attr "length" "1")
(set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")
(set_attr "mode" "SI")])
;; Pentium Pro can do steps 1 through 3 in one go.
-
+;; comi*, ucomi*, fcomi*, ficomi*,fucomi* (i387 instructions set condition codes)
(define_insn "*cmpfp_i_mixed"
[(set (reg:CCFP FLAGS_REG)
(compare:CCFP (match_operand 0 "register_operand" "f,x")
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
(const_string "DF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_i_sse"
[(set (reg:CCFP FLAGS_REG)
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
(const_string "DF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_i_i387"
[(set (reg:CCFP FLAGS_REG)
(const_string "DF")
]
(const_string "XF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_iu_mixed"
[(set (reg:CCFPU FLAGS_REG)
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
(const_string "DF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_iu_sse"
[(set (reg:CCFPU FLAGS_REG)
(if_then_else (match_operand:SF 1 "" "")
(const_string "SF")
(const_string "DF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*cmpfp_iu_387"
[(set (reg:CCFPU FLAGS_REG)
(const_string "DF")
]
(const_string "XF")))
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "direct")])
\f
;; Move instructions.
[(set_attr "type" "imov")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")])
(define_expand "movhi"
[(set (match_operand:HI 0 "nonimmediate_operand" "")
[(set_attr "type" "imov")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")])
+;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10
(define_insn "*swaphi_2"
[(set (match_operand:HI 0 "register_operand" "+r")
(match_operand:HI 1 "register_operand" "+r"))
[(set_attr "type" "imov")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
+;; Not added amdfam10_decode since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10
(define_insn "*swapqi_2"
[(set (match_operand:QI 0 "register_operand" "+q")
(match_operand:QI 1 "register_operand" "+q"))
[(set_attr "type" "imov")
(set_attr "mode" "DI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "double")])
(define_expand "movti"
[(set (match_operand:TI 0 "nonimmediate_operand" "")
"cvttss2si{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
(define_insn "fix_truncdfdi_sse"
[(set (match_operand:DI 0 "register_operand" "=r,r")
"cvttsd2si{q}\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
(define_insn "fix_truncsfsi_sse"
[(set (match_operand:SI 0 "register_operand" "=r,r")
"cvttss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
(define_insn "fix_truncdfsi_sse"
[(set (match_operand:SI 0 "register_operand" "=r,r")
"cvttsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns.
(define_peephole2
[(set_attr "length" "2")
(set_attr "mode" "HI")
(set_attr "unit" "i387")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
\f
;; Conversion between fixed point and floating point.
(set_attr "mode" "SF")
(set_attr "unit" "*,i387,*,*")
(set_attr "athlon_decode" "*,*,vector,double")
+ (set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsisf2_sse"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
(set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsisf2_i387"
(set_attr "mode" "SF")
(set_attr "unit" "*,i387,*,*")
(set_attr "athlon_decode" "*,*,vector,double")
+ (set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdisf2_sse"
[(set_attr "type" "sseicvt")
(set_attr "mode" "SF")
(set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdisf2_i387"
(set_attr "mode" "DF")
(set_attr "unit" "*,i387,*,*")
(set_attr "athlon_decode" "*,*,double,direct")
+ (set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsidf2_sse"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatsidf2_i387"
(set_attr "mode" "DF")
(set_attr "unit" "*,i387,*,*")
(set_attr "athlon_decode" "*,*,double,direct")
+ (set_attr "amdfam10_decode" "*,*,vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdidf2_sse"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
(set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "fp_int_src" "true")])
(define_insn "*floatdidf2_i387"
"TARGET_64BIT"
"")
+;; On AMDFAM10
+;; IMUL reg64, reg64, imm8 Direct
+;; IMUL reg64, mem64, imm8 VectorPath
+;; IMUL reg64, reg64, imm32 Direct
+;; IMUL reg64, mem64, imm32 VectorPath
+;; IMUL reg64, reg64 Direct
+;; IMUL reg64, mem64 Direct
+
(define_insn "*muldi3_1_rex64"
[(set (match_operand:DI 0 "register_operand" "=r,r,r")
(mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0")
(match_operand 1 "memory_operand" ""))
(const_string "vector")]
(const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(and (eq_attr "alternative" "0,1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
(set_attr "mode" "DI")])
(define_expand "mulsi3"
""
"")
+;; On AMDFAM10
+;; IMUL reg32, reg32, imm8 Direct
+;; IMUL reg32, mem32, imm8 VectorPath
+;; IMUL reg32, reg32, imm32 Direct
+;; IMUL reg32, mem32, imm32 VectorPath
+;; IMUL reg32, reg32 Direct
+;; IMUL reg32, mem32 Direct
+
(define_insn "*mulsi3_1"
[(set (match_operand:SI 0 "register_operand" "=r,r,r")
(mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
(match_operand 1 "memory_operand" ""))
(const_string "vector")]
(const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(and (eq_attr "alternative" "0,1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
(set_attr "mode" "SI")])
(define_insn "*mulsi3_1_zext"
(match_operand 1 "memory_operand" ""))
(const_string "vector")]
(const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(and (eq_attr "alternative" "0,1")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "vector")]
+ (const_string "direct")))
(set_attr "mode" "SI")])
(define_expand "mulhi3"
"TARGET_HIMODE_MATH"
"")
+;; On AMDFAM10
+;; IMUL reg16, reg16, imm8 VectorPath
+;; IMUL reg16, mem16, imm8 VectorPath
+;; IMUL reg16, reg16, imm16 VectorPath
+;; IMUL reg16, mem16, imm16 VectorPath
+;; IMUL reg16, reg16 Direct
+;; IMUL reg16, mem16 Direct
(define_insn "*mulhi3_1"
[(set (match_operand:HI 0 "register_operand" "=r,r,r")
(mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0")
(eq_attr "alternative" "1,2")
(const_string "vector")]
(const_string "direct")))
+ (set (attr "amdfam10_decode")
+ (cond [(eq_attr "alternative" "0,1")
+ (const_string "vector")]
+ (const_string "direct")))
(set_attr "mode" "HI")])
(define_expand "mulqi3"
"TARGET_QIMODE_MATH"
"")
+;;On AMDFAM10
+;; MUL reg8 Direct
+;; MUL mem8 Direct
+
(define_insn "*mulqi3_1"
[(set (match_operand:QI 0 "register_operand" "=a")
(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
(set_attr "mode" "QI")])
(define_expand "umulqihi3"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
(set_attr "mode" "QI")])
(define_expand "mulqihi3"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "direct")))
+ (set_attr "amdfam10_decode" "direct")
(set_attr "mode" "QI")])
(define_expand "umulditi3"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_expand "mulditi3"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
(define_expand "mulsidi3"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_expand "umuldi3_highpart"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
(define_expand "umulsi3_highpart"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_insn "*umulsi3_highpart_zext"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_expand "smuldi3_highpart"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "DI")])
(define_expand "smulsi3_highpart"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
(define_insn "*smulsi3_highpart_zext"
(if_then_else (eq_attr "cpu" "athlon")
(const_string "vector")
(const_string "double")))
+ (set_attr "amdfam10_decode" "double")
(set_attr "mode" "SI")])
;; The patterns that match these are at the end of this file.
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
(define_expand "x86_64_shift_adj"
[(set (reg:CCZ FLAGS_REG)
(set_attr "prefix_0f" "1")
(set_attr "mode" "SI")
(set_attr "pent_pair" "np")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
(define_expand "x86_shift_adj_1"
[(set (reg:CCZ FLAGS_REG)
[(set_attr "type" "ishift")
(set_attr "prefix_0f" "1")
(set_attr "mode" "DI")
- (set_attr "athlon_decode" "vector")])
+ (set_attr "athlon_decode" "vector")
+ (set_attr "amdfam10_decode" "vector")])
(define_expand "ashrdi3"
[(set (match_operand:DI 0 "shiftdi_operand" "")
[(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
(clobber (reg:CC FLAGS_REG))])]
""
- "")
+{
+ if (TARGET_ABM)
+ {
+ emit_insn (gen_clzsi2_abm (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "clzsi2_abm"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (clz:SI (match_operand:SI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ABM"
+ "lzcnt{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
(define_insn "*bsr"
[(set (match_operand:SI 0 "register_operand" "=r")
(clobber (reg:CC FLAGS_REG))]
""
"bsr{l}\t{%1, %0|%0, %1}"
- [(set_attr "prefix_0f" "1")])
+ [(set_attr "prefix_0f" "1")
+ (set_attr "mode" "SI")])
+
+(define_insn "popcountsi2"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT"
+ "popcnt{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
+
+(define_insn "*popcountsi2_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r")
+ (popcount:SI (match_dup 1)))]
+ "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+ "popcnt{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
+
+(define_insn "*popcountsi2_cmp_zext"
+ [(set (reg FLAGS_REG)
+ (compare
+ (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI(popcount:SI (match_dup 1))))]
+ "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+ "popcnt{l}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
(define_insn "bswapsi2"
[(set (match_operand:SI 0 "register_operand" "=r")
[(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63)))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_64BIT"
- "")
+{
+ if (TARGET_ABM)
+ {
+ emit_insn (gen_clzdi2_abm (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "clzdi2_abm"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (clz:DI (match_operand:DI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_ABM"
+ "lzcnt{q}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "DI")])
(define_insn "*bsr_rex64"
[(set (match_operand:DI 0 "register_operand" "=r")
(clobber (reg:CC FLAGS_REG))]
"TARGET_64BIT"
"bsr{q}\t{%1, %0|%0, %1}"
- [(set_attr "prefix_0f" "1")])
+ [(set_attr "prefix_0f" "1")
+ (set_attr "mode" "DI")])
+
+(define_insn "popcountdi2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (popcount:DI (match_operand:DI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT && TARGET_POPCNT"
+ "popcnt{q}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "DI")])
+
+(define_insn "*popcountdi2_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (popcount:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))
+ (const_int 0)))
+ (set (match_operand:DI 0 "register_operand" "=r")
+ (popcount:DI (match_dup 1)))]
+ "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+ "popcnt{q}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "DI")])
+
+(define_expand "clzhi2"
+ [(parallel
+ [(set (match_operand:HI 0 "register_operand" "")
+ (minus:HI (const_int 15)
+ (clz:HI (match_operand:HI 1 "nonimmediate_operand" ""))))
+ (clobber (reg:CC FLAGS_REG))])
+ (parallel
+ [(set (match_dup 0) (xor:HI (match_dup 0) (const_int 15)))
+ (clobber (reg:CC FLAGS_REG))])]
+ ""
+{
+ if (TARGET_ABM)
+ {
+ emit_insn (gen_clzhi2_abm (operands[0], operands[1]));
+ DONE;
+ }
+})
+
+(define_insn "clzhi2_abm"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (clz:HI (match_operand:HI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_ABM"
+ "lzcnt{w}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "HI")])
+
+(define_insn "*bsrhi"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (minus:HI (const_int 15)
+ (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))
+ (clobber (reg:CC FLAGS_REG))]
+ ""
+ "bsr{w}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_0f" "1")
+ (set_attr "mode" "HI")])
+
+(define_insn "popcounthi2"
+ [(set (match_operand:HI 0 "register_operand" "=r")
+ (popcount:HI (match_operand:HI 1 "nonimmediate_operand" "")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_POPCNT"
+ "popcnt{w}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "HI")])
+
+(define_insn "*popcounthi2_cmp"
+ [(set (reg FLAGS_REG)
+ (compare
+ (popcount:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))
+ (const_int 0)))
+ (set (match_operand:HI 0 "register_operand" "=r")
+ (popcount:HI (match_dup 1)))]
+ "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+ "popcnt{w}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "HI")])
\f
;; Thread-local storage patterns for ELF.
;;
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")
- (set_attr "athlon_decode" "direct")])
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "sqrt_extend<mode>xf2_i387"
[(set (match_operand:XF 0 "register_operand" "=f")
"fsqrt"
[(set_attr "type" "fpspc")
(set_attr "mode" "XF")
- (set_attr "athlon_decode" "direct")])
+ (set_attr "athlon_decode" "direct")
+ (set_attr "amdfam10_decode" "direct")])
(define_insn "*sqrt<mode>2_sse"
[(set (match_operand:SSEMODEF 0 "register_operand" "=x")
"sqrts<ssemodefsuffix>\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "mode" "<MODE>")
- (set_attr "athlon_decode" "*")])
+ (set_attr "athlon_decode" "*")
+ (set_attr "amdfam10_decode" "*")])
(define_expand "sqrt<mode>2"
[(set (match_operand:X87MODEF12 0 "register_operand" "")
&& flag_unsafe_math_optimizations"
{
rtx one = gen_reg_rtx (XFmode);
- operands[2] = CONST1_RTX (XFmode); /* fld1 */
+ rtx op2 = CONST1_RTX (XFmode); /* fld1 */
- emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1], operands[2]));
+ emit_insn (gen_fptanxf4_i387 (one, operands[0], operands[1], op2));
DONE;
})
rtx op0 = gen_reg_rtx (XFmode);
rtx one = gen_reg_rtx (<MODE>mode);
- operands[2] = CONST1_RTX (<MODE>mode); /* fld1 */
+ rtx op2 = CONST1_RTX (<MODE>mode); /* fld1 */
emit_insn (gen_fptan_extend<mode>xf4_i387 (one, op0,
- operands[1], operands[2]));
+ operands[1], op2));
emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
DONE;
})
{
rtx op0 = gen_reg_rtx (XFmode);
- operands[2] = gen_reg_rtx (<MODE>mode);
- emit_move_insn (operands[2], CONST1_RTX (<MODE>mode)); /* fld1 */
+ rtx op2 = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (op2, CONST1_RTX (<MODE>mode)); /* fld1 */
- emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, operands[2], operands[1]));
+ emit_insn (gen_fpatan_extend<mode>xf3_i387 (op0, op2, operands[1]));
emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
DONE;
})
{
rtx op0 = gen_reg_rtx (XFmode);
- operands[2] = gen_reg_rtx (XFmode);
- emit_move_insn (operands[2], standard_80387_constant_rtx (4)); /* fldln2 */
+ rtx op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, standard_80387_constant_rtx (4)); /* fldln2 */
- emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], operands[2]));
+ emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
DONE;
})
{
rtx op0 = gen_reg_rtx (XFmode);
- operands[2] = gen_reg_rtx (XFmode);
- emit_move_insn (operands[2], standard_80387_constant_rtx (3)); /* fldlg2 */
+ rtx op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, standard_80387_constant_rtx (3)); /* fldlg2 */
- emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], operands[2]));
+ emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
DONE;
})
{
rtx op0 = gen_reg_rtx (XFmode);
- operands[2] = gen_reg_rtx (XFmode);
- emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */
+ rtx op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */
- emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], operands[2]));
+ emit_insn (gen_fyl2x_extend<mode>xf3_i387 (op0, operands[1], op2));
emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
DONE;
})
DONE;
})
-(define_insn "*fxtractxf3_i387"
+(define_insn "fxtractxf3_i387"
[(set (match_operand:XF 0 "register_operand" "=f")
(unspec:XF [(match_operand:XF 2 "register_operand" "0")]
UNSPEC_XTRACT_FRACT))
DONE;
})
-(define_expand "ilogbsi2"
- [(parallel [(set (match_dup 2)
- (unspec:XF [(match_operand:XF 1 "register_operand" "")]
- UNSPEC_XTRACT_FRACT))
- (set (match_dup 3)
- (unspec:XF [(match_dup 1)] UNSPEC_XTRACT_EXP))])
- (parallel [(set (match_operand:SI 0 "register_operand" "")
- (fix:SI (match_dup 3)))
- (clobber (reg:CC FLAGS_REG))])]
+(define_expand "ilogbxf2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:XF 1 "register_operand" ""))]
"TARGET_USE_FANCY_MATH_387
- && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
&& flag_unsafe_math_optimizations && !optimize_size"
{
- operands[2] = gen_reg_rtx (XFmode);
- operands[3] = gen_reg_rtx (XFmode);
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_fxtractxf3_i387 (op0, op1, operands[1]));
+ emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
+ DONE;
+})
+
+(define_expand "ilogb<mode>2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:X87MODEF12 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)
+ && flag_unsafe_math_optimizations && !optimize_size"
+{
+ rtx op0 = gen_reg_rtx (XFmode);
+ rtx op1 = gen_reg_rtx (XFmode);
+
+ emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
+ emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
+ DONE;
})
(define_insn "*f2xm1xf2_i387"
"TARGET_USE_FANCY_MATH_387
&& flag_unsafe_math_optimizations && !optimize_size"
{
- operands[2] = gen_reg_rtx (XFmode);
- emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */
+ rtx op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, standard_80387_constant_rtx (5)); /* fldl2e */
- emit_insn (gen_expNcorexf3 (operands[0], operands[1], operands[2]));
+ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
DONE;
})
"TARGET_USE_FANCY_MATH_387
&& flag_unsafe_math_optimizations && !optimize_size"
{
- operands[2] = gen_reg_rtx (XFmode);
- emit_move_insn (operands[2], standard_80387_constant_rtx (6)); /* fldl2t */
+ rtx op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, standard_80387_constant_rtx (6)); /* fldl2t */
- emit_insn (gen_expNcorexf3 (operands[0], operands[1], operands[2]));
+ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
DONE;
})
"TARGET_USE_FANCY_MATH_387
&& flag_unsafe_math_optimizations && !optimize_size"
{
- operands[2] = gen_reg_rtx (XFmode);
- emit_move_insn (operands[2], CONST1_RTX (XFmode)); /* fld1 */
+ rtx op2 = gen_reg_rtx (XFmode);
+ emit_move_insn (op2, CONST1_RTX (XFmode)); /* fld1 */
- emit_insn (gen_expNcorexf3 (operands[0], operands[1], operands[2]));
+ emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
DONE;
})
DONE;
})
+(define_insn "fxam<mode>2_i387"
+ [(set (match_operand:HI 0 "register_operand" "=a")
+ (unspec:HI
+ [(match_operand:X87MODEF 1 "register_operand" "f")]
+ UNSPEC_FXAM))]
+ "TARGET_USE_FANCY_MATH_387"
+ "fxam\n\tfnstsw\t%0"
+ [(set_attr "type" "multi")
+ (set_attr "unit" "i387")
+ (set_attr "mode" "<MODE>")])
+
+(define_expand "isinf<mode>2"
+ [(use (match_operand:SI 0 "register_operand" ""))
+ (use (match_operand:X87MODEF 1 "register_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+ && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+ || TARGET_MIX_SSE_I387)"
+{
+ rtx mask = GEN_INT (0x45);
+ rtx val = GEN_INT (0x05);
+
+ rtx cond;
+
+ rtx scratch = gen_reg_rtx (HImode);
+ rtx res = gen_reg_rtx (QImode);
+
+ emit_insn (gen_fxam<mode>2_i387 (scratch, operands[1]));
+ emit_insn (gen_andqi_ext_0 (scratch, scratch, mask));
+ emit_insn (gen_cmpqi_ext_3 (scratch, val));
+ cond = gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG (CCmode, FLAGS_REG),
+ const0_rtx);
+ emit_insn (gen_rtx_SET (VOIDmode, res, cond));
+ emit_insn (gen_zero_extendqisi2 (operands[0], res));
+ DONE;
+})
+
\f
;; Block operation instructions
[(use (match_operand:BLK 0 "memory_operand" ""))
(use (match_operand:BLK 1 "memory_operand" ""))
(use (match_operand:SI 2 "nonmemory_operand" ""))
- (use (match_operand:SI 3 "const_int_operand" ""))]
+ (use (match_operand:SI 3 "const_int_operand" ""))
+ (use (match_operand:SI 4 "const_int_operand" ""))
+ (use (match_operand:SI 5 "const_int_operand" ""))]
""
{
if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3],
- operands[3], constm1_rtx))
+ operands[4], operands[5]))
DONE;
else
FAIL;
[(use (match_operand:BLK 0 "memory_operand" ""))
(use (match_operand:BLK 1 "memory_operand" ""))
(use (match_operand:DI 2 "nonmemory_operand" ""))
- (use (match_operand:DI 3 "const_int_operand" ""))]
+ (use (match_operand:DI 3 "const_int_operand" ""))
+ (use (match_operand:SI 4 "const_int_operand" ""))
+ (use (match_operand:SI 5 "const_int_operand" ""))]
"TARGET_64BIT"
{
if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3],
- operands[3], constm1_rtx))
+ operands[4], operands[5]))
DONE;
else
FAIL;
[(use (match_operand:BLK 0 "memory_operand" ""))
(use (match_operand:SI 1 "nonmemory_operand" ""))
(use (match_operand 2 "const_int_operand" ""))
- (use (match_operand 3 "const_int_operand" ""))]
+ (use (match_operand 3 "const_int_operand" ""))
+ (use (match_operand:SI 4 "const_int_operand" ""))
+ (use (match_operand:SI 5 "const_int_operand" ""))]
""
{
if (ix86_expand_setmem (operands[0], operands[1],
operands[2], operands[3],
- operands[3], constm1_rtx))
+ operands[4], operands[5]))
DONE;
else
FAIL;
{
if (ix86_expand_setmem (operands[0], operands[1],
operands[2], operands[3],
- operands[3], constm1_rtx))
+ operands[4], operands[5]))
DONE;
else
FAIL;
(mult:DI (match_operand:DI 1 "memory_operand" "")
(match_operand:DI 2 "immediate_operand" "")))
(clobber (reg:CC FLAGS_REG))])]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& !satisfies_constraint_K (operands[2])"
[(set (match_dup 3) (match_dup 1))
(parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2)))
(mult:SI (match_operand:SI 1 "memory_operand" "")
(match_operand:SI 2 "immediate_operand" "")))
(clobber (reg:CC FLAGS_REG))])]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& !satisfies_constraint_K (operands[2])"
[(set (match_dup 3) (match_dup 1))
(parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2)))
(mult:SI (match_operand:SI 1 "memory_operand" "")
(match_operand:SI 2 "immediate_operand" ""))))
(clobber (reg:CC FLAGS_REG))])]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& !satisfies_constraint_K (operands[2])"
[(set (match_dup 3) (match_dup 1))
(parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
(match_operand:DI 2 "const_int_operand" "")))
(clobber (reg:CC FLAGS_REG))])
(match_scratch:DI 3 "r")]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& satisfies_constraint_K (operands[2])"
[(set (match_dup 3) (match_dup 2))
(parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3)))
(match_operand:SI 2 "const_int_operand" "")))
(clobber (reg:CC FLAGS_REG))])
(match_scratch:SI 3 "r")]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
&& satisfies_constraint_K (operands[2])"
[(set (match_dup 3) (match_dup 2))
(parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3)))
(match_operand:HI 2 "immediate_operand" "")))
(clobber (reg:CC FLAGS_REG))])
(match_scratch:HI 3 "r")]
- "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
+ "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size"
[(set (match_dup 3) (match_dup 2))
(parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3)))
(clobber (reg:CC FLAGS_REG))])]