(match_dup 3)))
(clobber (reg:CC FLAGS_REG))])])
+;; Clear one variable-numbered bit of an SWI12-mode value (SWI12 is a mode
+;; iterator defined elsewhere -- presumably QImode/HImode; confirm).
+;; The mask is the low part (subreg offset 0) of (rotate:SI -2 N): rotating
+;; the constant -2 (every bit set except bit 0) left by N leaves only
+;; bit N (mod 32) clear.
+;; The "#" output template means this pattern emits no assembly itself;
+;; with split condition "&& 1" it is always rewritten, whenever the insn
+;; condition holds, into the equivalent SImode AND shown below.
+;; NOTE(review): the SImode form is presumably meant to match an SImode
+;; btr pattern defined elsewhere in this file -- confirm.
+(define_insn_and_split "*btr<mode>_1"
+ [(set (match_operand:SWI12 0 "register_operand")
+ (and:SWI12
+ (subreg:SWI12
+ (rotate:SI (const_int -2)
+ (match_operand:QI 2 "register_operand")) 0)
+ (match_operand:SWI12 1 "nonimmediate_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_BT && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (and:SI (rotate:SI (const_int -2) (match_dup 2))
+ (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ /* View the destination register as SImode so it fits the and:SI above.  */
+ operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
+ /* Operand 1 may be memory ("nonimmediate_operand"); load it into a
+    <MODE>mode register first, then take its SImode lowpart.  */
+ if (MEM_P (operands[1]))
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ operands[1] = lowpart_subreg (SImode, operands[1], <MODE>mode);
+})
+
+;; Bit clear expressed as storing 0 into a 1-bit zero_extract of an
+;; SWI12-mode operand, with the bit position given by a zero-extended
+;; QImode register.  The "#" template means no assembly is emitted for
+;; this pattern itself.  The split below only fires when operand 0 is in
+;; memory ("&& MEM_P (operands[0])") and expands to three insns:
+;;   1. load operand 0 into a fresh <MODE>mode pseudo (operand 2);
+;;   2. SImode AND of (rotate:SI -2 N) with that pseudo's SImode lowpart
+;;      (operand 4), written to the SImode lowpart (operand 3) of a
+;;      second fresh pseudo (operand 5);
+;;   3. store operand 5 back to operand 0.
+(define_insn_and_split "*btr<mode>_2"
+ [(set (zero_extract:HI
+ (match_operand:SWI12 0 "nonimmediate_operand")
+ (const_int 1)
+ (zero_extend:SI (match_operand:QI 1 "register_operand")))
+ (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_BT && ix86_pre_reload_split ()"
+ "#"
+ "&& MEM_P (operands[0])"
+ [(set (match_dup 2) (match_dup 0))
+ (parallel
+ [(set (match_dup 3)
+ (and:SI (rotate:SI (const_int -2) (match_dup 1))
+ (match_dup 4)))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 0) (match_dup 5))]
+{
+ /* Operand 2 receives the loaded value; operand 5 holds the result that
+    is stored back.  Both are new <MODE>mode pseudos.  */
+ operands[2] = gen_reg_rtx (<MODE>mode);
+ operands[5] = gen_reg_rtx (<MODE>mode);
+ /* SImode views used by the and:SI: operand 3 is the destination (lowpart
+    of operand 5), operand 4 is the source (lowpart of operand 2).  */
+ operands[3] = lowpart_subreg (SImode, operands[5], <MODE>mode);
+ operands[4] = lowpart_subreg (SImode, operands[2], <MODE>mode);
+})
+
+;; Register-destination form of the 1-bit zero_extract clear: storing 0
+;; into the bit of register operand 0 selected by the zero-extended QImode
+;; register operand 1.  It is rewritten in place as an SImode AND of
+;; (rotate:SI -2 N) with the SImode lowpart of operand 0; no temporaries
+;; are created.
+(define_split
+ [(set (zero_extract:HI
+ (match_operand:SWI12 0 "register_operand")
+ (const_int 1)
+ (zero_extend:SI (match_operand:QI 1 "register_operand")))
+ (const_int 0))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_BT && ix86_pre_reload_split ()"
+ [(parallel
+ [(set (match_dup 0)
+ (and:SI (rotate:SI (const_int -2) (match_dup 1))
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ /* Order matters: operand 2 (the AND source) must be derived from the
+    original <MODE>mode operand 0 before operand 0 itself is replaced by
+    its SImode lowpart.  */
+ operands[2] = lowpart_subreg (SImode, operands[0], <MODE>mode);
+ operands[0] = lowpart_subreg (SImode, operands[0], <MODE>mode);
+})
+
;; These instructions are never faster than the corresponding
;; and/ior/xor operations when using immediate operand, so with
;; 32-bit there's no point. But in 64-bit, we can't hold the
--- /dev/null
+/* PR target/96938 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att" } */
+/* { dg-final { scan-assembler-times "\tbtrl\t" 10 } } */
+
+/* unsigned char: clear bit O of *F, then OR in V shifted left by O.
+   The shift count is used unmasked, in a single expression.  */
+void
+f1 (unsigned char *f, int o, unsigned char v)
+{
+ *f = (*f & ~(1 << o)) | (v << o);
+}
+
+/* unsigned char: clear bit O of *F into an int temporary, then store the
+   temporary ORed with V shifted left by O.  The shift count is unmasked.  */
+void
+f2 (unsigned char *f, int o, unsigned char v)
+{
+ int t = *f & ~(1 << o);
+ *f = t | (v << o);
+}
+
+/* unsigned char: only clear bit O of *F in place.  V is unused here; the
+   parameter list matches the other functions in this file.  */
+void
+f3 (unsigned char *f, int o, unsigned char v)
+{
+ *f &= ~(1 << o);
+}
+
+/* unsigned char: clear bit (O & 31) of *F, then OR in V unshifted.  The
+   bit number is explicitly masked to the range 0..31.  */
+void
+f4 (unsigned char *f, int o, unsigned char v)
+{
+ *f = (*f & ~(1 << (o & 31))) | v;
+}
+
+/* unsigned char: clear bit (O & 31) of *F, then OR in V shifted left by
+   (O & 31).  Both shift counts are explicitly masked to 0..31.  */
+void
+f5 (unsigned char *f, int o, unsigned char v)
+{
+ *f = (*f & ~(1 << (o & 31))) | (v << (o & 31));
+}
+
+/* unsigned short: clear bit O of *F, then OR in V shifted left by O.
+   The shift count is used unmasked, in a single expression.  */
+void
+f6 (unsigned short *f, int o, unsigned short v)
+{
+ *f = (*f & ~(1 << o)) | (v << o);
+}
+
+/* unsigned short: clear bit O of *F into an int temporary, then store the
+   temporary ORed with V shifted left by O.  The shift count is unmasked.  */
+void
+f7 (unsigned short *f, int o, unsigned short v)
+{
+ int t = *f & ~(1 << o);
+ *f = t | (v << o);
+}
+
+/* unsigned short: only clear bit O of *F in place.  V is unused here; the
+   parameter list matches the other functions in this file.  */
+void
+f8 (unsigned short *f, int o, unsigned short v)
+{
+ *f &= ~(1 << o);
+}
+
+/* unsigned short: clear bit (O & 31) of *F, then OR in V unshifted.  The
+   bit number is explicitly masked to the range 0..31.  */
+void
+f9 (unsigned short *f, int o, unsigned short v)
+{
+ *f = (*f & ~(1 << (o & 31))) | v;
+}
+
+/* unsigned short: clear bit (O & 31) of *F, then OR in V shifted left by
+   (O & 31).  Both shift counts are explicitly masked to 0..31.  */
+void
+f10 (unsigned short *f, int o, unsigned short v)
+{
+ *f = (*f & ~(1 << (o & 31))) | (v << (o & 31));
+}