From 78ce265b7148c1b8ef41ab8f3c22a1a8069d2995 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Tue, 7 Aug 2012 10:32:19 -0700
Subject: [PATCH] s390: Convert from sync to atomic optabs

	* config/s390/s390.c (s390_emit_compare_and_swap): Update for
	atomic_compare_and_swapsi_internal name change.
	(s390_expand_insv): Try RISBG last, not requiring operands in
	registers but forcing them there.  Try a limited form of ICM.
	(s390_two_part_insv): New.
	(s390_expand_cs_hqi): Use it.  Update for atomic optabs.
	* config/s390/s390.md (UNSPEC_MOVA): New.
	(TDSI): Remove mode iterator.
	(DGPR, DINT, TDI): New mode iterators.
	(td): New mode attribute.
	(mem_signal_fence): New.
	(mem_thread_fence): Rename from memory_barrier; update for model arg.
	(mem_thread_fence_1): Rename from *memory_barrier.
	(atomic_load, atomic_store): New expanders.
	(atomic_loaddi_1, atomic_loadti_1): New.
	(atomic_storedi_1, atomic_storeti_1): New.
	(sync_compare_and_swap, sync_compare_and_swap): Remove.
	(atomic_compare_and_swap, atomic_compare_and_swap): New.
	(atomic_compare_and_swap_internal): New.
	(*sync_compare_and_swap, *sync_compare_and_swap): Remove.
	(*atomic_compare_and_swap_1): New.
	(*atomic_compare_and_swapdi_2, *atomic_compare_and_swapsi_3): New.
	(sync_, sync_old_): Remove.
	(atomic_fetch_): New.
	(atomic_fetch__iaf): New.
	(sync_, sync_old_): Remove.
	(sync_new_): Remove.
	(atomic_, atomic_fetch_): New.
	(atomic__fetch): New.
	(sync_lock_test_and_set): Remove.
	(atomic_exchange): New.

From-SVN: r190209
---
 gcc/ChangeLog                 |  34 +++
 gcc/config/s390/s390-protos.h |   3 +-
 gcc/config/s390/s390.c        | 302 ++++++++++++++++----------
 gcc/config/s390/s390.md       | 389 ++++++++++++++++++++++++----------
 4 files changed, 505 insertions(+), 223 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5896e0b7f11..bdf47c73ad7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,37 @@
+2012-08-07  Richard Henderson
+
+	* config/s390/s390.c (s390_emit_compare_and_swap): Update for
+	atomic_compare_and_swapsi_internal name change.
+	(s390_expand_insv): Try RISBG last, not requiring operands in
+	registers but forcing them there.  Try a limited form of ICM.
+	(s390_two_part_insv): New.
+	(s390_expand_cs_hqi): Use it.  Update for atomic optabs.
+	* config/s390/s390.md (UNSPEC_MOVA): New.
+	(TDSI): Remove mode iterator.
+	(DGPR, DINT, TDI): New mode iterators.
+	(td): New mode attribute.
+	(mem_signal_fence): New.
+	(mem_thread_fence): Rename from memory_barrier; update for model arg.
+	(mem_thread_fence_1): Rename from *memory_barrier.
+	(atomic_load, atomic_store): New expanders.
+	(atomic_loaddi_1, atomic_loadti_1): New.
+	(atomic_storedi_1, atomic_storeti_1): New.
+	(sync_compare_and_swap, sync_compare_and_swap): Remove.
+	(atomic_compare_and_swap, atomic_compare_and_swap): New.
+	(atomic_compare_and_swap_internal): New.
+	(*sync_compare_and_swap, *sync_compare_and_swap): Remove.
+	(*atomic_compare_and_swap_1): New.
+	(*atomic_compare_and_swapdi_2, *atomic_compare_and_swapsi_3): New.
+	(sync_, sync_old_): Remove.
+	(atomic_fetch_): New.
+	(atomic_fetch__iaf): New.
+	(sync_, sync_old_): Remove.
+	(sync_new_): Remove.
+	(atomic_, atomic_fetch_): New.
+	(atomic__fetch): New.
+	(sync_lock_test_and_set): Remove.
+	(atomic_exchange): New.
+
 2012-08-07  Paul Brook
 	    Sandra Loosemore
 
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 4f1eb42a73f..79673d6d835 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -85,7 +85,8 @@ extern void s390_expand_setmem (rtx, rtx, rtx);
 extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
 extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
 extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
-extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx, rtx);
+extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx,
+                                rtx, rtx, bool);
 extern void s390_expand_atomic (enum machine_mode, enum rtx_code,
                                 rtx, rtx, rtx, bool);
 extern rtx s390_return_addr_rtx (int, rtx);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 3a872911570..20a2db66042 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -896,10 +896,12 @@ s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
    conditional branch testing the result.  */
 
 static rtx
-s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem, rtx cmp, rtx new_rtx)
+s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
+                            rtx cmp, rtx new_rtx)
 {
-  emit_insn (gen_sync_compare_and_swapsi (old, mem, cmp, new_rtx));
-  return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM), const0_rtx);
+  emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
+  return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
+                            const0_rtx);
 }
 
 /* Emit a jump instruction to TARGET.  If COND is NULL_RTX, emit an
@@ -4548,106 +4550,146 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
 {
   int bitsize = INTVAL (op1);
   int bitpos = INTVAL (op2);
+  enum machine_mode mode = GET_MODE (dest);
+  enum machine_mode smode;
+  int smode_bsize, mode_bsize;
+  rtx op, clobber;
 
-  /* On z10 we can use the risbg instruction to implement insv.  */
-  if (TARGET_Z10
-      && ((GET_MODE (dest) == DImode && GET_MODE (src) == DImode)
-          || (GET_MODE (dest) == SImode && GET_MODE (src) == SImode)))
+  /* Generate INSERT IMMEDIATE (IILL et al).  */
+  /* (set (ze (reg)) (const_int)).  */
+  if (TARGET_ZARCH
+      && register_operand (dest, word_mode)
+      && (bitpos % 16) == 0
+      && (bitsize % 16) == 0
+      && const_int_operand (src, VOIDmode))
     {
-      rtx op;
-      rtx clobber;
+      HOST_WIDE_INT val = INTVAL (src);
+      int regpos = bitpos + bitsize;
 
-      op = gen_rtx_SET (GET_MODE(src),
-                        gen_rtx_ZERO_EXTRACT (GET_MODE (dest), dest, op1, op2),
-                        src);
-      clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
-      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
+      while (regpos > bitpos)
+        {
+          enum machine_mode putmode;
+          int putsize;
+
+          if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
+            putmode = SImode;
+          else
+            putmode = HImode;
+
+          putsize = GET_MODE_BITSIZE (putmode);
+          regpos -= putsize;
+          emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
+                                                GEN_INT (putsize),
+                                                GEN_INT (regpos)),
+                          gen_int_mode (val, putmode));
+          val >>= putsize;
+        }
+      gcc_assert (regpos == bitpos);
       return true;
     }
 
-  /* We need byte alignment.  */
-  if (bitsize % BITS_PER_UNIT)
-    return false;
+  smode = smallest_mode_for_size (bitsize, MODE_INT);
+  smode_bsize = GET_MODE_BITSIZE (smode);
+  mode_bsize = GET_MODE_BITSIZE (mode);
 
+  /* Generate STORE CHARACTERS UNDER MASK (STCM et al).  */
   if (bitpos == 0
-      && memory_operand (dest, VOIDmode)
+      && (bitsize % BITS_PER_UNIT) == 0
+      && MEM_P (dest)
       && (register_operand (src, word_mode)
           || const_int_operand (src, VOIDmode)))
     {
       /* Emit standard pattern if possible.  */
-      enum machine_mode mode = smallest_mode_for_size (bitsize, MODE_INT);
-      if (GET_MODE_BITSIZE (mode) == bitsize)
-        emit_move_insn (adjust_address (dest, mode, 0), gen_lowpart (mode, src));
+      if (smode_bsize == bitsize)
+        {
+          emit_move_insn (adjust_address (dest, smode, 0),
+                          gen_lowpart (smode, src));
+          return true;
+        }
 
       /* (set (ze (mem)) (const_int)).  */
       else if (const_int_operand (src, VOIDmode))
        {
          int size = bitsize / BITS_PER_UNIT;
-         rtx src_mem = adjust_address (force_const_mem (word_mode, src), BLKmode,
-                                       GET_MODE_SIZE (word_mode) - size);
+         rtx src_mem = adjust_address (force_const_mem (word_mode, src),
+                                       BLKmode,
+                                       UNITS_PER_WORD - size);
 
          dest = adjust_address (dest, BLKmode, 0);
          set_mem_size (dest, size);
          s390_expand_movmem (dest, src_mem, GEN_INT (size));
+         return true;
        }
 
       /* (set (ze (mem)) (reg)).  */
       else if (register_operand (src, word_mode))
        {
-         if (bitsize <= GET_MODE_BITSIZE (SImode))
+         if (bitsize <= 32)
            emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
                                                  const0_rtx), src);
          else
            {
              /* Emit st,stcmh sequence.  */
-             int stcmh_width = bitsize - GET_MODE_BITSIZE (SImode);
+             int stcmh_width = bitsize - 32;
              int size = stcmh_width / BITS_PER_UNIT;
 
              emit_move_insn (adjust_address (dest, SImode, size),
                              gen_lowpart (SImode, src));
              set_mem_size (dest, size);
-             emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, GEN_INT
-                                                   (stcmh_width), const0_rtx),
-                             gen_rtx_LSHIFTRT (word_mode, src, GEN_INT
-                                               (GET_MODE_BITSIZE (SImode))));
+             emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
                                                    GEN_INT (stcmh_width),
                                                    const0_rtx),
+                             gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
            }
+         return true;
        }
-      else
-       return false;
+    }
 
-      return true;
+  /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al).  */
+  if ((bitpos % BITS_PER_UNIT) == 0
+      && (bitsize % BITS_PER_UNIT) == 0
+      && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
+      && MEM_P (src)
+      && (mode == DImode || mode == SImode)
+      && register_operand (dest, mode))
+    {
+      /* Emit a strict_low_part pattern if possible.  */
+      if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
+        {
+          op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
+          op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src));
+          clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
+          emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
+          return true;
+        }
+
+      /* ??? There are more powerful versions of ICM that are not
+         completely represented in the md file.  */
     }
 
-  /* (set (ze (reg)) (const_int)).  */
-  if (TARGET_ZARCH
-      && register_operand (dest, word_mode)
-      && (bitpos % 16) == 0
-      && (bitsize % 16) == 0
-      && const_int_operand (src, VOIDmode))
+  /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al).  */
+  if (TARGET_Z10 && (mode == DImode || mode == SImode))
     {
-      HOST_WIDE_INT val = INTVAL (src);
-      int regpos = bitpos + bitsize;
+      enum machine_mode mode_s = GET_MODE (src);
 
-      while (regpos > bitpos)
+      if (mode_s == VOIDmode)
        {
-         enum machine_mode putmode;
-         int putsize;
+         /* Assume const_int etc already in the proper mode.  */
+         src = force_reg (mode, src);
+       }
+      else if (mode_s != mode)
+       {
+         gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
+         src = force_reg (mode_s, src);
+         src = gen_lowpart (mode, src);
+       }
 
-         if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
-           putmode = SImode;
-         else
-           putmode = HImode;
+      op = gen_rtx_SET (mode,
+                        gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
+                        src);
+      clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
+      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
 
-         putsize = GET_MODE_BITSIZE (putmode);
-         regpos -= putsize;
-         emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
-                                               GEN_INT (putsize),
-                                               GEN_INT (regpos)),
-                         gen_int_mode (val, putmode));
-         val >>= putsize;
-       }
-      gcc_assert (regpos == bitpos);
       return true;
     }
 
@@ -4717,92 +4759,134 @@ init_alignment_context (struct alignment_context *ac, rtx mem,
       /* As we already have some offset, evaluate the remaining distance.  */
       ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
                                        NULL_RTX, 1, OPTAB_DIRECT);
-
     }
+
   /* Shift is the byte count, but we need the bitcount.  */
-  ac->shift = expand_simple_binop (SImode, MULT, ac->shift, GEN_INT (BITS_PER_UNIT),
-                                   NULL_RTX, 1, OPTAB_DIRECT);
+  ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
+                                   NULL_RTX, 1, OPTAB_DIRECT);
+
   /* Calculate masks.  */
   ac->modemask = expand_simple_binop (SImode, ASHIFT,
-                                      GEN_INT (GET_MODE_MASK (mode)), ac->shift,
-                                      NULL_RTX, 1, OPTAB_DIRECT);
-  ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask, NULL_RTX, 1);
+                                      GEN_INT (GET_MODE_MASK (mode)),
+                                      ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
+  ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
+                                      NULL_RTX, 1);
+}
+
+/* A subroutine of s390_expand_cs_hqi.  Insert INS into VAL.  If possible,
+   use a single insv insn into SEQ2.  Otherwise, put prep insns in SEQ1 and
+   perform the merge in SEQ2.  */
+
+static rtx
+s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
+                    enum machine_mode mode, rtx val, rtx ins)
+{
+  rtx tmp;
+
+  if (ac->aligned)
+    {
+      start_sequence ();
+      tmp = copy_to_mode_reg (SImode, val);
+      if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
+                            const0_rtx, ins))
+        {
+          *seq1 = NULL;
+          *seq2 = get_insns ();
+          end_sequence ();
+          return tmp;
+        }
+      end_sequence ();
+    }
+
+  /* Failed to use insv.  Generate a two part shift and mask.  */
+  start_sequence ();
+  tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
+  *seq1 = get_insns ();
+  end_sequence ();
+
+  start_sequence ();
+  tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
+  *seq2 = get_insns ();
+  end_sequence ();
+
+  return tmp;
 }
 
 /* Expand an atomic compare and swap operation for HImode and QImode.  MEM is
-   the memory location, CMP the old value to compare MEM with and NEW_RTX the value
-   to set if CMP == MEM.
-   CMP is never in memory for compare_and_swap_cc because
-   expand_bool_compare_and_swap puts it into a register for later compare.  */
+   the memory location, CMP the old value to compare MEM with and NEW_RTX the
+   value to set if CMP == MEM.  */
 
 void
-s390_expand_cs_hqi (enum machine_mode mode, rtx target, rtx mem, rtx cmp, rtx new_rtx)
+s390_expand_cs_hqi (enum machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
+                    rtx cmp, rtx new_rtx, bool is_weak)
 {
   struct alignment_context ac;
-  rtx cmpv, newv, val, resv, cc;
+  rtx cmpv, newv, val, resv, cc, seq0, seq1, seq2, seq3;
   rtx res = gen_reg_rtx (SImode);
-  rtx csloop = gen_label_rtx ();
-  rtx csend = gen_label_rtx ();
+  rtx csloop = NULL, csend = NULL;
 
-  gcc_assert (register_operand (target, VOIDmode));
+  gcc_assert (register_operand (vtarget, VOIDmode));
   gcc_assert (MEM_P (mem));
 
   init_alignment_context (&ac, mem, mode);
 
-  /* Shift the values to the correct bit positions.  */
-  if (!(ac.aligned && MEM_P (cmp)))
-    cmp = s390_expand_mask_and_shift (cmp, mode, ac.shift);
-  if (!(ac.aligned && MEM_P (new_rtx)))
-    new_rtx = s390_expand_mask_and_shift (new_rtx, mode, ac.shift);
-
   /* Load full word.  Subsequent loads are performed by CS.  */
   val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
                              NULL_RTX, 1, OPTAB_DIRECT);
 
+  /* Prepare insertions of cmp and new_rtx into the loaded value.  When
+     possible, we try to use insv to make this happen efficiently.  If
+     that fails we'll generate code both inside and outside the loop.  */
+  cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
+  newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
+
+  if (seq0)
+    emit_insn (seq0);
+  if (seq1)
+    emit_insn (seq1);
+
   /* Start CS loop.  */
-  emit_label (csloop);
+  if (!is_weak)
+    {
+      /* Begin assuming success.  */
+      emit_move_insn (btarget, const1_rtx);
+
+      csloop = gen_label_rtx ();
+      csend = gen_label_rtx ();
+      emit_label (csloop);
+    }
+
   /* val = "<mem>00..0<mem>"
    * cmp = "00..0<cmp>00..0"
    * new = "00..0<new>00..0"
    */
 
-  /* Patch cmp and new with val at correct position.  */
-  if (ac.aligned && MEM_P (cmp))
-    {
-      cmpv = force_reg (SImode, val);
-      store_bit_field (cmpv, GET_MODE_BITSIZE (mode), 0,
-                       0, 0, SImode, cmp);
-    }
+  emit_insn (seq2);
+  emit_insn (seq3);
+
+  cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
+  if (is_weak)
+    emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
   else
-    cmpv = force_reg (SImode, expand_simple_binop (SImode, IOR, cmp, val,
-                                                   NULL_RTX, 1, OPTAB_DIRECT));
-  if (ac.aligned && MEM_P (new_rtx))
     {
-      newv = force_reg (SImode, val);
-      store_bit_field (newv, GET_MODE_BITSIZE (mode), 0,
-                       0, 0, SImode, new_rtx);
-    }
-  else
-    newv = force_reg (SImode, expand_simple_binop (SImode, IOR, new_rtx, val,
-                                                   NULL_RTX, 1, OPTAB_DIRECT));
+      /* Jump to end if we're done (likely?).  */
+      s390_emit_jump (csend, cc);
 
-  /* Jump to end if we're done (likely?).  */
-  s390_emit_jump (csend, s390_emit_compare_and_swap (EQ, res, ac.memsi,
-                                                     cmpv, newv));
+      /* Check for changes outside mode, and loop internal if so.  */
+      resv = expand_simple_binop (SImode, AND, res, ac.modemaski,
+                                  NULL_RTX, 1, OPTAB_DIRECT);
+      cc = s390_emit_compare (NE, resv, val);
+      emit_move_insn (val, resv);
+      s390_emit_jump (csloop, cc);
 
-  /* Check for changes outside mode.  */
-  resv = expand_simple_binop (SImode, AND, res, ac.modemaski,
-                              NULL_RTX, 1, OPTAB_DIRECT);
-  cc = s390_emit_compare (NE, resv, val);
-  emit_move_insn (val, resv);
-  /* Loop internal if so.  */
-  s390_emit_jump (csloop, cc);
-
-  emit_label (csend);
+      /* Failed.  */
+      emit_move_insn (btarget, const0_rtx);
+      emit_label (csend);
+    }
 
   /* Return the correct part of the bitfield.  */
-  convert_move (target, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
-                                             NULL_RTX, 1, OPTAB_DIRECT), 1);
+  convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
                                               NULL_RTX, 1, OPTAB_DIRECT), 1);
 }
 
 /* Expand an atomic operation CODE of mode MODE.  MEM is the memory location
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 096f2665534..0e43e51a486 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -84,6 +84,7 @@
 
    ; Atomic Support
    UNSPEC_MB
+   UNSPEC_MOVA
 
    ; TLS relocation specifiers
    UNSPEC_TLSGD
@@ -349,21 +350,19 @@
 (define_mode_iterator DD_DF [DF DD])
 (define_mode_iterator TD_TF [TF TD])
 
-;; This mode iterator allows 31-bit and 64-bit TDSI patterns to be generated
-;; from the same template.
-(define_mode_iterator TDSI [(TI "TARGET_64BIT") DI SI])
-
 ;; These mode iterators allow 31-bit and 64-bit GPR patterns to be generated
 ;; from the same template.
 (define_mode_iterator GPR [(DI "TARGET_ZARCH") SI])
+(define_mode_iterator DGPR [(TI "TARGET_ZARCH") DI SI])
 (define_mode_iterator DSI [DI SI])
+(define_mode_iterator TDI [TI DI])
 
 ;; These mode iterators allow :P to be used for patterns that operate on
 ;; pointer-sized quantities.  Exactly one of the two alternatives will match.
 (define_mode_iterator P [(DI "TARGET_64BIT") (SI "!TARGET_64BIT")])
 
-;; These macros refer to the actual word_mode of the configuration. This is equal
-;; to Pmode except on 31-bit machines in zarch mode.
+;; These macros refer to the actual word_mode of the configuration.
+;; This is equal to Pmode except on 31-bit machines in zarch mode.
 (define_mode_iterator DW [(TI "TARGET_ZARCH") (DI "!TARGET_ZARCH")])
 (define_mode_iterator W [(DI "TARGET_ZARCH") (SI "!TARGET_ZARCH")])
 
@@ -379,6 +378,7 @@
 ;; same template.
 (define_mode_iterator INT [(DI "TARGET_ZARCH") SI HI QI])
 (define_mode_iterator INTALL [TI DI SI HI QI])
+(define_mode_iterator DINT [(TI "TARGET_ZARCH") DI SI HI QI])
 
 ;; This iterator allows some 'ashift' and 'lshiftrt' pattern to be defined from
 ;; the same template.
@@ -487,6 +487,9 @@
 ;; and "cds" in DImode.
 (define_mode_attr tg [(TI "g") (DI "")])
 
+;; In TDI templates, a string like "csg".
+(define_mode_attr td [(TI "d") (DI "")])
+
 ;; In GPR templates, a string like "cdbr" will expand to "cgdbr" in DImode
 ;; and "cfdbr" in SImode.
 (define_mode_attr gf [(DI "g") (SI "f")])
@@ -8739,164 +8742,324 @@
 ;;
 
 ;
-; memory barrier pattern.
+; memory barrier patterns.
 ;
 
-(define_expand "memory_barrier"
-  [(set (match_dup 0)
-        (unspec:BLK [(match_dup 0)] UNSPEC_MB))]
+(define_expand "mem_signal_fence"
+  [(match_operand:SI 0 "const_int_operand")]    ;; model
   ""
 {
-  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-  MEM_VOLATILE_P (operands[0]) = 1;
+  /* The s390 memory model is strong enough not to require any
+     barrier in order to synchronize a thread with itself.  */
+  DONE;
+})
+
+(define_expand "mem_thread_fence"
+  [(match_operand:SI 0 "const_int_operand")]    ;; model
+  ""
+{
+  /* Unless this is a SEQ_CST fence, the s390 memory model is strong
+     enough not to require barriers of any kind.  */
+  if (INTVAL (operands[0]) == MEMMODEL_SEQ_CST)
+    {
+      rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
+      MEM_VOLATILE_P (mem) = 1;
+      emit_insn (gen_mem_thread_fence_1 (mem));
+    }
+  DONE;
 })
 
-(define_insn "*memory_barrier"
+; Although bcr is superscalar on Z10, this variant will never
+; become part of an execution group.
+(define_insn "mem_thread_fence_1" [(set (match_operand:BLK 0 "" "") (unspec:BLK [(match_dup 0)] UNSPEC_MB))] "" "bcr\t15,0" [(set_attr "op_type" "RR")]) -; Although bcr is superscalar on Z10, this variant will never become part of -; an execution group. +; +; atomic load/store operations +; + +; Atomic loads need not examine the memory model at all. +(define_expand "atomic_load" + [(match_operand:DINT 0 "register_operand") ;; output + (match_operand:DINT 1 "memory_operand") ;; memory + (match_operand:SI 2 "const_int_operand")] ;; model + "" +{ + if (mode == TImode) + emit_insn (gen_atomic_loadti_1 (operands[0], operands[1])); + else if (mode == DImode && !TARGET_ZARCH) + emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1])); + else + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +; Different from movdi_31 in that we want no splitters. +(define_insn "atomic_loaddi_1" + [(set (match_operand:DI 0 "register_operand" "=d,d,!*f,!*f") + (unspec:DI [(match_operand:DI 1 "memory_operand" "Q,S,R,T")] + UNSPEC_MOVA))] + "!TARGET_ZARCH" + "@ + lm\t%0,%M0,%S1 + lmy\t%0,%M0,%S1 + ld\t%0,%1 + ldy\t%0,%1" + [(set_attr "op_type" "RS,RSY,RS,RSY") + (set_attr "type" "lm,lm,floaddf,floaddf")]) + +(define_insn "atomic_loadti_1" + [(set (match_operand:TI 0 "register_operand" "=r") + (unspec:TI [(match_operand:TI 1 "memory_operand" "RT")] + UNSPEC_MOVA))] + "TARGET_ZARCH" + "lpq\t%0,%1" + [(set_attr "op_type" "RXY") + (set_attr "type" "other")]) + +; Atomic stores must(?) enforce sequential consistency. +(define_expand "atomic_store" + [(match_operand:DINT 0 "memory_operand") ;; memory + (match_operand:DINT 1 "register_operand") ;; input + (match_operand:SI 2 "const_int_operand")] ;; model + "" +{ + enum memmodel model = (enum memmodel) INTVAL (operands[2]); + + if (mode == TImode) + emit_insn (gen_atomic_storeti_1 (operands[0], operands[1])); + else if (mode == DImode && !TARGET_ZARCH) + emit_insn (gen_atomic_storedi_1 (operands[0], operands[1])); + else + emit_move_insn (operands[0], operands[1]); + if (model == MEMMODEL_SEQ_CST) + emit_insn (gen_mem_thread_fence (operands[2])); + DONE; +}) + +; Different from movdi_31 in that we want no splitters. +(define_insn "atomic_storedi_1" + [(set (match_operand:DI 0 "memory_operand" "=Q,S,R,T") + (unspec:DI [(match_operand:DI 1 "register_operand" "d,d,!*f,!*f")] + UNSPEC_MOVA))] + "!TARGET_ZARCH" + "@ + stm\t%1,%N1,%S0 + stmy\t%1,%N1,%S0 + std %1,%0 + stdy %1,%0" + [(set_attr "op_type" "RS,RSY,RS,RSY") + (set_attr "type" "stm,stm,fstoredf,fstoredf")]) + +(define_insn "atomic_storeti_1" + [(set (match_operand:TI 0 "memory_operand" "=RT") + (unspec:TI [(match_operand:TI 1 "register_operand" "r")] + UNSPEC_MOVA))] + "TARGET_ZARCH" + "stpq\t%1,%0" + [(set_attr "op_type" "RXY") + (set_attr "type" "other")]) ; ; compare and swap patterns. 
; -(define_expand "sync_compare_and_swap" - [(parallel - [(set (match_operand:TDSI 0 "register_operand" "") - (match_operand:TDSI 1 "memory_operand" "")) - (set (match_dup 1) - (unspec_volatile:TDSI - [(match_dup 1) - (match_operand:TDSI 2 "register_operand" "") - (match_operand:TDSI 3 "register_operand" "")] - UNSPECV_CAS)) - (set (reg:CCZ1 CC_REGNUM) - (compare:CCZ1 (match_dup 1) (match_dup 2)))])] - "") +(define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand") ;; bool success output + (match_operand:DGPR 1 "register_operand") ;; oldval output + (match_operand:DGPR 2 "memory_operand") ;; memory + (match_operand:DGPR 3 "register_operand") ;; expected intput + (match_operand:DGPR 4 "register_operand") ;; newval intput + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; success model + (match_operand:SI 7 "const_int_operand")] ;; failure model + "" +{ + rtx cc, cmp; + emit_insn (gen_atomic_compare_and_swap_internal + (operands[1], operands[2], operands[3], operands[4])); + cc = gen_rtx_REG (CCZ1mode, CC_REGNUM); + cmp = gen_rtx_EQ (SImode, cc, const0_rtx); + emit_insn (gen_cstorecc4 (operands[0], cmp, cc, const0_rtx)); + DONE; +}) -(define_expand "sync_compare_and_swap" - [(parallel - [(set (match_operand:HQI 0 "register_operand" "") - (match_operand:HQI 1 "memory_operand" "")) - (set (match_dup 1) - (unspec_volatile:HQI - [(match_dup 1) - (match_operand:HQI 2 "general_operand" "") - (match_operand:HQI 3 "general_operand" "")] - UNSPECV_CAS)) - (clobber (reg:CC CC_REGNUM))])] +(define_expand "atomic_compare_and_swap" + [(match_operand:SI 0 "register_operand") ;; bool success output + (match_operand:HQI 1 "register_operand") ;; oldval output + (match_operand:HQI 2 "memory_operand") ;; memory + (match_operand:HQI 3 "general_operand") ;; expected intput + (match_operand:HQI 4 "general_operand") ;; newval intput + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; success model + (match_operand:SI 7 "const_int_operand")] ;; failure model "" - "s390_expand_cs_hqi (mode, operands[0], operands[1], - operands[2], operands[3]); DONE;") +{ + s390_expand_cs_hqi (mode, operands[0], operands[1], operands[2], + operands[3], operands[4], INTVAL (operands[5])); + DONE; +}) -; cds, cdsg -(define_insn "*sync_compare_and_swap" - [(set (match_operand:DW 0 "register_operand" "=r") - (match_operand:DW 1 "memory_operand" "+Q")) +(define_expand "atomic_compare_and_swap_internal" + [(parallel + [(set (match_operand:DGPR 0 "register_operand") + (match_operand:DGPR 1 "memory_operand")) + (set (match_dup 1) + (unspec_volatile:DGPR + [(match_dup 1) + (match_operand:DGPR 2 "register_operand") + (match_operand:DGPR 3 "register_operand")] + UNSPECV_CAS)) + (set (reg:CCZ1 CC_REGNUM) + (compare:CCZ1 (match_dup 1) (match_dup 2)))])] + "") + +; cdsg, csg +(define_insn "*atomic_compare_and_swap_1" + [(set (match_operand:TDI 0 "register_operand" "=r") + (match_operand:TDI 1 "memory_operand" "+QS")) (set (match_dup 1) - (unspec_volatile:DW + (unspec_volatile:TDI [(match_dup 1) - (match_operand:DW 2 "register_operand" "0") - (match_operand:DW 3 "register_operand" "r")] + (match_operand:TDI 2 "register_operand" "0") + (match_operand:TDI 3 "register_operand" "r")] UNSPECV_CAS)) (set (reg:CCZ1 CC_REGNUM) (compare:CCZ1 (match_dup 1) (match_dup 2)))] - "" - "cds\t%0,%3,%S1" - [(set_attr "op_type" "RS") + "TARGET_ZARCH" + "csg\t%0,%3,%S1" + [(set_attr "op_type" "RSY") (set_attr "type" "sem")]) -; cs, csg 
-(define_insn "*sync_compare_and_swap" - [(set (match_operand:GPR 0 "register_operand" "=r") - (match_operand:GPR 1 "memory_operand" "+Q")) +; cds, cdsy +(define_insn "*atomic_compare_and_swapdi_2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (match_operand:DI 1 "memory_operand" "+Q,S")) (set (match_dup 1) - (unspec_volatile:GPR + (unspec_volatile:DI + [(match_dup 1) + (match_operand:DI 2 "register_operand" "0,0") + (match_operand:DI 3 "register_operand" "r,r")] + UNSPECV_CAS)) + (set (reg:CCZ1 CC_REGNUM) + (compare:CCZ1 (match_dup 1) (match_dup 2)))] + "!TARGET_ZARCH" + "@ + cds\t%0,%3,%S1 + cdsy\t%0,%3,%S1" + [(set_attr "op_type" "RS,RSY") + (set_attr "type" "sem")]) + +; cs, csy +(define_insn "*atomic_compare_and_swapsi_3" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (match_operand:SI 1 "memory_operand" "+Q,S")) + (set (match_dup 1) + (unspec_volatile:SI [(match_dup 1) - (match_operand:GPR 2 "register_operand" "0") - (match_operand:GPR 3 "register_operand" "r")] + (match_operand:SI 2 "register_operand" "0,0") + (match_operand:SI 3 "register_operand" "r,r")] UNSPECV_CAS)) (set (reg:CCZ1 CC_REGNUM) (compare:CCZ1 (match_dup 1) (match_dup 2)))] "" - "cs\t%0,%3,%S1" - [(set_attr "op_type" "RS") + "@ + cs\t%0,%3,%S1 + csy\t%0,%3,%S1" + [(set_attr "op_type" "RS,RSY") (set_attr "type" "sem")]) - ; ; Other atomic instruction patterns. ; -(define_expand "sync_lock_test_and_set" - [(match_operand:HQI 0 "register_operand") - (match_operand:HQI 1 "memory_operand") - (match_operand:HQI 2 "general_operand")] - "" - "s390_expand_atomic (mode, SET, operands[0], operands[1], - operands[2], false); DONE;") - ; z196 load and add, xor, or and and instructions -; lan, lang, lao, laog, lax, laxg, laa, laag -(define_insn "sync_" - [(parallel - [(set (match_operand:GPR 0 "memory_operand" "+QS") - (unspec_volatile:GPR - [(ATOMIC_Z196:GPR (match_dup 0) - (match_operand:GPR 1 "general_operand" "d"))] - UNSPECV_ATOMIC_OP)) - (clobber (match_scratch:GPR 2 "=d")) - (clobber (reg:CC CC_REGNUM))])] +(define_expand "atomic_fetch_" + [(match_operand:GPR 0 "register_operand") ;; val out + (ATOMIC_Z196:GPR + (match_operand:GPR 1 "memory_operand") ;; memory + (match_operand:GPR 2 "register_operand")) ;; val in + (match_operand:SI 3 "const_int_operand")] ;; model "TARGET_Z196" - "la\t%2,%1,%0") +{ + emit_insn (gen_atomic_fetch__iaf + (operands[0], operands[1], operands[2])); + DONE; +}) ; lan, lang, lao, laog, lax, laxg, laa, laag -(define_insn "sync_old_" - [(parallel - [(set (match_operand:GPR 0 "register_operand" "=d") - (match_operand:GPR 1 "memory_operand" "+QS")) - (set (match_dup 1) - (unspec_volatile:GPR - [(ATOMIC_Z196:GPR (match_dup 1) - (match_operand:GPR 2 "general_operand" "d"))] - UNSPECV_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM))])] +(define_insn "atomic_fetch__iaf" + [(set (match_operand:GPR 0 "register_operand" "=d") + (match_operand:GPR 1 "memory_operand" "+QS")) + (set (match_dup 1) + (unspec_volatile:GPR + [(ATOMIC_Z196:GPR (match_dup 1) + (match_operand:GPR 2 "general_operand" "d"))] + UNSPECV_ATOMIC_OP)) + (clobber (reg:CC CC_REGNUM))] "TARGET_Z196" - "la\t%0,%2,%1") + "la\t%0,%2,%1" + [(set_attr "op_type" "RSY") + (set_attr "type" "sem")]) +;; For SImode and larger, the optabs.c code will do just fine in +;; expanding a compare-and-swap loop. For QI/HImode, we can do +;; better by expanding our own loop. 
-(define_expand "sync_" - [(set (match_operand:HQI 0 "memory_operand") - (ATOMIC:HQI (match_dup 0) - (match_operand:HQI 1 "general_operand")))] +(define_expand "atomic_" + [(ATOMIC:HQI + (match_operand:HQI 0 "memory_operand") ;; memory + (match_operand:HQI 1 "general_operand")) ;; val in + (match_operand:SI 2 "const_int_operand")] ;; model "" - "s390_expand_atomic (mode, , NULL_RTX, operands[0], - operands[1], false); DONE;") +{ + s390_expand_atomic (mode, , NULL_RTX, operands[0], + operands[1], false); + DONE; +}) -(define_expand "sync_old_" - [(set (match_operand:HQI 0 "register_operand") - (match_operand:HQI 1 "memory_operand")) - (set (match_dup 1) - (ATOMIC:HQI (match_dup 1) - (match_operand:HQI 2 "general_operand")))] +(define_expand "atomic_fetch_" + [(match_operand:HQI 0 "register_operand") ;; val out + (ATOMIC:HQI + (match_operand:HQI 1 "memory_operand") ;; memory + (match_operand:HQI 2 "general_operand")) ;; val in + (match_operand:SI 3 "const_int_operand")] ;; model "" - "s390_expand_atomic (mode, , operands[0], operands[1], - operands[2], false); DONE;") - -(define_expand "sync_new_" - [(set (match_operand:HQI 0 "register_operand") - (ATOMIC:HQI (match_operand:HQI 1 "memory_operand") - (match_operand:HQI 2 "general_operand"))) - (set (match_dup 1) (ATOMIC:HQI (match_dup 1) (match_dup 2)))] +{ + s390_expand_atomic (mode, , operands[0], operands[1], + operands[2], false); + DONE; +}) + +(define_expand "atomic__fetch" + [(match_operand:HQI 0 "register_operand") ;; val out + (ATOMIC:HQI + (match_operand:HQI 1 "memory_operand") ;; memory + (match_operand:HQI 2 "general_operand")) ;; val in + (match_operand:SI 3 "const_int_operand")] ;; model + "" +{ + s390_expand_atomic (mode, , operands[0], operands[1], + operands[2], true); + DONE; +}) + +(define_expand "atomic_exchange" + [(match_operand:HQI 0 "register_operand") ;; val out + (match_operand:HQI 1 "memory_operand") ;; memory + (match_operand:HQI 2 "general_operand") ;; val in + (match_operand:SI 3 "const_int_operand")] ;; model "" - "s390_expand_atomic (mode, , operands[0], operands[1], - operands[2], true); DONE;") +{ + s390_expand_atomic (mode, SET, operands[0], operands[1], + operands[2], false); + DONE; +}) ;; ;;- Miscellaneous instructions. -- 2.30.2