S/390 Add vector scalar instruction support.
[gcc.git] / gcc / config / s390 / s390.c
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2015 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "hash-set.h"
29 #include "machmode.h"
30 #include "vec.h"
31 #include "double-int.h"
32 #include "input.h"
33 #include "alias.h"
34 #include "symtab.h"
35 #include "wide-int.h"
36 #include "inchash.h"
37 #include "tree.h"
38 #include "fold-const.h"
39 #include "print-tree.h"
40 #include "stringpool.h"
41 #include "stor-layout.h"
42 #include "varasm.h"
43 #include "calls.h"
44 #include "tm_p.h"
45 #include "regs.h"
46 #include "hard-reg-set.h"
47 #include "insn-config.h"
48 #include "conditions.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "function.h"
54 #include "recog.h"
55 #include "hashtab.h"
56 #include "statistics.h"
57 #include "real.h"
58 #include "fixed-value.h"
59 #include "expmed.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "emit-rtl.h"
63 #include "stmt.h"
64 #include "expr.h"
65 #include "reload.h"
66 #include "diagnostic-core.h"
67 #include "predict.h"
68 #include "dominance.h"
69 #include "cfg.h"
70 #include "cfgrtl.h"
71 #include "cfganal.h"
72 #include "lcm.h"
73 #include "cfgbuild.h"
74 #include "cfgcleanup.h"
75 #include "basic-block.h"
76 #include "ggc.h"
77 #include "target.h"
78 #include "target-def.h"
79 #include "debug.h"
80 #include "langhooks.h"
81 #include "insn-codes.h"
82 #include "optabs.h"
83 #include "hash-table.h"
84 #include "tree-ssa-alias.h"
85 #include "internal-fn.h"
86 #include "gimple-fold.h"
87 #include "tree-eh.h"
88 #include "gimple-expr.h"
89 #include "is-a.h"
90 #include "gimple.h"
91 #include "gimplify.h"
92 #include "df.h"
93 #include "params.h"
94 #include "cfgloop.h"
95 #include "opts.h"
96 #include "tree-pass.h"
97 #include "context.h"
98 #include "builtins.h"
99 #include "rtl-iter.h"
100 #include "intl.h"
101 #include "plugin-api.h"
102 #include "ipa-ref.h"
103 #include "cgraph.h"
104
105 /* Define the specific costs for a given cpu. */
106
107 struct processor_costs
108 {
109 /* multiplication */
110 const int m; /* cost of an M instruction. */
111 const int mghi; /* cost of an MGHI instruction. */
112 const int mh; /* cost of an MH instruction. */
113 const int mhi; /* cost of an MHI instruction. */
114 const int ml; /* cost of an ML instruction. */
115 const int mr; /* cost of an MR instruction. */
116 const int ms; /* cost of an MS instruction. */
117 const int msg; /* cost of an MSG instruction. */
118 const int msgf; /* cost of an MSGF instruction. */
119 const int msgfr; /* cost of an MSGFR instruction. */
120 const int msgr; /* cost of an MSGR instruction. */
121 const int msr; /* cost of an MSR instruction. */
122 const int mult_df; /* cost of multiplication in DFmode. */
123 const int mxbr; /* cost of multiplication in TFmode. */
124 /* square root */
125 const int sqxbr; /* cost of square root in TFmode. */
126 const int sqdbr; /* cost of square root in DFmode. */
127 const int sqebr; /* cost of square root in SFmode. */
128 /* multiply and add */
129 const int madbr; /* cost of multiply and add in DFmode. */
130 const int maebr; /* cost of multiply and add in SFmode. */
131 /* division */
132 const int dxbr; /* cost of division in TFmode. */
133 const int ddbr; /* cost of division in DFmode. */
134 const int debr; /* cost of division in SFmode. */
135 const int dlgr; /* cost of a DLGR instruction. */
136 const int dlr; /* cost of a DLR instruction. */
137 const int dr; /* cost of a DR instruction. */
138 const int dsgfr; /* cost of a DSGFR instruction. */
139 const int dsgr; /* cost of a DSGR instruction. */
140 };
141
142 const struct processor_costs *s390_cost;
143
144 static const
145 struct processor_costs z900_cost =
146 {
147 COSTS_N_INSNS (5), /* M */
148 COSTS_N_INSNS (10), /* MGHI */
149 COSTS_N_INSNS (5), /* MH */
150 COSTS_N_INSNS (4), /* MHI */
151 COSTS_N_INSNS (5), /* ML */
152 COSTS_N_INSNS (5), /* MR */
153 COSTS_N_INSNS (4), /* MS */
154 COSTS_N_INSNS (15), /* MSG */
155 COSTS_N_INSNS (7), /* MSGF */
156 COSTS_N_INSNS (7), /* MSGFR */
157 COSTS_N_INSNS (10), /* MSGR */
158 COSTS_N_INSNS (4), /* MSR */
159 COSTS_N_INSNS (7), /* multiplication in DFmode */
160 COSTS_N_INSNS (13), /* MXBR */
161 COSTS_N_INSNS (136), /* SQXBR */
162 COSTS_N_INSNS (44), /* SQDBR */
163 COSTS_N_INSNS (35), /* SQEBR */
164 COSTS_N_INSNS (18), /* MADBR */
165 COSTS_N_INSNS (13), /* MAEBR */
166 COSTS_N_INSNS (134), /* DXBR */
167 COSTS_N_INSNS (30), /* DDBR */
168 COSTS_N_INSNS (27), /* DEBR */
169 COSTS_N_INSNS (220), /* DLGR */
170 COSTS_N_INSNS (34), /* DLR */
171 COSTS_N_INSNS (34), /* DR */
172 COSTS_N_INSNS (32), /* DSGFR */
173 COSTS_N_INSNS (32), /* DSGR */
174 };
175
176 static const
177 struct processor_costs z990_cost =
178 {
179 COSTS_N_INSNS (4), /* M */
180 COSTS_N_INSNS (2), /* MGHI */
181 COSTS_N_INSNS (2), /* MH */
182 COSTS_N_INSNS (2), /* MHI */
183 COSTS_N_INSNS (4), /* ML */
184 COSTS_N_INSNS (4), /* MR */
185 COSTS_N_INSNS (5), /* MS */
186 COSTS_N_INSNS (6), /* MSG */
187 COSTS_N_INSNS (4), /* MSGF */
188 COSTS_N_INSNS (4), /* MSGFR */
189 COSTS_N_INSNS (4), /* MSGR */
190 COSTS_N_INSNS (4), /* MSR */
191 COSTS_N_INSNS (1), /* multiplication in DFmode */
192 COSTS_N_INSNS (28), /* MXBR */
193 COSTS_N_INSNS (130), /* SQXBR */
194 COSTS_N_INSNS (66), /* SQDBR */
195 COSTS_N_INSNS (38), /* SQEBR */
196 COSTS_N_INSNS (1), /* MADBR */
197 COSTS_N_INSNS (1), /* MAEBR */
198 COSTS_N_INSNS (60), /* DXBR */
199 COSTS_N_INSNS (40), /* DDBR */
200 COSTS_N_INSNS (26), /* DEBR */
201 COSTS_N_INSNS (176), /* DLGR */
202 COSTS_N_INSNS (31), /* DLR */
203 COSTS_N_INSNS (31), /* DR */
204 COSTS_N_INSNS (31), /* DSGFR */
205 COSTS_N_INSNS (31), /* DSGR */
206 };
207
208 static const
209 struct processor_costs z9_109_cost =
210 {
211 COSTS_N_INSNS (4), /* M */
212 COSTS_N_INSNS (2), /* MGHI */
213 COSTS_N_INSNS (2), /* MH */
214 COSTS_N_INSNS (2), /* MHI */
215 COSTS_N_INSNS (4), /* ML */
216 COSTS_N_INSNS (4), /* MR */
217 COSTS_N_INSNS (5), /* MS */
218 COSTS_N_INSNS (6), /* MSG */
219 COSTS_N_INSNS (4), /* MSGF */
220 COSTS_N_INSNS (4), /* MSGFR */
221 COSTS_N_INSNS (4), /* MSGR */
222 COSTS_N_INSNS (4), /* MSR */
223 COSTS_N_INSNS (1), /* multiplication in DFmode */
224 COSTS_N_INSNS (28), /* MXBR */
225 COSTS_N_INSNS (130), /* SQXBR */
226 COSTS_N_INSNS (66), /* SQDBR */
227 COSTS_N_INSNS (38), /* SQEBR */
228 COSTS_N_INSNS (1), /* MADBR */
229 COSTS_N_INSNS (1), /* MAEBR */
230 COSTS_N_INSNS (60), /* DXBR */
231 COSTS_N_INSNS (40), /* DDBR */
232 COSTS_N_INSNS (26), /* DEBR */
233 COSTS_N_INSNS (30), /* DLGR */
234 COSTS_N_INSNS (23), /* DLR */
235 COSTS_N_INSNS (23), /* DR */
236 COSTS_N_INSNS (24), /* DSGFR */
237 COSTS_N_INSNS (24), /* DSGR */
238 };
239
240 static const
241 struct processor_costs z10_cost =
242 {
243 COSTS_N_INSNS (10), /* M */
244 COSTS_N_INSNS (10), /* MGHI */
245 COSTS_N_INSNS (10), /* MH */
246 COSTS_N_INSNS (10), /* MHI */
247 COSTS_N_INSNS (10), /* ML */
248 COSTS_N_INSNS (10), /* MR */
249 COSTS_N_INSNS (10), /* MS */
250 COSTS_N_INSNS (10), /* MSG */
251 COSTS_N_INSNS (10), /* MSGF */
252 COSTS_N_INSNS (10), /* MSGFR */
253 COSTS_N_INSNS (10), /* MSGR */
254 COSTS_N_INSNS (10), /* MSR */
255 COSTS_N_INSNS (1) , /* multiplication in DFmode */
256 COSTS_N_INSNS (50), /* MXBR */
257 COSTS_N_INSNS (120), /* SQXBR */
258 COSTS_N_INSNS (52), /* SQDBR */
259 COSTS_N_INSNS (38), /* SQEBR */
260 COSTS_N_INSNS (1), /* MADBR */
261 COSTS_N_INSNS (1), /* MAEBR */
262 COSTS_N_INSNS (111), /* DXBR */
263 COSTS_N_INSNS (39), /* DDBR */
264 COSTS_N_INSNS (32), /* DEBR */
265 COSTS_N_INSNS (160), /* DLGR */
266 COSTS_N_INSNS (71), /* DLR */
267 COSTS_N_INSNS (71), /* DR */
268 COSTS_N_INSNS (71), /* DSGFR */
269 COSTS_N_INSNS (71), /* DSGR */
270 };
271
272 static const
273 struct processor_costs z196_cost =
274 {
275 COSTS_N_INSNS (7), /* M */
276 COSTS_N_INSNS (5), /* MGHI */
277 COSTS_N_INSNS (5), /* MH */
278 COSTS_N_INSNS (5), /* MHI */
279 COSTS_N_INSNS (7), /* ML */
280 COSTS_N_INSNS (7), /* MR */
281 COSTS_N_INSNS (6), /* MS */
282 COSTS_N_INSNS (8), /* MSG */
283 COSTS_N_INSNS (6), /* MSGF */
284 COSTS_N_INSNS (6), /* MSGFR */
285 COSTS_N_INSNS (8), /* MSGR */
286 COSTS_N_INSNS (6), /* MSR */
287 COSTS_N_INSNS (1) , /* multiplication in DFmode */
288 COSTS_N_INSNS (40), /* MXBR B+40 */
289 COSTS_N_INSNS (100), /* SQXBR B+100 */
290 COSTS_N_INSNS (42), /* SQDBR B+42 */
291 COSTS_N_INSNS (28), /* SQEBR B+28 */
292 COSTS_N_INSNS (1), /* MADBR B */
293 COSTS_N_INSNS (1), /* MAEBR B */
294 COSTS_N_INSNS (101), /* DXBR B+101 */
295 COSTS_N_INSNS (29), /* DDBR */
296 COSTS_N_INSNS (22), /* DEBR */
297 COSTS_N_INSNS (160), /* DLGR cracked */
298 COSTS_N_INSNS (160), /* DLR cracked */
299 COSTS_N_INSNS (160), /* DR expanded */
300 COSTS_N_INSNS (160), /* DSGFR cracked */
301 COSTS_N_INSNS (160), /* DSGR cracked */
302 };
303
304 static const
305 struct processor_costs zEC12_cost =
306 {
307 COSTS_N_INSNS (7), /* M */
308 COSTS_N_INSNS (5), /* MGHI */
309 COSTS_N_INSNS (5), /* MH */
310 COSTS_N_INSNS (5), /* MHI */
311 COSTS_N_INSNS (7), /* ML */
312 COSTS_N_INSNS (7), /* MR */
313 COSTS_N_INSNS (6), /* MS */
314 COSTS_N_INSNS (8), /* MSG */
315 COSTS_N_INSNS (6), /* MSGF */
316 COSTS_N_INSNS (6), /* MSGFR */
317 COSTS_N_INSNS (8), /* MSGR */
318 COSTS_N_INSNS (6), /* MSR */
319 COSTS_N_INSNS (1) , /* multiplication in DFmode */
320 COSTS_N_INSNS (40), /* MXBR B+40 */
321 COSTS_N_INSNS (100), /* SQXBR B+100 */
322 COSTS_N_INSNS (42), /* SQDBR B+42 */
323 COSTS_N_INSNS (28), /* SQEBR B+28 */
324 COSTS_N_INSNS (1), /* MADBR B */
325 COSTS_N_INSNS (1), /* MAEBR B */
326 COSTS_N_INSNS (131), /* DXBR B+131 */
327 COSTS_N_INSNS (29), /* DDBR */
328 COSTS_N_INSNS (22), /* DEBR */
329 COSTS_N_INSNS (160), /* DLGR cracked */
330 COSTS_N_INSNS (160), /* DLR cracked */
331 COSTS_N_INSNS (160), /* DR expanded */
332 COSTS_N_INSNS (160), /* DSGFR cracked */
333 COSTS_N_INSNS (160), /* DSGR cracked */
334 };
335
336 extern int reload_completed;
337
338 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
339 static rtx_insn *last_scheduled_insn;
340
341 /* Structure used to hold the components of an S/390 memory
342 address. A legitimate address on S/390 is of the general
343 form
344 base + index + displacement
345 where any of the components is optional.
346
347 base and index are registers of the class ADDR_REGS,
348 displacement is an unsigned 12-bit immediate constant. */
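/* For example, the canonical address (plus (plus (reg %r2) (reg %r3))
(const_int 40)) is decomposed by s390_decompose_address below into
index %r2, base %r3 and displacement 40. */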
349
350 struct s390_address
351 {
352 rtx base;
353 rtx indx;
354 rtx disp;
355 bool pointer;
356 bool literal_pool;
357 };
358
359 /* The following structure is embedded in the machine
360 specific part of struct function. */
361
362 struct GTY (()) s390_frame_layout
363 {
364 /* Offset within stack frame. */
365 HOST_WIDE_INT gprs_offset;
366 HOST_WIDE_INT f0_offset;
367 HOST_WIDE_INT f4_offset;
368 HOST_WIDE_INT f8_offset;
369 HOST_WIDE_INT backchain_offset;
370
371 /* Number of the first and last gpr for which slots in the
372 register save area are reserved. */
373 int first_save_gpr_slot;
374 int last_save_gpr_slot;
375
376 /* Location (FP register number) where GPRs (r0-r15) should
377 be saved to.
378 0 - does not need to be saved at all
379 -1 - stack slot */
380 signed char gpr_save_slots[16];
381
382 /* Number of first and last gpr to be saved, restored. */
383 int first_save_gpr;
384 int first_restore_gpr;
385 int last_save_gpr;
386 int last_restore_gpr;
387
388 /* Bits standing for floating point registers. Set, if the
389 respective register has to be saved. Starting with reg 16 (f0)
390 at the rightmost bit.
391 Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
392 fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
393 reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
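/* For example, if only f0 (reg 16) and f2 (reg 17) have to be saved,
bits 0 and 1 are set, i.e. fpr_bitmap == 0x3 (see cfun_set_fpr_save). */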
394 unsigned int fpr_bitmap;
395
396 /* Number of floating point registers f8-f15 which must be saved. */
397 int high_fprs;
398
399 /* Set if return address needs to be saved.
400 This flag is set by s390_return_addr_rtx if it could not use
401 the initial value of r14 and therefore depends on r14 saved
402 to the stack. */
403 bool save_return_addr_p;
404
405 /* Size of stack frame. */
406 HOST_WIDE_INT frame_size;
407 };
408
409 /* Define the structure for the machine field in struct function. */
410
411 struct GTY(()) machine_function
412 {
413 struct s390_frame_layout frame_layout;
414
415 /* Literal pool base register. */
416 rtx base_reg;
417
418 /* True if we may need to perform branch splitting. */
419 bool split_branches_pending_p;
420
421 bool has_landing_pad_p;
422
423 /* True if the current function may contain a tbegin clobbering
424 FPRs. */
425 bool tbegin_p;
426 };
427
428 /* A few accessor macros for struct cfun->machine->s390_frame_layout. */
429
430 #define cfun_frame_layout (cfun->machine->frame_layout)
431 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
432 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
433 ? cfun_frame_layout.fpr_bitmap & 0x0f \
434 : cfun_frame_layout.fpr_bitmap & 0x03))
435 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
436 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
437 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
438 (1 << (REGNO - FPR0_REGNUM)))
439 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
440 (1 << (REGNO - FPR0_REGNUM))))
441 #define cfun_gpr_save_slot(REGNO) \
442 cfun->machine->frame_layout.gpr_save_slots[REGNO]
443
444 /* Number of GPRs, FPRs and VRs used for argument passing. */
445 #define GP_ARG_NUM_REG 5
446 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
447 #define VEC_ARG_NUM_REG 8
448
449 /* A couple of shortcuts. */
450 #define CONST_OK_FOR_J(x) \
451 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
452 #define CONST_OK_FOR_K(x) \
453 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
454 #define CONST_OK_FOR_Os(x) \
455 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
456 #define CONST_OK_FOR_Op(x) \
457 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
458 #define CONST_OK_FOR_On(x) \
459 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
460
461 #define REGNO_PAIR_OK(REGNO, MODE) \
462 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
463
464 /* The read-ahead distance of the dynamic branch prediction unit, in
465 bytes, on a z10 (or higher) CPU. */
466 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
467
468 static const int s390_hotpatch_hw_max = 1000000;
469 static int s390_hotpatch_hw_before_label = 0;
470 static int s390_hotpatch_hw_after_label = 0;
471
472 /* Check whether the hotpatch attribute is applied to a function and, if it has
473 an argument, the argument is valid. */
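/* For example, a declaration such as
void f (void) __attribute__ ((hotpatch (1, 2)));
requests one halfword of hotpatching space before and two halfwords
after the function label. */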
474
475 static tree
476 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
477 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
478 {
479 tree expr;
480 tree expr2;
481 int err;
482
483 if (TREE_CODE (*node) != FUNCTION_DECL)
484 {
485 warning (OPT_Wattributes, "%qE attribute only applies to functions",
486 name);
487 *no_add_attrs = true;
488 }
489 if (args != NULL && TREE_CHAIN (args) != NULL)
490 {
491 expr = TREE_VALUE (args);
492 expr2 = TREE_VALUE (TREE_CHAIN (args));
493 }
494 if (args == NULL || TREE_CHAIN (args) == NULL)
495 err = 1;
496 else if (TREE_CODE (expr) != INTEGER_CST
497 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
498 || wi::gtu_p (expr, s390_hotpatch_hw_max))
499 err = 1;
500 else if (TREE_CODE (expr2) != INTEGER_CST
501 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
502 || wi::gtu_p (expr2, s390_hotpatch_hw_max))
503 err = 1;
504 else
505 err = 0;
506 if (err)
507 {
508 error ("requested %qE attribute is not a comma separated pair of"
509 " non-negative integer constants or too large (max. %d)", name,
510 s390_hotpatch_hw_max);
511 *no_add_attrs = true;
512 }
513
514 return NULL_TREE;
515 }
516
517 static const struct attribute_spec s390_attribute_table[] = {
518 { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false
519 },
520 /* End element. */
521 { NULL, 0, 0, false, false, false, NULL, false }
522 };
523
524 /* Return the alignment for LABEL. We default to the -falign-labels
525 value except for the literal pool base label. */
526 int
527 s390_label_align (rtx label)
528 {
529 rtx_insn *prev_insn = prev_active_insn (label);
530 rtx set, src;
531
532 if (prev_insn == NULL_RTX)
533 goto old;
534
535 set = single_set (prev_insn);
536
537 if (set == NULL_RTX)
538 goto old;
539
540 src = SET_SRC (set);
541
542 /* Don't align literal pool base labels. */
543 if (GET_CODE (src) == UNSPEC
544 && XINT (src, 1) == UNSPEC_MAIN_BASE)
545 return 0;
546
547 old:
548 return align_labels_log;
549 }
550
551 static machine_mode
552 s390_libgcc_cmp_return_mode (void)
553 {
554 return TARGET_64BIT ? DImode : SImode;
555 }
556
557 static machine_mode
558 s390_libgcc_shift_count_mode (void)
559 {
560 return TARGET_64BIT ? DImode : SImode;
561 }
562
563 static machine_mode
564 s390_unwind_word_mode (void)
565 {
566 return TARGET_64BIT ? DImode : SImode;
567 }
568
569 /* Return true if the back end supports mode MODE. */
570 static bool
571 s390_scalar_mode_supported_p (machine_mode mode)
572 {
573 /* In contrast to the default implementation, reject TImode constants on
574 31-bit TARGET_ZARCH for ABI compliance. */
575 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
576 return false;
577
578 if (DECIMAL_FLOAT_MODE_P (mode))
579 return default_decimal_float_supported_p ();
580
581 return default_scalar_mode_supported_p (mode);
582 }
583
584 /* Return true if the back end supports vector mode MODE. */
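/* E.g. V16QImode, V4SImode and V2DFmode (16 bytes each) are supported
when TARGET_VX is enabled, while a 32 byte mode such as V4DFmode is
rejected. */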
585 static bool
586 s390_vector_mode_supported_p (machine_mode mode)
587 {
588 machine_mode inner;
589
590 if (!VECTOR_MODE_P (mode)
591 || !TARGET_VX
592 || GET_MODE_SIZE (mode) > 16)
593 return false;
594
595 inner = GET_MODE_INNER (mode);
596
597 switch (inner)
598 {
599 case QImode:
600 case HImode:
601 case SImode:
602 case DImode:
603 case TImode:
604 case SFmode:
605 case DFmode:
606 case TFmode:
607 return true;
608 default:
609 return false;
610 }
611 }
612
613 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
614
615 void
616 s390_set_has_landing_pad_p (bool value)
617 {
618 cfun->machine->has_landing_pad_p = value;
619 }
620
621 /* If two condition code modes are compatible, return a condition code
622 mode which is compatible with both. Otherwise, return
623 VOIDmode. */
624
625 static machine_mode
626 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
627 {
628 if (m1 == m2)
629 return m1;
630
631 switch (m1)
632 {
633 case CCZmode:
634 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
635 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
636 return m2;
637 return VOIDmode;
638
639 case CCSmode:
640 case CCUmode:
641 case CCTmode:
642 case CCSRmode:
643 case CCURmode:
644 case CCZ1mode:
645 if (m2 == CCZmode)
646 return m1;
647
648 return VOIDmode;
649
650 default:
651 return VOIDmode;
652 }
653 return VOIDmode;
654 }
655
656 /* Return true if SET either doesn't set the CC register, or else
657 the source and destination have matching CC modes and that
658 CC mode is at least as constrained as REQ_MODE. */
659
660 static bool
661 s390_match_ccmode_set (rtx set, machine_mode req_mode)
662 {
663 machine_mode set_mode;
664
665 gcc_assert (GET_CODE (set) == SET);
666
667 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
668 return 1;
669
670 set_mode = GET_MODE (SET_DEST (set));
671 switch (set_mode)
672 {
673 case CCSmode:
674 case CCSRmode:
675 case CCUmode:
676 case CCURmode:
677 case CCLmode:
678 case CCL1mode:
679 case CCL2mode:
680 case CCL3mode:
681 case CCT1mode:
682 case CCT2mode:
683 case CCT3mode:
684 case CCVEQmode:
685 case CCVFHmode:
686 case CCVFHEmode:
687 if (req_mode != set_mode)
688 return 0;
689 break;
690
691 case CCZmode:
692 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
693 && req_mode != CCSRmode && req_mode != CCURmode)
694 return 0;
695 break;
696
697 case CCAPmode:
698 case CCANmode:
699 if (req_mode != CCAmode)
700 return 0;
701 break;
702
703 default:
704 gcc_unreachable ();
705 }
706
707 return (GET_MODE (SET_SRC (set)) == set_mode);
708 }
709
710 /* Return true if every SET in INSN that sets the CC register
711 has source and destination with matching CC modes and that
712 CC mode is at least as constrained as REQ_MODE.
713 If REQ_MODE is VOIDmode, always return false. */
714
715 bool
716 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
717 {
718 int i;
719
720 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
721 if (req_mode == VOIDmode)
722 return false;
723
724 if (GET_CODE (PATTERN (insn)) == SET)
725 return s390_match_ccmode_set (PATTERN (insn), req_mode);
726
727 if (GET_CODE (PATTERN (insn)) == PARALLEL)
728 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
729 {
730 rtx set = XVECEXP (PATTERN (insn), 0, i);
731 if (GET_CODE (set) == SET)
732 if (!s390_match_ccmode_set (set, req_mode))
733 return false;
734 }
735
736 return true;
737 }
738
739 /* If a test-under-mask instruction can be used to implement
740 (compare (and ... OP1) OP2), return the CC mode required
741 to do that. Otherwise, return VOIDmode.
742 MIXED is true if the instruction can distinguish between
743 CC1 and CC2 for mixed selected bits (TMxx); it is false
744 if the instruction cannot (TM). */
745
746 machine_mode
747 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
748 {
749 int bit0, bit1;
750
751 /* ??? Fixme: should work on CONST_DOUBLE as well. */
752 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
753 return VOIDmode;
754
755 /* Selected bits all zero: CC0.
756 e.g.: int a; if ((a & (16 + 128)) == 0) */
757 if (INTVAL (op2) == 0)
758 return CCTmode;
759
760 /* Selected bits all one: CC3.
761 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
762 if (INTVAL (op2) == INTVAL (op1))
763 return CCT3mode;
764
765 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
766 int a;
767 if ((a & (16 + 128)) == 16) -> CCT1
768 if ((a & (16 + 128)) == 128) -> CCT2 */
769 if (mixed)
770 {
771 bit1 = exact_log2 (INTVAL (op2));
772 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
773 if (bit0 != -1 && bit1 != -1)
774 return bit0 > bit1 ? CCT1mode : CCT2mode;
775 }
776
777 return VOIDmode;
778 }
779
780 /* Given a comparison code OP (EQ, NE, etc.) and the operands
781 OP0 and OP1 of a COMPARE, return the mode to be used for the
782 comparison. */
783
784 machine_mode
785 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
786 {
787 if (TARGET_VX
788 && register_operand (op0, DFmode)
789 && register_operand (op1, DFmode))
790 {
791 /* LT, LE, UNGT, UNGE require swapping OP0 and OP1. Either
792 s390_emit_compare or s390_canonicalize_comparison will take
793 care of it. */
794 switch (code)
795 {
796 case EQ:
797 case NE:
798 return CCVEQmode;
799 case GT:
800 case UNLE:
801 return CCVFHmode;
802 case GE:
803 case UNLT:
804 return CCVFHEmode;
805 default:
806 ;
807 }
808 }
809
810 switch (code)
811 {
812 case EQ:
813 case NE:
814 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
815 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
816 return CCAPmode;
817 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
818 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
819 return CCAPmode;
820 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
821 || GET_CODE (op1) == NEG)
822 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
823 return CCLmode;
824
825 if (GET_CODE (op0) == AND)
826 {
827 /* Check whether we can potentially do it via TM. */
828 machine_mode ccmode;
829 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
830 if (ccmode != VOIDmode)
831 {
832 /* Relax CCTmode to CCZmode to allow fall-back to AND
833 if that turns out to be beneficial. */
834 return ccmode == CCTmode ? CCZmode : ccmode;
835 }
836 }
837
838 if (register_operand (op0, HImode)
839 && GET_CODE (op1) == CONST_INT
840 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
841 return CCT3mode;
842 if (register_operand (op0, QImode)
843 && GET_CODE (op1) == CONST_INT
844 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
845 return CCT3mode;
846
847 return CCZmode;
848
849 case LE:
850 case LT:
851 case GE:
852 case GT:
853 /* The only overflow condition of NEG and ABS happens when INT_MIN
854 is used as operand: the expected positive result wraps around
855 and stays negative.
856 Using CCAP mode the resulting cc can be used for comparisons. */
857 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
858 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
859 return CCAPmode;
860
861 /* If constants are involved in an add instruction it is possible to use
862 the resulting cc for comparisons with zero. Knowing the sign of the
863 constant the overflow behavior gets predictable. e.g.:
864 int a, b; if ((b = a + c) > 0)
865 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
866 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
867 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
868 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
869 /* Avoid INT32_MIN on 32 bit. */
870 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
871 {
872 if (INTVAL (XEXP((op0), 1)) < 0)
873 return CCANmode;
874 else
875 return CCAPmode;
876 }
877 /* Fall through. */
878 case UNORDERED:
879 case ORDERED:
880 case UNEQ:
881 case UNLE:
882 case UNLT:
883 case UNGE:
884 case UNGT:
885 case LTGT:
886 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
887 && GET_CODE (op1) != CONST_INT)
888 return CCSRmode;
889 return CCSmode;
890
891 case LTU:
892 case GEU:
893 if (GET_CODE (op0) == PLUS
894 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
895 return CCL1mode;
896
897 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
898 && GET_CODE (op1) != CONST_INT)
899 return CCURmode;
900 return CCUmode;
901
902 case LEU:
903 case GTU:
904 if (GET_CODE (op0) == MINUS
905 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
906 return CCL2mode;
907
908 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
909 && GET_CODE (op1) != CONST_INT)
910 return CCURmode;
911 return CCUmode;
912
913 default:
914 gcc_unreachable ();
915 }
916 }
917
918 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
919 that we can implement more efficiently. */
920
921 static void
922 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
923 bool op0_preserve_value)
924 {
925 if (op0_preserve_value)
926 return;
927
928 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
929 if ((*code == EQ || *code == NE)
930 && *op1 == const0_rtx
931 && GET_CODE (*op0) == ZERO_EXTRACT
932 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
933 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
934 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
935 {
936 rtx inner = XEXP (*op0, 0);
937 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
938 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
939 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
940
941 if (len > 0 && len < modesize
942 && pos >= 0 && pos + len <= modesize
943 && modesize <= HOST_BITS_PER_WIDE_INT)
944 {
945 unsigned HOST_WIDE_INT block;
946 block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
947 block <<= modesize - pos - len;
948
949 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
950 gen_int_mode (block, GET_MODE (inner)));
951 }
952 }
953
954 /* Narrow AND of memory against immediate to enable TM. */
955 if ((*code == EQ || *code == NE)
956 && *op1 == const0_rtx
957 && GET_CODE (*op0) == AND
958 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
959 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
960 {
961 rtx inner = XEXP (*op0, 0);
962 rtx mask = XEXP (*op0, 1);
963
964 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
965 if (GET_CODE (inner) == SUBREG
966 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
967 && (GET_MODE_SIZE (GET_MODE (inner))
968 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
969 && ((INTVAL (mask)
970 & GET_MODE_MASK (GET_MODE (inner))
971 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
972 == 0))
973 inner = SUBREG_REG (inner);
974
975 /* Do not change volatile MEMs. */
976 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
977 {
978 int part = s390_single_part (XEXP (*op0, 1),
979 GET_MODE (inner), QImode, 0);
980 if (part >= 0)
981 {
982 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
983 inner = adjust_address_nv (inner, QImode, part);
984 *op0 = gen_rtx_AND (QImode, inner, mask);
985 }
986 }
987 }
988
989 /* Narrow comparisons against 0xffff to HImode if possible. */
990 if ((*code == EQ || *code == NE)
991 && GET_CODE (*op1) == CONST_INT
992 && INTVAL (*op1) == 0xffff
993 && SCALAR_INT_MODE_P (GET_MODE (*op0))
994 && (nonzero_bits (*op0, GET_MODE (*op0))
995 & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
996 {
997 *op0 = gen_lowpart (HImode, *op0);
998 *op1 = constm1_rtx;
999 }
1000
1001 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1002 if (GET_CODE (*op0) == UNSPEC
1003 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1004 && XVECLEN (*op0, 0) == 1
1005 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1006 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1007 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1008 && *op1 == const0_rtx)
1009 {
1010 enum rtx_code new_code = UNKNOWN;
1011 switch (*code)
1012 {
1013 case EQ: new_code = EQ; break;
1014 case NE: new_code = NE; break;
1015 case LT: new_code = GTU; break;
1016 case GT: new_code = LTU; break;
1017 case LE: new_code = GEU; break;
1018 case GE: new_code = LEU; break;
1019 default: break;
1020 }
1021
1022 if (new_code != UNKNOWN)
1023 {
1024 *op0 = XVECEXP (*op0, 0, 0);
1025 *code = new_code;
1026 }
1027 }
1028
1029 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1030 if (GET_CODE (*op0) == UNSPEC
1031 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1032 && XVECLEN (*op0, 0) == 1
1033 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1034 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1035 && CONST_INT_P (*op1))
1036 {
1037 enum rtx_code new_code = UNKNOWN;
1038 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1039 {
1040 case CCZmode:
1041 case CCRAWmode:
1042 switch (*code)
1043 {
1044 case EQ: new_code = EQ; break;
1045 case NE: new_code = NE; break;
1046 default: break;
1047 }
1048 break;
1049 default: break;
1050 }
1051
1052 if (new_code != UNKNOWN)
1053 {
1054 /* For CCRAWmode put the required cc mask into the second
1055 operand. */
1056 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1057 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1058 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1059 *op0 = XVECEXP (*op0, 0, 0);
1060 *code = new_code;
1061 }
1062 }
1063
1064 /* Simplify cascaded EQ, NE with const0_rtx. */
1065 if ((*code == NE || *code == EQ)
1066 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1067 && GET_MODE (*op0) == SImode
1068 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1069 && REG_P (XEXP (*op0, 0))
1070 && XEXP (*op0, 1) == const0_rtx
1071 && *op1 == const0_rtx)
1072 {
1073 if ((*code == EQ && GET_CODE (*op0) == NE)
1074 || (*code == NE && GET_CODE (*op0) == EQ))
1075 *code = EQ;
1076 else
1077 *code = NE;
1078 *op0 = XEXP (*op0, 0);
1079 }
1080
1081 /* Prefer register over memory as first operand. */
1082 if (MEM_P (*op0) && REG_P (*op1))
1083 {
1084 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1085 *code = (int)swap_condition ((enum rtx_code)*code);
1086 }
1087
1088 /* Using the scalar variants of vector instructions for 64 bit FP
1089 comparisons might require swapping the operands. */
1090 if (TARGET_VX
1091 && register_operand (*op0, DFmode)
1092 && register_operand (*op1, DFmode)
1093 && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
1094 {
1095 rtx tmp;
1096
1097 switch (*code)
1098 {
1099 case LT: *code = GT; break;
1100 case LE: *code = GE; break;
1101 case UNGT: *code = UNLE; break;
1102 case UNGE: *code = UNLT; break;
1103 default: ;
1104 }
1105 tmp = *op0; *op0 = *op1; *op1 = tmp;
1106 }
1107 }
1108
1109 /* Helper function for s390_emit_compare. If possible emit a 64 bit
1110 FP compare using the single element variant of vector instructions.
1111 Replace CODE with the comparison code to be used in the CC reg
1112 compare and return the condition code register RTX in CC. */
1113
1114 static bool
1115 s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
1116 rtx *cc)
1117 {
1118 machine_mode cmp_mode;
1119 bool swap_p = false;
1120
1121 switch (*code)
1122 {
1123 case EQ: cmp_mode = CCVEQmode; break;
1124 case NE: cmp_mode = CCVEQmode; break;
1125 case GT: cmp_mode = CCVFHmode; break;
1126 case GE: cmp_mode = CCVFHEmode; break;
1127 case UNLE: cmp_mode = CCVFHmode; break;
1128 case UNLT: cmp_mode = CCVFHEmode; break;
1129 case LT: cmp_mode = CCVFHmode; *code = GT; swap_p = true; break;
1130 case LE: cmp_mode = CCVFHEmode; *code = GE; swap_p = true; break;
1131 case UNGE: cmp_mode = CCVFHmode; *code = UNLE; swap_p = true; break;
1132 case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
1133 default: return false;
1134 }
1135
1136 if (swap_p)
1137 {
1138 rtx tmp = cmp2;
1139 cmp2 = cmp1;
1140 cmp1 = tmp;
1141 }
1142 *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
1143 emit_insn (gen_rtx_PARALLEL (VOIDmode,
1144 gen_rtvec (2,
1145 gen_rtx_SET (*cc,
1146 gen_rtx_COMPARE (cmp_mode, cmp1,
1147 cmp2)),
1148 gen_rtx_CLOBBER (VOIDmode,
1149 gen_rtx_SCRATCH (V2DImode)))));
1150 return true;
1151 }
1152
1153
1154 /* Emit a compare instruction suitable to implement the comparison
1155 OP0 CODE OP1. Return the correct condition RTL to be placed in
1156 the IF_THEN_ELSE of the conditional branch testing the result. */
1157
1158 rtx
1159 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1160 {
1161 machine_mode mode = s390_select_ccmode (code, op0, op1);
1162 rtx cc;
1163
1164 if (TARGET_VX
1165 && register_operand (op0, DFmode)
1166 && register_operand (op1, DFmode)
1167 && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
1168 {
1169 /* Work has been done by s390_expand_vec_compare_scalar already. */
1170 }
1171 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1172 {
1173 /* Do not output a redundant compare instruction if a
1174 compare_and_swap pattern already computed the result and the
1175 machine modes are compatible. */
1176 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1177 == GET_MODE (op0));
1178 cc = op0;
1179 }
1180 else
1181 {
1182 cc = gen_rtx_REG (mode, CC_REGNUM);
1183 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1184 }
1185
1186 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1187 }
1188
1189 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1190 matches CMP.
1191 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1192 conditional branch testing the result. */
1193
1194 static rtx
1195 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1196 rtx cmp, rtx new_rtx)
1197 {
1198 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
1199 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
1200 const0_rtx);
1201 }
1202
1203 /* Emit a jump instruction to TARGET and return it. If COND is
1204 NULL_RTX, emit an unconditional jump, else a conditional jump under
1205 condition COND. */
1206
1207 rtx_insn *
1208 s390_emit_jump (rtx target, rtx cond)
1209 {
1210 rtx insn;
1211
1212 target = gen_rtx_LABEL_REF (VOIDmode, target);
1213 if (cond)
1214 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1215
1216 insn = gen_rtx_SET (pc_rtx, target);
1217 return emit_jump_insn (insn);
1218 }
1219
1220 /* Return branch condition mask to implement a branch
1221 specified by CODE. Return -1 for invalid comparisons. */
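/* For instance, an EQ comparison in CCSmode yields CC0 == 8 (binary 1000),
which selects the "e" mnemonic in s390_branch_condition_mnemonic below. */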
1222
1223 int
1224 s390_branch_condition_mask (rtx code)
1225 {
1226 const int CC0 = 1 << 3;
1227 const int CC1 = 1 << 2;
1228 const int CC2 = 1 << 1;
1229 const int CC3 = 1 << 0;
1230
1231 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1232 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1233 gcc_assert (XEXP (code, 1) == const0_rtx
1234 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1235 && CONST_INT_P (XEXP (code, 1))));
1236
1237
1238 switch (GET_MODE (XEXP (code, 0)))
1239 {
1240 case CCZmode:
1241 case CCZ1mode:
1242 switch (GET_CODE (code))
1243 {
1244 case EQ: return CC0;
1245 case NE: return CC1 | CC2 | CC3;
1246 default: return -1;
1247 }
1248 break;
1249
1250 case CCT1mode:
1251 switch (GET_CODE (code))
1252 {
1253 case EQ: return CC1;
1254 case NE: return CC0 | CC2 | CC3;
1255 default: return -1;
1256 }
1257 break;
1258
1259 case CCT2mode:
1260 switch (GET_CODE (code))
1261 {
1262 case EQ: return CC2;
1263 case NE: return CC0 | CC1 | CC3;
1264 default: return -1;
1265 }
1266 break;
1267
1268 case CCT3mode:
1269 switch (GET_CODE (code))
1270 {
1271 case EQ: return CC3;
1272 case NE: return CC0 | CC1 | CC2;
1273 default: return -1;
1274 }
1275 break;
1276
1277 case CCLmode:
1278 switch (GET_CODE (code))
1279 {
1280 case EQ: return CC0 | CC2;
1281 case NE: return CC1 | CC3;
1282 default: return -1;
1283 }
1284 break;
1285
1286 case CCL1mode:
1287 switch (GET_CODE (code))
1288 {
1289 case LTU: return CC2 | CC3; /* carry */
1290 case GEU: return CC0 | CC1; /* no carry */
1291 default: return -1;
1292 }
1293 break;
1294
1295 case CCL2mode:
1296 switch (GET_CODE (code))
1297 {
1298 case GTU: return CC0 | CC1; /* borrow */
1299 case LEU: return CC2 | CC3; /* no borrow */
1300 default: return -1;
1301 }
1302 break;
1303
1304 case CCL3mode:
1305 switch (GET_CODE (code))
1306 {
1307 case EQ: return CC0 | CC2;
1308 case NE: return CC1 | CC3;
1309 case LTU: return CC1;
1310 case GTU: return CC3;
1311 case LEU: return CC1 | CC2;
1312 case GEU: return CC2 | CC3;
1313 default: return -1;
1314 }
1315
1316 case CCUmode:
1317 switch (GET_CODE (code))
1318 {
1319 case EQ: return CC0;
1320 case NE: return CC1 | CC2 | CC3;
1321 case LTU: return CC1;
1322 case GTU: return CC2;
1323 case LEU: return CC0 | CC1;
1324 case GEU: return CC0 | CC2;
1325 default: return -1;
1326 }
1327 break;
1328
1329 case CCURmode:
1330 switch (GET_CODE (code))
1331 {
1332 case EQ: return CC0;
1333 case NE: return CC2 | CC1 | CC3;
1334 case LTU: return CC2;
1335 case GTU: return CC1;
1336 case LEU: return CC0 | CC2;
1337 case GEU: return CC0 | CC1;
1338 default: return -1;
1339 }
1340 break;
1341
1342 case CCAPmode:
1343 switch (GET_CODE (code))
1344 {
1345 case EQ: return CC0;
1346 case NE: return CC1 | CC2 | CC3;
1347 case LT: return CC1 | CC3;
1348 case GT: return CC2;
1349 case LE: return CC0 | CC1 | CC3;
1350 case GE: return CC0 | CC2;
1351 default: return -1;
1352 }
1353 break;
1354
1355 case CCANmode:
1356 switch (GET_CODE (code))
1357 {
1358 case EQ: return CC0;
1359 case NE: return CC1 | CC2 | CC3;
1360 case LT: return CC1;
1361 case GT: return CC2 | CC3;
1362 case LE: return CC0 | CC1;
1363 case GE: return CC0 | CC2 | CC3;
1364 default: return -1;
1365 }
1366 break;
1367
1368 case CCSmode:
1369 switch (GET_CODE (code))
1370 {
1371 case EQ: return CC0;
1372 case NE: return CC1 | CC2 | CC3;
1373 case LT: return CC1;
1374 case GT: return CC2;
1375 case LE: return CC0 | CC1;
1376 case GE: return CC0 | CC2;
1377 case UNORDERED: return CC3;
1378 case ORDERED: return CC0 | CC1 | CC2;
1379 case UNEQ: return CC0 | CC3;
1380 case UNLT: return CC1 | CC3;
1381 case UNGT: return CC2 | CC3;
1382 case UNLE: return CC0 | CC1 | CC3;
1383 case UNGE: return CC0 | CC2 | CC3;
1384 case LTGT: return CC1 | CC2;
1385 default: return -1;
1386 }
1387 break;
1388
1389 case CCSRmode:
1390 switch (GET_CODE (code))
1391 {
1392 case EQ: return CC0;
1393 case NE: return CC2 | CC1 | CC3;
1394 case LT: return CC2;
1395 case GT: return CC1;
1396 case LE: return CC0 | CC2;
1397 case GE: return CC0 | CC1;
1398 case UNORDERED: return CC3;
1399 case ORDERED: return CC0 | CC2 | CC1;
1400 case UNEQ: return CC0 | CC3;
1401 case UNLT: return CC2 | CC3;
1402 case UNGT: return CC1 | CC3;
1403 case UNLE: return CC0 | CC2 | CC3;
1404 case UNGE: return CC0 | CC1 | CC3;
1405 case LTGT: return CC2 | CC1;
1406 default: return -1;
1407 }
1408 break;
1409
1410 /* Vector comparison modes. */
1411
1412 case CCVEQmode:
1413 switch (GET_CODE (code))
1414 {
1415 case EQ: return CC0;
1416 case NE: return CC3;
1417 default: return -1;
1418 }
1419 /* FP vector compare modes. */
1420
1421 case CCVFHmode:
1422 switch (GET_CODE (code))
1423 {
1424 case GT: return CC0;
1425 case UNLE: return CC3;
1426 default: return -1;
1427 }
1428 case CCVFHEmode:
1429 switch (GET_CODE (code))
1430 {
1431 case GE: return CC0;
1432 case UNLT: return CC3;
1433 default: return -1;
1434 }
1435 case CCRAWmode:
1436 switch (GET_CODE (code))
1437 {
1438 case EQ:
1439 return INTVAL (XEXP (code, 1));
1440 case NE:
1441 return (INTVAL (XEXP (code, 1))) ^ 0xf;
1442 default:
1443 gcc_unreachable ();
1444 }
1445
1446 default:
1447 return -1;
1448 }
1449 }
1450
1451
1452 /* Return branch condition mask to implement a compare and branch
1453 specified by CODE. Return -1 for invalid comparisons. */
1454
1455 int
1456 s390_compare_and_branch_condition_mask (rtx code)
1457 {
1458 const int CC0 = 1 << 3;
1459 const int CC1 = 1 << 2;
1460 const int CC2 = 1 << 1;
1461
1462 switch (GET_CODE (code))
1463 {
1464 case EQ:
1465 return CC0;
1466 case NE:
1467 return CC1 | CC2;
1468 case LT:
1469 case LTU:
1470 return CC1;
1471 case GT:
1472 case GTU:
1473 return CC2;
1474 case LE:
1475 case LEU:
1476 return CC0 | CC1;
1477 case GE:
1478 case GEU:
1479 return CC0 | CC2;
1480 default:
1481 gcc_unreachable ();
1482 }
1483 return -1;
1484 }
1485
1486 /* If INV is false, return assembler mnemonic string to implement
1487 a branch specified by CODE. If INV is true, return mnemonic
1488 for the corresponding inverted branch. */
1489
1490 static const char *
1491 s390_branch_condition_mnemonic (rtx code, int inv)
1492 {
1493 int mask;
1494
1495 static const char *const mnemonic[16] =
1496 {
1497 NULL, "o", "h", "nle",
1498 "l", "nhe", "lh", "ne",
1499 "e", "nlh", "he", "nl",
1500 "le", "nh", "no", NULL
1501 };
1502
1503 if (GET_CODE (XEXP (code, 0)) == REG
1504 && REGNO (XEXP (code, 0)) == CC_REGNUM
1505 && (XEXP (code, 1) == const0_rtx
1506 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1507 && CONST_INT_P (XEXP (code, 1)))))
1508 mask = s390_branch_condition_mask (code);
1509 else
1510 mask = s390_compare_and_branch_condition_mask (code);
1511
1512 gcc_assert (mask >= 0);
1513
1514 if (inv)
1515 mask ^= 15;
1516
1517 gcc_assert (mask >= 1 && mask <= 14);
1518
1519 return mnemonic[mask];
1520 }
1521
1522 /* Return the part of OP which has a value different from DEF.
1523 The size of the part is determined by MODE.
1524 Use this function only if you already know that OP really
1525 contains such a part. */
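/* Example: for OP == 0x12340000, MODE == HImode and DEF == 0 the low
halfword equals DEF, so the next part, 0x1234, is returned. */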
1526
1527 unsigned HOST_WIDE_INT
1528 s390_extract_part (rtx op, machine_mode mode, int def)
1529 {
1530 unsigned HOST_WIDE_INT value = 0;
1531 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
1532 int part_bits = GET_MODE_BITSIZE (mode);
1533 unsigned HOST_WIDE_INT part_mask
1534 = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
1535 int i;
1536
1537 for (i = 0; i < max_parts; i++)
1538 {
1539 if (i == 0)
1540 value = (unsigned HOST_WIDE_INT) INTVAL (op);
1541 else
1542 value >>= part_bits;
1543
1544 if ((value & part_mask) != (def & part_mask))
1545 return value & part_mask;
1546 }
1547
1548 gcc_unreachable ();
1549 }
1550
1551 /* If OP is an integer constant of mode MODE with exactly one
1552 part of mode PART_MODE unequal to DEF, return the number of that
1553 part. Otherwise, return -1. */
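/* Example: OP == 0x0000ffff00000000 with MODE == DImode, PART_MODE == HImode
and DEF == 0 contains exactly one non-zero halfword; counting parts from
the most significant end, the function returns 1. */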
1554
1555 int
1556 s390_single_part (rtx op,
1557 machine_mode mode,
1558 machine_mode part_mode,
1559 int def)
1560 {
1561 unsigned HOST_WIDE_INT value = 0;
1562 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
1563 unsigned HOST_WIDE_INT part_mask
1564 = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
1565 int i, part = -1;
1566
1567 if (GET_CODE (op) != CONST_INT)
1568 return -1;
1569
1570 for (i = 0; i < n_parts; i++)
1571 {
1572 if (i == 0)
1573 value = (unsigned HOST_WIDE_INT) INTVAL (op);
1574 else
1575 value >>= GET_MODE_BITSIZE (part_mode);
1576
1577 if ((value & part_mask) != (def & part_mask))
1578 {
1579 if (part != -1)
1580 return -1;
1581 else
1582 part = i;
1583 }
1584 }
1585 return part == -1 ? -1 : n_parts - 1 - part;
1586 }
1587
1588 /* Return true if IN contains a contiguous bitfield in the lower SIZE
1589 bits and no other bits are set in IN. POS and LENGTH can be used
1590 to obtain the start position and the length of the bitfield.
1591
1592 POS gives the position of the first bit of the bitfield counting
1593 from the lowest order bit starting with zero. In order to use this
1594 value for S/390 instructions this has to be converted to "bits big
1595 endian" style. */
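/* Example: IN == 0x00f0 with SIZE == 16 yields *POS == 4 and *LENGTH == 4,
whereas IN == 0x0110 is rejected because its set bits are not contiguous. */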
1596
1597 bool
1598 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
1599 int *pos, int *length)
1600 {
1601 int tmp_pos = 0;
1602 int tmp_length = 0;
1603 int i;
1604 unsigned HOST_WIDE_INT mask = 1ULL;
1605 bool contiguous = false;
1606
1607 for (i = 0; i < size; mask <<= 1, i++)
1608 {
1609 if (contiguous)
1610 {
1611 if (mask & in)
1612 tmp_length++;
1613 else
1614 break;
1615 }
1616 else
1617 {
1618 if (mask & in)
1619 {
1620 contiguous = true;
1621 tmp_length++;
1622 }
1623 else
1624 tmp_pos++;
1625 }
1626 }
1627
1628 if (!tmp_length)
1629 return false;
1630
1631 /* Calculate a mask for all bits beyond the contiguous bits. */
1632 mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
1633
1634 if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT)
1635 mask &= (HOST_WIDE_INT_1U << size) - 1;
1636
1637 if (mask & in)
1638 return false;
1639
1640 if (tmp_length + tmp_pos - 1 > size)
1641 return false;
1642
1643 if (length)
1644 *length = tmp_length;
1645
1646 if (pos)
1647 *pos = tmp_pos;
1648
1649 return true;
1650 }
1651
1652 /* Return true if OP contains the same contiguous bitfield in *all*
1653 its elements. START and END can be used to obtain the start and
1654 end position of the bitfield.
1655
1656 START/END give the position of the first/last bit of the bitfield
1657 counting from the lowest order bit starting with zero. In order to
1658 use these values for S/390 instructions this has to be converted to
1659 "bits big endian" style. */
1660
1661 bool
1662 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
1663 {
1664 unsigned HOST_WIDE_INT mask;
1665 int length, size;
1666
1667 if (!VECTOR_MODE_P (GET_MODE (op))
1668 || GET_CODE (op) != CONST_VECTOR
1669 || !CONST_INT_P (XVECEXP (op, 0, 0)))
1670 return false;
1671
1672 if (GET_MODE_NUNITS (GET_MODE (op)) > 1)
1673 {
1674 int i;
1675
1676 for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i)
1677 if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0)))
1678 return false;
1679 }
1680
1681 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
1682 mask = UINTVAL (XVECEXP (op, 0, 0));
1683 if (s390_contiguous_bitmask_p (mask, size, start,
1684 end != NULL ? &length : NULL))
1685 {
1686 if (end != NULL)
1687 *end = *start + length - 1;
1688 return true;
1689 }
1690 /* 0xff00000f style immediates can be covered by swapping start and
1691 end indices in vgm. */
1692 if (s390_contiguous_bitmask_p (~mask, size, start,
1693 end != NULL ? &length : NULL))
1694 {
1695 if (end != NULL)
1696 *end = *start - 1;
1697 if (start != NULL)
1698 *start = *start + length;
1699 return true;
1700 }
1701 return false;
1702 }
1703
1704 /* Return true if OP consists only of byte chunks that are either 0 or
1705 0xff. If MASK is != NULL a byte mask is generated which is
1706 appropriate for the vector generate byte mask instruction. */
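/* Example: a V4SI constant with every element equal to 0x000000ff yields
the byte mask 0x1111, i.e. one mask bit per all-ones byte of the 16 byte
vector. */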
1707
1708 bool
1709 s390_bytemask_vector_p (rtx op, unsigned *mask)
1710 {
1711 int i;
1712 unsigned tmp_mask = 0;
1713 int nunit, unit_size;
1714
1715 if (!VECTOR_MODE_P (GET_MODE (op))
1716 || GET_CODE (op) != CONST_VECTOR
1717 || !CONST_INT_P (XVECEXP (op, 0, 0)))
1718 return false;
1719
1720 nunit = GET_MODE_NUNITS (GET_MODE (op));
1721 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
1722
1723 for (i = 0; i < nunit; i++)
1724 {
1725 unsigned HOST_WIDE_INT c;
1726 int j;
1727
1728 if (!CONST_INT_P (XVECEXP (op, 0, i)))
1729 return false;
1730
1731 c = UINTVAL (XVECEXP (op, 0, i));
1732 for (j = 0; j < unit_size; j++)
1733 {
1734 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
1735 return false;
1736 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
1737 c = c >> BITS_PER_UNIT;
1738 }
1739 }
1740
1741 if (mask != NULL)
1742 *mask = tmp_mask;
1743
1744 return true;
1745 }
1746
1747 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
1748 equivalent to a shift followed by the AND. In particular, CONTIG
1749 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
1750 for ROTL indicate a rotate to the right. */
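/* Example: with BITSIZE == 32 and CONTIG == 0xff0 (pos 4, len 8) any left
rotate of at most 4 bits or right rotate of at most 20 bits keeps the
field clear of the gap, so the function returns true for those values. */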
1751
1752 bool
1753 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
1754 {
1755 int pos, len;
1756 bool ok;
1757
1758 ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
1759 gcc_assert (ok);
1760
1761 return ((rotl >= 0 && rotl <= pos)
1762 || (rotl < 0 && -rotl <= bitsize - len - pos));
1763 }
1764
1765 /* Check whether we can (and want to) split a double-word
1766 move in mode MODE from SRC to DST into two single-word
1767 moves, moving the subword FIRST_SUBWORD first. */
1768
1769 bool
1770 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
1771 {
1772 /* Floating point and vector registers cannot be split. */
1773 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
1774 return false;
1775
1776 /* We don't need to split if operands are directly accessible. */
1777 if (s_operand (src, mode) || s_operand (dst, mode))
1778 return false;
1779
1780 /* Non-offsettable memory references cannot be split. */
1781 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
1782 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
1783 return false;
1784
1785 /* Moving the first subword must not clobber a register
1786 needed to move the second subword. */
1787 if (register_operand (dst, mode))
1788 {
1789 rtx subreg = operand_subword (dst, first_subword, 0, mode);
1790 if (reg_overlap_mentioned_p (subreg, src))
1791 return false;
1792 }
1793
1794 return true;
1795 }
1796
1797 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
1798 and [MEM2, MEM2 + SIZE] do overlap and false
1799 otherwise. */
1800
1801 bool
1802 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
1803 {
1804 rtx addr1, addr2, addr_delta;
1805 HOST_WIDE_INT delta;
1806
1807 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
1808 return true;
1809
1810 if (size == 0)
1811 return false;
1812
1813 addr1 = XEXP (mem1, 0);
1814 addr2 = XEXP (mem2, 0);
1815
1816 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
1817
1818 /* This overlapping check is used by peepholes merging memory block operations.
1819 Overlapping operations would otherwise be recognized by the S/390 hardware
1820 and would fall back to a slower implementation. Allowing overlapping
1821 operations would lead to slow code but not to wrong code. Therefore we are
1822 somewhat optimistic if we cannot prove that the memory blocks are
1823 overlapping.
1824 That's why we return false here although this may accept operations on
1825 overlapping memory areas. */
1826 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
1827 return false;
1828
1829 delta = INTVAL (addr_delta);
1830
1831 if (delta == 0
1832 || (delta > 0 && delta < size)
1833 || (delta < 0 && -delta < size))
1834 return true;
1835
1836 return false;
1837 }
1838
1839 /* Check whether the address of memory reference MEM2 equals exactly
1840 the address of memory reference MEM1 plus DELTA. Return true if
1841 we can prove this to be the case, false otherwise. */
1842
1843 bool
1844 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
1845 {
1846 rtx addr1, addr2, addr_delta;
1847
1848 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
1849 return false;
1850
1851 addr1 = XEXP (mem1, 0);
1852 addr2 = XEXP (mem2, 0);
1853
1854 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
1855 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
1856 return false;
1857
1858 return true;
1859 }
1860
1861 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
1862
1863 void
1864 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
1865 rtx *operands)
1866 {
1867 machine_mode wmode = mode;
1868 rtx dst = operands[0];
1869 rtx src1 = operands[1];
1870 rtx src2 = operands[2];
1871 rtx op, clob, tem;
1872
1873 /* If we cannot handle the operation directly, use a temp register. */
1874 if (!s390_logical_operator_ok_p (operands))
1875 dst = gen_reg_rtx (mode);
1876
1877 /* QImode and HImode patterns make sense only if we have a destination
1878 in memory. Otherwise perform the operation in SImode. */
1879 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
1880 wmode = SImode;
1881
1882 /* Widen operands if required. */
1883 if (mode != wmode)
1884 {
1885 if (GET_CODE (dst) == SUBREG
1886 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
1887 dst = tem;
1888 else if (REG_P (dst))
1889 dst = gen_rtx_SUBREG (wmode, dst, 0);
1890 else
1891 dst = gen_reg_rtx (wmode);
1892
1893 if (GET_CODE (src1) == SUBREG
1894 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
1895 src1 = tem;
1896 else if (GET_MODE (src1) != VOIDmode)
1897 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
1898
1899 if (GET_CODE (src2) == SUBREG
1900 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
1901 src2 = tem;
1902 else if (GET_MODE (src2) != VOIDmode)
1903 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
1904 }
1905
1906 /* Emit the instruction. */
1907 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
1908 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
1909 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
1910
1911 /* Fix up the destination if needed. */
1912 if (dst != operands[0])
1913 emit_move_insn (operands[0], gen_lowpart (mode, dst));
1914 }
1915
1916 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
1917
1918 bool
1919 s390_logical_operator_ok_p (rtx *operands)
1920 {
1921 /* If the destination operand is in memory, it needs to coincide
1922 with one of the source operands. After reload, it has to be
1923 the first source operand. */
1924 if (GET_CODE (operands[0]) == MEM)
1925 return rtx_equal_p (operands[0], operands[1])
1926 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
1927
1928 return true;
1929 }
1930
1931 /* Narrow logical operation CODE of memory operand MEMOP with immediate
1932 operand IMMOP to switch from SS to SI type instructions. */
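/* Example: an AND of a SImode memory operand with the immediate 0xffffff00
only affects the least significant byte and can therefore be rewritten as
a QImode AND (an NI instruction) of the byte at offset 3 with mask 0x00. */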
1933
1934 void
1935 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
1936 {
1937 int def = code == AND ? -1 : 0;
1938 HOST_WIDE_INT mask;
1939 int part;
1940
1941 gcc_assert (GET_CODE (*memop) == MEM);
1942 gcc_assert (!MEM_VOLATILE_P (*memop));
1943
1944 mask = s390_extract_part (*immop, QImode, def);
1945 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
1946 gcc_assert (part >= 0);
1947
1948 *memop = adjust_address (*memop, QImode, part);
1949 *immop = gen_int_mode (mask, QImode);
1950 }
1951
1952
1953 /* How to allocate a 'struct machine_function'. */
1954
1955 static struct machine_function *
1956 s390_init_machine_status (void)
1957 {
1958 return ggc_cleared_alloc<machine_function> ();
1959 }
1960
1961 /* Map for smallest class containing reg regno. */
1962
1963 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
1964 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
1965 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
1966 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
1967 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
1968 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
1969 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
1970 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
1971 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
1972 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
1973 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
1974 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
1975 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
1976 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
1977 VEC_REGS, VEC_REGS /* 52 */
1978 };
1979
1980 /* Return attribute type of insn. */
1981
1982 static enum attr_type
1983 s390_safe_attr_type (rtx_insn *insn)
1984 {
1985 if (recog_memoized (insn) >= 0)
1986 return get_attr_type (insn);
1987 else
1988 return TYPE_NONE;
1989 }
1990
1991 /* Return true if DISP is a valid short displacement. */
1992
1993 static bool
1994 s390_short_displacement (rtx disp)
1995 {
1996 /* No displacement is OK. */
1997 if (!disp)
1998 return true;
1999
2000 /* Without the long displacement facility we don't need to
2001 distinguish between long and short displacements. */
2002 if (!TARGET_LONG_DISPLACEMENT)
2003 return true;
2004
2005 /* Integer displacement in range. */
2006 if (GET_CODE (disp) == CONST_INT)
2007 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2008
2009 /* GOT offset is not OK, the GOT can be large. */
2010 if (GET_CODE (disp) == CONST
2011 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2012 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2013 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2014 return false;
2015
2016 /* All other symbolic constants are literal pool references,
2017 which are OK as the literal pool must be small. */
2018 if (GET_CODE (disp) == CONST)
2019 return true;
2020
2021 return false;
2022 }
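
/* As an illustration of the check above: a CONST_INT displacement of
   4000 counts as short (it fits the unsigned 12-bit displacement field
   of the classic base + displacement instruction formats), while 4096
   does not and would need a long-displacement instruction such as "ly"
   instead of "l", assuming the long displacement facility is
   available.  */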
2023
2024 /* Decompose an RTL expression ADDR for a memory address into
2025 its components, returned in OUT.
2026
2027 Returns false if ADDR is not a valid memory address, true
2028 otherwise. If OUT is NULL, don't return the components,
2029 but check for validity only.
2030
2031 Note: Only addresses in canonical form are recognized.
2032 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2033 canonical form so that they will be recognized. */
2034
2035 static int
2036 s390_decompose_address (rtx addr, struct s390_address *out)
2037 {
2038 HOST_WIDE_INT offset = 0;
2039 rtx base = NULL_RTX;
2040 rtx indx = NULL_RTX;
2041 rtx disp = NULL_RTX;
2042 rtx orig_disp;
2043 bool pointer = false;
2044 bool base_ptr = false;
2045 bool indx_ptr = false;
2046 bool literal_pool = false;
2047
2048 /* We may need to substitute the literal pool base register into the address
2049 below. However, at this point we do not know which register is going to
2050 be used as base, so we substitute the arg pointer register. This is going
2051 to be treated as holding a pointer below -- it shouldn't be used for any
2052 other purpose. */
2053 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2054
2055 /* Decompose address into base + index + displacement. */
2056
2057 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2058 base = addr;
2059
2060 else if (GET_CODE (addr) == PLUS)
2061 {
2062 rtx op0 = XEXP (addr, 0);
2063 rtx op1 = XEXP (addr, 1);
2064 enum rtx_code code0 = GET_CODE (op0);
2065 enum rtx_code code1 = GET_CODE (op1);
2066
2067 if (code0 == REG || code0 == UNSPEC)
2068 {
2069 if (code1 == REG || code1 == UNSPEC)
2070 {
2071 indx = op0; /* index + base */
2072 base = op1;
2073 }
2074
2075 else
2076 {
2077 base = op0; /* base + displacement */
2078 disp = op1;
2079 }
2080 }
2081
2082 else if (code0 == PLUS)
2083 {
2084 indx = XEXP (op0, 0); /* index + base + disp */
2085 base = XEXP (op0, 1);
2086 disp = op1;
2087 }
2088
2089 else
2090 {
2091 return false;
2092 }
2093 }
2094
2095 else
2096 disp = addr; /* displacement */
2097
2098 /* Extract integer part of displacement. */
2099 orig_disp = disp;
2100 if (disp)
2101 {
2102 if (GET_CODE (disp) == CONST_INT)
2103 {
2104 offset = INTVAL (disp);
2105 disp = NULL_RTX;
2106 }
2107 else if (GET_CODE (disp) == CONST
2108 && GET_CODE (XEXP (disp, 0)) == PLUS
2109 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2110 {
2111 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2112 disp = XEXP (XEXP (disp, 0), 0);
2113 }
2114 }
2115
2116 /* Strip off CONST here to avoid special case tests later. */
2117 if (disp && GET_CODE (disp) == CONST)
2118 disp = XEXP (disp, 0);
2119
2120 /* We can convert literal pool addresses to
2121 displacements by basing them off the base register. */
2122 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2123 {
2124 /* Either base or index must be free to hold the base register. */
2125 if (!base)
2126 base = fake_pool_base, literal_pool = true;
2127 else if (!indx)
2128 indx = fake_pool_base, literal_pool = true;
2129 else
2130 return false;
2131
2132 /* Mark up the displacement. */
2133 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2134 UNSPEC_LTREL_OFFSET);
2135 }
2136
2137 /* Validate base register. */
2138 if (base)
2139 {
2140 if (GET_CODE (base) == UNSPEC)
2141 switch (XINT (base, 1))
2142 {
2143 case UNSPEC_LTREF:
2144 if (!disp)
2145 disp = gen_rtx_UNSPEC (Pmode,
2146 gen_rtvec (1, XVECEXP (base, 0, 0)),
2147 UNSPEC_LTREL_OFFSET);
2148 else
2149 return false;
2150
2151 base = XVECEXP (base, 0, 1);
2152 break;
2153
2154 case UNSPEC_LTREL_BASE:
2155 if (XVECLEN (base, 0) == 1)
2156 base = fake_pool_base, literal_pool = true;
2157 else
2158 base = XVECEXP (base, 0, 1);
2159 break;
2160
2161 default:
2162 return false;
2163 }
2164
2165 if (!REG_P (base)
2166 || (GET_MODE (base) != SImode
2167 && GET_MODE (base) != Pmode))
2168 return false;
2169
2170 if (REGNO (base) == STACK_POINTER_REGNUM
2171 || REGNO (base) == FRAME_POINTER_REGNUM
2172 || ((reload_completed || reload_in_progress)
2173 && frame_pointer_needed
2174 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2175 || REGNO (base) == ARG_POINTER_REGNUM
2176 || (flag_pic
2177 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2178 pointer = base_ptr = true;
2179
2180 if ((reload_completed || reload_in_progress)
2181 && base == cfun->machine->base_reg)
2182 pointer = base_ptr = literal_pool = true;
2183 }
2184
2185 /* Validate index register. */
2186 if (indx)
2187 {
2188 if (GET_CODE (indx) == UNSPEC)
2189 switch (XINT (indx, 1))
2190 {
2191 case UNSPEC_LTREF:
2192 if (!disp)
2193 disp = gen_rtx_UNSPEC (Pmode,
2194 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2195 UNSPEC_LTREL_OFFSET);
2196 else
2197 return false;
2198
2199 indx = XVECEXP (indx, 0, 1);
2200 break;
2201
2202 case UNSPEC_LTREL_BASE:
2203 if (XVECLEN (indx, 0) == 1)
2204 indx = fake_pool_base, literal_pool = true;
2205 else
2206 indx = XVECEXP (indx, 0, 1);
2207 break;
2208
2209 default:
2210 return false;
2211 }
2212
2213 if (!REG_P (indx)
2214 || (GET_MODE (indx) != SImode
2215 && GET_MODE (indx) != Pmode))
2216 return false;
2217
2218 if (REGNO (indx) == STACK_POINTER_REGNUM
2219 || REGNO (indx) == FRAME_POINTER_REGNUM
2220 || ((reload_completed || reload_in_progress)
2221 && frame_pointer_needed
2222 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2223 || REGNO (indx) == ARG_POINTER_REGNUM
2224 || (flag_pic
2225 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2226 pointer = indx_ptr = true;
2227
2228 if ((reload_completed || reload_in_progress)
2229 && indx == cfun->machine->base_reg)
2230 pointer = indx_ptr = literal_pool = true;
2231 }
2232
2233 /* Prefer to use pointer as base, not index. */
2234 if (base && indx && !base_ptr
2235 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2236 {
2237 rtx tmp = base;
2238 base = indx;
2239 indx = tmp;
2240 }
2241
2242 /* Validate displacement. */
2243 if (!disp)
2244 {
2245 /* If virtual registers are involved, the displacement will change later
2246 anyway as the virtual registers get eliminated. This could make a
2247 valid displacement invalid, but it is more likely to make an invalid
2248 displacement valid, because we sometimes access the register save area
2249 via negative offsets to one of those registers.
2250 Thus we don't check the displacement for validity here. If after
2251 elimination the displacement turns out to be invalid after all,
2252 this is fixed up by reload in any case. */
2253 /* LRA always keeps displacements up to date, and we need to know
2254 that the displacement is correct throughout LRA, not only at the
2255 final elimination. */
2256 if (lra_in_progress
2257 || (base != arg_pointer_rtx
2258 && indx != arg_pointer_rtx
2259 && base != return_address_pointer_rtx
2260 && indx != return_address_pointer_rtx
2261 && base != frame_pointer_rtx
2262 && indx != frame_pointer_rtx
2263 && base != virtual_stack_vars_rtx
2264 && indx != virtual_stack_vars_rtx))
2265 if (!DISP_IN_RANGE (offset))
2266 return false;
2267 }
2268 else
2269 {
2270 /* All the special cases are pointers. */
2271 pointer = true;
2272
2273 /* In the small-PIC case, the linker converts @GOT
2274 and @GOTNTPOFF offsets to possible displacements. */
2275 if (GET_CODE (disp) == UNSPEC
2276 && (XINT (disp, 1) == UNSPEC_GOT
2277 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2278 && flag_pic == 1)
2279 {
2280 ;
2281 }
2282
2283 /* Accept pool label offsets. */
2284 else if (GET_CODE (disp) == UNSPEC
2285 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2286 ;
2287
2288 /* Accept literal pool references. */
2289 else if (GET_CODE (disp) == UNSPEC
2290 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2291 {
2292 /* In case CSE pulled a non-literal-pool reference out of
2293 the pool we have to reject the address. This is
2294 especially important when loading the GOT pointer on
2295 non-zarch CPUs. In this case the literal pool contains an
2296 lt-relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2297 will most likely exceed the displacement range. */
2298 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2299 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2300 return false;
2301
2302 orig_disp = gen_rtx_CONST (Pmode, disp);
2303 if (offset)
2304 {
2305 /* If we have an offset, make sure it does not
2306 exceed the size of the constant pool entry. */
2307 rtx sym = XVECEXP (disp, 0, 0);
2308 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2309 return false;
2310
2311 orig_disp = plus_constant (Pmode, orig_disp, offset);
2312 }
2313 }
2314
2315 else
2316 return false;
2317 }
2318
2319 if (!base && !indx)
2320 pointer = true;
2321
2322 if (out)
2323 {
2324 out->base = base;
2325 out->indx = indx;
2326 out->disp = orig_disp;
2327 out->pointer = pointer;
2328 out->literal_pool = literal_pool;
2329 }
2330
2331 return true;
2332 }
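
/* A worked example for the decomposition above (hard registers 2 and 3
   are picked arbitrarily): for

     (plus:SI (plus:SI (reg:SI 2) (reg:SI 3)) (const_int 8))

   the inner PLUS is split so that indx = (reg 2), base = (reg 3), and
   the integer part of the displacement becomes 8.  A plain
   (plus:SI (reg:SI 3) (const_int 8)) yields base = (reg 3), no index,
   and offset 8.  */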
2333
2334 /* Decompose an RTL expression OP for a shift count into its components,
2335 and return the base register in BASE and the offset in OFFSET.
2336
2337 Return true if OP is a valid shift count, false if not. */
2338
2339 bool
2340 s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
2341 {
2342 HOST_WIDE_INT off = 0;
2343
2344 /* We can have an integer constant, an address register,
2345 or a sum of the two. */
2346 if (GET_CODE (op) == CONST_INT)
2347 {
2348 off = INTVAL (op);
2349 op = NULL_RTX;
2350 }
2351 if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
2352 {
2353 off = INTVAL (XEXP (op, 1));
2354 op = XEXP (op, 0);
2355 }
2356 while (op && GET_CODE (op) == SUBREG)
2357 op = SUBREG_REG (op);
2358
2359 if (op && GET_CODE (op) != REG)
2360 return false;
2361
2362 if (offset)
2363 *offset = off;
2364 if (base)
2365 *base = op;
2366
2367 return true;
2368 }
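
/* For illustration: a shift count of (plus:SI (reg:SI 2) (const_int 7))
   decomposes into base = (reg 2) and offset 7, while a plain
   (const_int 63) yields a NULL base and offset 63.  */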
2369
2370
2371 /* Return true if OP is a valid address without an index. */
2372
2373 bool
2374 s390_legitimate_address_without_index_p (rtx op)
2375 {
2376 struct s390_address addr;
2377
2378 if (!s390_decompose_address (XEXP (op, 0), &addr))
2379 return false;
2380 if (addr.indx)
2381 return false;
2382
2383 return true;
2384 }
2385
2386
2387 /* Return TRUE if ADDR is an operand valid for a load/store relative
2388 instruction. Be aware that the alignment of the operand needs to
2389 be checked separately.
2390 Valid addresses are single references or a sum of a reference and a
2391 constant integer. Return these parts in SYMREF and ADDEND. You can
2392 pass NULL in SYMREF and/or ADDEND if you are not interested in these
2393 values. Literal pool references are *not* considered symbol
2394 references. */
2395
2396 static bool
2397 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
2398 {
2399 HOST_WIDE_INT tmpaddend = 0;
2400
2401 if (GET_CODE (addr) == CONST)
2402 addr = XEXP (addr, 0);
2403
2404 if (GET_CODE (addr) == PLUS)
2405 {
2406 if (!CONST_INT_P (XEXP (addr, 1)))
2407 return false;
2408
2409 tmpaddend = INTVAL (XEXP (addr, 1));
2410 addr = XEXP (addr, 0);
2411 }
2412
2413 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
2414 || (GET_CODE (addr) == UNSPEC
2415 && (XINT (addr, 1) == UNSPEC_GOTENT
2416 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
2417 {
2418 if (symref)
2419 *symref = addr;
2420 if (addend)
2421 *addend = tmpaddend;
2422
2423 return true;
2424 }
2425 return false;
2426 }
2427
2428 /* Return true if the address in OP is valid for constraint letter C
2429 if wrapped in a MEM rtx. LIT_POOL_OK indicates whether literal
2430 pool MEMs should be accepted. Only the Q, R, S, T constraint
2431 letters are allowed for C. */
2432
2433 static int
2434 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
2435 {
2436 struct s390_address addr;
2437 bool decomposed = false;
2438
2439 /* This check makes sure that no symbolic addresses (except literal
2440 pool references) are accepted by the R or T constraints. */
2441 if (s390_loadrelative_operand_p (op, NULL, NULL))
2442 return 0;
2443
2444 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
2445 if (!lit_pool_ok)
2446 {
2447 if (!s390_decompose_address (op, &addr))
2448 return 0;
2449 if (addr.literal_pool)
2450 return 0;
2451 decomposed = true;
2452 }
2453
2454 switch (c)
2455 {
2456 case 'Q': /* no index short displacement */
2457 if (!decomposed && !s390_decompose_address (op, &addr))
2458 return 0;
2459 if (addr.indx)
2460 return 0;
2461 if (!s390_short_displacement (addr.disp))
2462 return 0;
2463 break;
2464
2465 case 'R': /* with index short displacement */
2466 if (TARGET_LONG_DISPLACEMENT)
2467 {
2468 if (!decomposed && !s390_decompose_address (op, &addr))
2469 return 0;
2470 if (!s390_short_displacement (addr.disp))
2471 return 0;
2472 }
2473 /* Any invalid address here will be fixed up by reload,
2474 so accept it for the most generic constraint. */
2475 break;
2476
2477 case 'S': /* no index long displacement */
2478 if (!TARGET_LONG_DISPLACEMENT)
2479 return 0;
2480 if (!decomposed && !s390_decompose_address (op, &addr))
2481 return 0;
2482 if (addr.indx)
2483 return 0;
2484 if (s390_short_displacement (addr.disp))
2485 return 0;
2486 break;
2487
2488 case 'T': /* with index long displacement */
2489 if (!TARGET_LONG_DISPLACEMENT)
2490 return 0;
2491 /* Any invalid address here will be fixed up by reload,
2492 so accept it for the most generic constraint. */
2493 if ((decomposed || s390_decompose_address (op, &addr))
2494 && s390_short_displacement (addr.disp))
2495 return 0;
2496 break;
2497 default:
2498 return 0;
2499 }
2500 return 1;
2501 }
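
/* Summary of the address constraint letters handled above (merely a
   restatement of the cases, not new semantics):

     Q - base + short displacement, no index
     R - base + index + short displacement
     S - base + long displacement, no index
     T - base + index + long displacement

   where "short" means a displacement accepted by
   s390_short_displacement and "long" means one that is not.  */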
2502
2503
2504 /* Evaluates constraint strings described by the regular expression
2505 (A|B|Z)(Q|R|S|T)|U|W|Y and returns 1 if OP is a valid operand for
2506 the constraint given in STR, and 0 otherwise. */
2507
2508 int
2509 s390_mem_constraint (const char *str, rtx op)
2510 {
2511 char c = str[0];
2512
2513 switch (c)
2514 {
2515 case 'A':
2516 /* Check for offsettable variants of memory constraints. */
2517 if (!MEM_P (op) || MEM_VOLATILE_P (op))
2518 return 0;
2519 if ((reload_completed || reload_in_progress)
2520 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
2521 return 0;
2522 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
2523 case 'B':
2524 /* Check for non-literal-pool variants of memory constraints. */
2525 if (!MEM_P (op))
2526 return 0;
2527 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
2528 case 'Q':
2529 case 'R':
2530 case 'S':
2531 case 'T':
2532 if (GET_CODE (op) != MEM)
2533 return 0;
2534 return s390_check_qrst_address (c, XEXP (op, 0), true);
2535 case 'U':
2536 return (s390_check_qrst_address ('Q', op, true)
2537 || s390_check_qrst_address ('R', op, true));
2538 case 'W':
2539 return (s390_check_qrst_address ('S', op, true)
2540 || s390_check_qrst_address ('T', op, true));
2541 case 'Y':
2542 /* Simply check for the basic form of a shift count. Reload will
2543 take care of making sure we have a proper base register. */
2544 if (!s390_decompose_shift_count (op, NULL, NULL))
2545 return 0;
2546 break;
2547 case 'Z':
2548 return s390_check_qrst_address (str[1], op, true);
2549 default:
2550 return 0;
2551 }
2552 return 1;
2553 }
2554
2555
2556 /* Evaluates constraint strings starting with letter O. Input
2557 parameter C is the letter following the "O" in the constraint
2558 string, i.e. the constraint's second letter. Returns 1 if VALUE
2559 meets the respective constraint and 0 otherwise. */
2560
2561 int
2562 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
2563 {
2564 if (!TARGET_EXTIMM)
2565 return 0;
2566
2567 switch (c)
2568 {
2569 case 's':
2570 return trunc_int_for_mode (value, SImode) == value;
2571
2572 case 'p':
2573 return value == 0
2574 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
2575
2576 case 'n':
2577 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
2578
2579 default:
2580 gcc_unreachable ();
2581 }
2582 }
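
/* For example: with TARGET_EXTIMM the "Os" variant accepts 0x7fffffff,
   which is unchanged by truncation to SImode, but rejects the value
   0x80000000, whose sign-extended SImode image differs from the
   original.  */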
2583
2584
2585 /* Evaluates constraint strings starting with letter N. Parameter STR
2586 contains the letters following the letter "N" in the constraint string.
2587 Returns true if VALUE matches the constraint. */
2588
2589 int
2590 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
2591 {
2592 machine_mode mode, part_mode;
2593 int def;
2594 int part, part_goal;
2595
2596
2597 if (str[0] == 'x')
2598 part_goal = -1;
2599 else
2600 part_goal = str[0] - '0';
2601
2602 switch (str[1])
2603 {
2604 case 'Q':
2605 part_mode = QImode;
2606 break;
2607 case 'H':
2608 part_mode = HImode;
2609 break;
2610 case 'S':
2611 part_mode = SImode;
2612 break;
2613 default:
2614 return 0;
2615 }
2616
2617 switch (str[2])
2618 {
2619 case 'H':
2620 mode = HImode;
2621 break;
2622 case 'S':
2623 mode = SImode;
2624 break;
2625 case 'D':
2626 mode = DImode;
2627 break;
2628 default:
2629 return 0;
2630 }
2631
2632 switch (str[3])
2633 {
2634 case '0':
2635 def = 0;
2636 break;
2637 case 'F':
2638 def = -1;
2639 break;
2640 default:
2641 return 0;
2642 }
2643
2644 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
2645 return 0;
2646
2647 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
2648 if (part < 0)
2649 return 0;
2650 if (part_goal != -1 && part_goal != part)
2651 return 0;
2652
2653 return 1;
2654 }
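
/* Reading of the constraint letters above, using a hypothetical STR of
   "xQD0": 'x' means any part number is acceptable, 'Q' selects QImode
   parts, 'D' means VALUE is interpreted in DImode, and '0' requires all
   remaining parts to be zero; such a constraint matches any DImode
   value in which exactly one byte differs from zero.  */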
2655
2656
2657 /* Returns true if the input parameter VALUE is a float zero. */
2658
2659 int
2660 s390_float_const_zero_p (rtx value)
2661 {
2662 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
2663 && value == CONST0_RTX (GET_MODE (value)));
2664 }
2665
2666 /* Implement TARGET_REGISTER_MOVE_COST. */
2667
2668 static int
2669 s390_register_move_cost (machine_mode mode,
2670 reg_class_t from, reg_class_t to)
2671 {
2672 /* On s390, copying between FPRs and GPRs is expensive. */
2673
2674 /* Copies become somewhat faster once ldgr/lgdr are available. */
2675 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
2676 {
2677 /* ldgr is single cycle. */
2678 if (reg_classes_intersect_p (from, GENERAL_REGS)
2679 && reg_classes_intersect_p (to, FP_REGS))
2680 return 1;
2681 /* lgdr needs 3 cycles. */
2682 if (reg_classes_intersect_p (to, GENERAL_REGS)
2683 && reg_classes_intersect_p (from, FP_REGS))
2684 return 3;
2685 }
2686
2687 /* Otherwise copying is done via memory. */
2688 if ((reg_classes_intersect_p (from, GENERAL_REGS)
2689 && reg_classes_intersect_p (to, FP_REGS))
2690 || (reg_classes_intersect_p (from, FP_REGS)
2691 && reg_classes_intersect_p (to, GENERAL_REGS)))
2692 return 10;
2693
2694 return 1;
2695 }
2696
2697 /* Implement TARGET_MEMORY_MOVE_COST. */
2698
2699 static int
2700 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
2701 reg_class_t rclass ATTRIBUTE_UNUSED,
2702 bool in ATTRIBUTE_UNUSED)
2703 {
2704 return 2;
2705 }
2706
2707 /* Compute a (partial) cost for rtx X. Return true if the complete
2708 cost has been computed, and false if subexpressions should be
2709 scanned. In either case, *TOTAL contains the cost result.
2710 CODE contains GET_CODE (x), OUTER_CODE contains the code
2711 of the superexpression of x. */
2712
2713 static bool
2714 s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
2715 int *total, bool speed ATTRIBUTE_UNUSED)
2716 {
2717 switch (code)
2718 {
2719 case CONST:
2720 case CONST_INT:
2721 case LABEL_REF:
2722 case SYMBOL_REF:
2723 case CONST_DOUBLE:
2724 case MEM:
2725 *total = 0;
2726 return true;
2727
2728 case ASHIFT:
2729 case ASHIFTRT:
2730 case LSHIFTRT:
2731 case ROTATE:
2732 case ROTATERT:
2733 case AND:
2734 case IOR:
2735 case XOR:
2736 case NEG:
2737 case NOT:
2738 *total = COSTS_N_INSNS (1);
2739 return false;
2740
2741 case PLUS:
2742 case MINUS:
2743 *total = COSTS_N_INSNS (1);
2744 return false;
2745
2746 case MULT:
2747 switch (GET_MODE (x))
2748 {
2749 case SImode:
2750 {
2751 rtx left = XEXP (x, 0);
2752 rtx right = XEXP (x, 1);
2753 if (GET_CODE (right) == CONST_INT
2754 && CONST_OK_FOR_K (INTVAL (right)))
2755 *total = s390_cost->mhi;
2756 else if (GET_CODE (left) == SIGN_EXTEND)
2757 *total = s390_cost->mh;
2758 else
2759 *total = s390_cost->ms; /* msr, ms, msy */
2760 break;
2761 }
2762 case DImode:
2763 {
2764 rtx left = XEXP (x, 0);
2765 rtx right = XEXP (x, 1);
2766 if (TARGET_ZARCH)
2767 {
2768 if (GET_CODE (right) == CONST_INT
2769 && CONST_OK_FOR_K (INTVAL (right)))
2770 *total = s390_cost->mghi;
2771 else if (GET_CODE (left) == SIGN_EXTEND)
2772 *total = s390_cost->msgf;
2773 else
2774 *total = s390_cost->msg; /* msgr, msg */
2775 }
2776 else /* TARGET_31BIT */
2777 {
2778 if (GET_CODE (left) == SIGN_EXTEND
2779 && GET_CODE (right) == SIGN_EXTEND)
2780 /* mulsidi case: mr, m */
2781 *total = s390_cost->m;
2782 else if (GET_CODE (left) == ZERO_EXTEND
2783 && GET_CODE (right) == ZERO_EXTEND
2784 && TARGET_CPU_ZARCH)
2785 /* umulsidi case: ml, mlr */
2786 *total = s390_cost->ml;
2787 else
2788 /* Complex calculation is required. */
2789 *total = COSTS_N_INSNS (40);
2790 }
2791 break;
2792 }
2793 case SFmode:
2794 case DFmode:
2795 *total = s390_cost->mult_df;
2796 break;
2797 case TFmode:
2798 *total = s390_cost->mxbr;
2799 break;
2800 default:
2801 return false;
2802 }
2803 return false;
2804
2805 case FMA:
2806 switch (GET_MODE (x))
2807 {
2808 case DFmode:
2809 *total = s390_cost->madbr;
2810 break;
2811 case SFmode:
2812 *total = s390_cost->maebr;
2813 break;
2814 default:
2815 return false;
2816 }
2817 /* Negating the third argument is free: FMSUB. */
2818 if (GET_CODE (XEXP (x, 2)) == NEG)
2819 {
2820 *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
2821 + rtx_cost (XEXP (x, 1), FMA, 1, speed)
2822 + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
2823 return true;
2824 }
2825 return false;
2826
2827 case UDIV:
2828 case UMOD:
2829 if (GET_MODE (x) == TImode) /* 128 bit division */
2830 *total = s390_cost->dlgr;
2831 else if (GET_MODE (x) == DImode)
2832 {
2833 rtx right = XEXP (x, 1);
2834 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2835 *total = s390_cost->dlr;
2836 else /* 64 by 64 bit division */
2837 *total = s390_cost->dlgr;
2838 }
2839 else if (GET_MODE (x) == SImode) /* 32 bit division */
2840 *total = s390_cost->dlr;
2841 return false;
2842
2843 case DIV:
2844 case MOD:
2845 if (GET_MODE (x) == DImode)
2846 {
2847 rtx right = XEXP (x, 1);
2848 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2849 if (TARGET_ZARCH)
2850 *total = s390_cost->dsgfr;
2851 else
2852 *total = s390_cost->dr;
2853 else /* 64 by 64 bit division */
2854 *total = s390_cost->dsgr;
2855 }
2856 else if (GET_MODE (x) == SImode) /* 32 bit division */
2857 *total = s390_cost->dlr;
2858 else if (GET_MODE (x) == SFmode)
2859 {
2860 *total = s390_cost->debr;
2861 }
2862 else if (GET_MODE (x) == DFmode)
2863 {
2864 *total = s390_cost->ddbr;
2865 }
2866 else if (GET_MODE (x) == TFmode)
2867 {
2868 *total = s390_cost->dxbr;
2869 }
2870 return false;
2871
2872 case SQRT:
2873 if (GET_MODE (x) == SFmode)
2874 *total = s390_cost->sqebr;
2875 else if (GET_MODE (x) == DFmode)
2876 *total = s390_cost->sqdbr;
2877 else /* TFmode */
2878 *total = s390_cost->sqxbr;
2879 return false;
2880
2881 case SIGN_EXTEND:
2882 case ZERO_EXTEND:
2883 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
2884 || outer_code == PLUS || outer_code == MINUS
2885 || outer_code == COMPARE)
2886 *total = 0;
2887 return false;
2888
2889 case COMPARE:
2890 *total = COSTS_N_INSNS (1);
2891 if (GET_CODE (XEXP (x, 0)) == AND
2892 && GET_CODE (XEXP (x, 1)) == CONST_INT
2893 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2894 {
2895 rtx op0 = XEXP (XEXP (x, 0), 0);
2896 rtx op1 = XEXP (XEXP (x, 0), 1);
2897 rtx op2 = XEXP (x, 1);
2898
2899 if (memory_operand (op0, GET_MODE (op0))
2900 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
2901 return true;
2902 if (register_operand (op0, GET_MODE (op0))
2903 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
2904 return true;
2905 }
2906 return false;
2907
2908 default:
2909 return false;
2910 }
2911 }
2912
2913 /* Return the cost of an address rtx ADDR. */
2914
2915 static int
2916 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
2917 addr_space_t as ATTRIBUTE_UNUSED,
2918 bool speed ATTRIBUTE_UNUSED)
2919 {
2920 struct s390_address ad;
2921 if (!s390_decompose_address (addr, &ad))
2922 return 1000;
2923
2924 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
2925 }
2926
2927 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
2928 otherwise return 0. */
2929
2930 int
2931 tls_symbolic_operand (rtx op)
2932 {
2933 if (GET_CODE (op) != SYMBOL_REF)
2934 return 0;
2935 return SYMBOL_REF_TLS_MODEL (op);
2936 }
2937 \f
2938 /* Split DImode access register reference REG (on 64-bit) into its constituent
2939 low and high parts, and store them into LO and HI. Note that gen_lowpart/
2940 gen_highpart cannot be used as they assume all registers are word-sized,
2941 while our access registers have only half that size. */
2942
2943 void
2944 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
2945 {
2946 gcc_assert (TARGET_64BIT);
2947 gcc_assert (ACCESS_REG_P (reg));
2948 gcc_assert (GET_MODE (reg) == DImode);
2949 gcc_assert (!(REGNO (reg) & 1));
2950
2951 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
2952 *hi = gen_rtx_REG (SImode, REGNO (reg));
2953 }
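
/* E.g. for a DImode reference to hard register 36 (the first access
   register according to the regclass_map above) this yields
   *hi = (reg:SI 36) and *lo = (reg:SI 37).  */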
2954
2955 /* Return true if OP contains a symbol reference. */
2956
2957 bool
2958 symbolic_reference_mentioned_p (rtx op)
2959 {
2960 const char *fmt;
2961 int i;
2962
2963 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2964 return 1;
2965
2966 fmt = GET_RTX_FORMAT (GET_CODE (op));
2967 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2968 {
2969 if (fmt[i] == 'E')
2970 {
2971 int j;
2972
2973 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2974 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2975 return 1;
2976 }
2977
2978 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2979 return 1;
2980 }
2981
2982 return 0;
2983 }
2984
2985 /* Return true if OP contains a reference to a thread-local symbol. */
2986
2987 bool
2988 tls_symbolic_reference_mentioned_p (rtx op)
2989 {
2990 const char *fmt;
2991 int i;
2992
2993 if (GET_CODE (op) == SYMBOL_REF)
2994 return tls_symbolic_operand (op);
2995
2996 fmt = GET_RTX_FORMAT (GET_CODE (op));
2997 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2998 {
2999 if (fmt[i] == 'E')
3000 {
3001 int j;
3002
3003 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3004 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3005 return true;
3006 }
3007
3008 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3009 return true;
3010 }
3011
3012 return false;
3013 }
3014
3015
3016 /* Return true if OP is a legitimate general operand when
3017 generating PIC code. It is given that flag_pic is on
3018 and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
3019
3020 int
3021 legitimate_pic_operand_p (rtx op)
3022 {
3023 /* Accept all non-symbolic constants. */
3024 if (!SYMBOLIC_CONST (op))
3025 return 1;
3026
3027 /* Reject everything else; must be handled
3028 via emit_symbolic_move. */
3029 return 0;
3030 }
3031
3032 /* Returns true if the constant value OP is a legitimate general operand.
3033 It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
3034
3035 static bool
3036 s390_legitimate_constant_p (machine_mode mode, rtx op)
3037 {
3038 if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3039 {
3040 if (GET_MODE_SIZE (mode) != 16)
3041 return 0;
3042
3043 if (!const0_operand (op, mode)
3044 && !s390_contiguous_bitmask_vector_p (op, NULL, NULL)
3045 && !s390_bytemask_vector_p (op, NULL))
3046 return 0;
3047 }
3048
3049 /* Accept all non-symbolic constants. */
3050 if (!SYMBOLIC_CONST (op))
3051 return 1;
3052
3053 /* Accept immediate LARL operands. */
3054 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3055 return 1;
3056
3057 /* Thread-local symbols are never legal constants. This is
3058 so that emit_call knows that computing such addresses
3059 might require a function call. */
3060 if (TLS_SYMBOLIC_CONST (op))
3061 return 0;
3062
3063 /* In the PIC case, symbolic constants must *not* be
3064 forced into the literal pool. We accept them here,
3065 so that they will be handled by emit_symbolic_move. */
3066 if (flag_pic)
3067 return 1;
3068
3069 /* All remaining non-PIC symbolic constants are
3070 forced into the literal pool. */
3071 return 0;
3072 }
3073
3074 /* Determine if it's legal to put X into the constant pool. This
3075 is not possible if X contains the address of a symbol that is
3076 not constant (TLS) or not known at final link time (PIC). */
3077
3078 static bool
3079 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3080 {
3081 switch (GET_CODE (x))
3082 {
3083 case CONST_INT:
3084 case CONST_DOUBLE:
3085 case CONST_VECTOR:
3086 /* Accept all non-symbolic constants. */
3087 return false;
3088
3089 case LABEL_REF:
3090 /* Labels are OK iff we are non-PIC. */
3091 return flag_pic != 0;
3092
3093 case SYMBOL_REF:
3094 /* 'Naked' TLS symbol references are never OK,
3095 non-TLS symbols are OK iff we are non-PIC. */
3096 if (tls_symbolic_operand (x))
3097 return true;
3098 else
3099 return flag_pic != 0;
3100
3101 case CONST:
3102 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3103 case PLUS:
3104 case MINUS:
3105 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3106 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3107
3108 case UNSPEC:
3109 switch (XINT (x, 1))
3110 {
3111 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3112 case UNSPEC_LTREL_OFFSET:
3113 case UNSPEC_GOT:
3114 case UNSPEC_GOTOFF:
3115 case UNSPEC_PLTOFF:
3116 case UNSPEC_TLSGD:
3117 case UNSPEC_TLSLDM:
3118 case UNSPEC_NTPOFF:
3119 case UNSPEC_DTPOFF:
3120 case UNSPEC_GOTNTPOFF:
3121 case UNSPEC_INDNTPOFF:
3122 return false;
3123
3124 /* If the literal pool shares the code section, execute
3125 template placeholders may be put into the pool as well. */
3126 case UNSPEC_INSN:
3127 return TARGET_CPU_ZARCH;
3128
3129 default:
3130 return true;
3131 }
3132 break;
3133
3134 default:
3135 gcc_unreachable ();
3136 }
3137 }
3138
3139 /* Returns true if the constant value OP is a legitimate general
3140 operand during and after reload. The difference to
3141 legitimate_constant_p is that this function will not accept
3142 a constant that would need to be forced to the literal pool
3143 before it can be used as operand.
3144 This function accepts all constants which can be loaded directly
3145 into a GPR. */
3146
3147 bool
3148 legitimate_reload_constant_p (rtx op)
3149 {
3150 /* Accept la(y) operands. */
3151 if (GET_CODE (op) == CONST_INT
3152 && DISP_IN_RANGE (INTVAL (op)))
3153 return true;
3154
3155 /* Accept l(g)hi/l(g)fi operands. */
3156 if (GET_CODE (op) == CONST_INT
3157 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3158 return true;
3159
3160 /* Accept lliXX operands. */
3161 if (TARGET_ZARCH
3162 && GET_CODE (op) == CONST_INT
3163 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3164 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3165 return true;
3166
3167 if (TARGET_EXTIMM
3168 && GET_CODE (op) == CONST_INT
3169 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3170 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3171 return true;
3172
3173 /* Accept larl operands. */
3174 if (TARGET_CPU_ZARCH
3175 && larl_operand (op, VOIDmode))
3176 return true;
3177
3178 /* Accept floating-point zero operands that fit into a single GPR. */
3179 if (GET_CODE (op) == CONST_DOUBLE
3180 && s390_float_const_zero_p (op)
3181 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3182 return true;
3183
3184 /* Accept double-word operands that can be split. */
3185 if (GET_CODE (op) == CONST_INT
3186 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
3187 {
3188 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
3189 rtx hi = operand_subword (op, 0, 0, dword_mode);
3190 rtx lo = operand_subword (op, 1, 0, dword_mode);
3191 return legitimate_reload_constant_p (hi)
3192 && legitimate_reload_constant_p (lo);
3193 }
3194
3195 /* Everything else cannot be handled without reload. */
3196 return false;
3197 }
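
/* An example of the double-word case above, assuming 31-bit code where
   word_mode is SImode: the DImode constant 0x100000002 does not survive
   truncation to SImode, so it is split into the subwords 1 and 2, both
   of which are themselves legitimate reload constants, and the original
   constant is therefore accepted.  */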
3198
3199 /* Returns true if the constant value OP is a legitimate fp operand
3200 during and after reload.
3201 This function accepts all constants which can be loaded directly
3202 into an FPR. */
3203
3204 static bool
3205 legitimate_reload_fp_constant_p (rtx op)
3206 {
3207 /* Accept floating-point zero operands if the load zero instruction
3208 can be used. Prior to z196 the load fp zero instruction caused a
3209 performance penalty if the result is used as a BFP number. */
3210 if (TARGET_Z196
3211 && GET_CODE (op) == CONST_DOUBLE
3212 && s390_float_const_zero_p (op))
3213 return true;
3214
3215 return false;
3216 }
3217
3218 /* Returns true if the constant value OP is a legitimate vector operand
3219 during and after reload.
3220 This function accepts all constants which can be loaded directly
3221 into a VR. */
3222
3223 static bool
3224 legitimate_reload_vector_constant_p (rtx op)
3225 {
3226 /* FIXME: Support constant vectors whose elements are all the same
3227 16-bit unsigned value. These can be loaded with vrepi. */
3228
3229 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
3230 && (const0_operand (op, GET_MODE (op))
3231 || constm1_operand (op, GET_MODE (op))
3232 || s390_contiguous_bitmask_vector_p (op, NULL, NULL)
3233 || s390_bytemask_vector_p (op, NULL)))
3234 return true;
3235
3236 return false;
3237 }
3238
3239 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
3240 return the class of reg to actually use. */
3241
3242 static reg_class_t
3243 s390_preferred_reload_class (rtx op, reg_class_t rclass)
3244 {
3245 switch (GET_CODE (op))
3246 {
3247 /* Constants we cannot reload into general registers
3248 must be forced into the literal pool. */
3249 case CONST_VECTOR:
3250 case CONST_DOUBLE:
3251 case CONST_INT:
3252 if (reg_class_subset_p (GENERAL_REGS, rclass)
3253 && legitimate_reload_constant_p (op))
3254 return GENERAL_REGS;
3255 else if (reg_class_subset_p (ADDR_REGS, rclass)
3256 && legitimate_reload_constant_p (op))
3257 return ADDR_REGS;
3258 else if (reg_class_subset_p (FP_REGS, rclass)
3259 && legitimate_reload_fp_constant_p (op))
3260 return FP_REGS;
3261 else if (reg_class_subset_p (VEC_REGS, rclass)
3262 && legitimate_reload_vector_constant_p (op))
3263 return VEC_REGS;
3264
3265 return NO_REGS;
3266
3267 /* If a symbolic constant or a PLUS is reloaded,
3268 it is most likely being used as an address, so
3269 prefer ADDR_REGS. If RCLASS is not a superset
3270 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
3271 case CONST:
3272 /* Symrefs cannot be pushed into the literal pool with -fPIC
3273 so we *MUST NOT* return NO_REGS for these cases
3274 (s390_cannot_force_const_mem will return true).
3275
3276 On the other hand we MUST return NO_REGS for symrefs with an
3277 invalid addend which might have been pushed to the literal
3278 pool (no -fPIC). Usually we would expect them to be
3279 handled via secondary reload, but this does not happen if
3280 they are used as a literal pool slot replacement in reload
3281 inheritance (see emit_input_reload_insns). */
3282 if (TARGET_CPU_ZARCH
3283 && GET_CODE (XEXP (op, 0)) == PLUS
3284 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
3285 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
3286 {
3287 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
3288 return ADDR_REGS;
3289 else
3290 return NO_REGS;
3291 }
3292 /* fallthrough */
3293 case LABEL_REF:
3294 case SYMBOL_REF:
3295 if (!legitimate_reload_constant_p (op))
3296 return NO_REGS;
3297 /* fallthrough */
3298 case PLUS:
3299 /* load address will be used. */
3300 if (reg_class_subset_p (ADDR_REGS, rclass))
3301 return ADDR_REGS;
3302 else
3303 return NO_REGS;
3304
3305 default:
3306 break;
3307 }
3308
3309 return rclass;
3310 }
3311
3312 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
3313 multiple of ALIGNMENT and the SYMBOL_REF being naturally
3314 aligned. */
3315
3316 bool
3317 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
3318 {
3319 HOST_WIDE_INT addend;
3320 rtx symref;
3321
3322 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3323 return false;
3324
3325 if (addend & (alignment - 1))
3326 return false;
3327
3328 if (GET_CODE (symref) == SYMBOL_REF
3329 && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref))
3330 return true;
3331
3332 if (GET_CODE (symref) == UNSPEC
3333 && alignment <= UNITS_PER_LONG)
3334 return true;
3335
3336 return false;
3337 }
3338
3339 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
3340 operand, SCRATCH is used to load the even part of the address,
3341 and one is then added. */
3342
3343 void
3344 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
3345 {
3346 HOST_WIDE_INT addend;
3347 rtx symref;
3348
3349 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3350 gcc_unreachable ();
3351
3352 if (!(addend & 1))
3353 /* Easy case. The addend is even so larl will do fine. */
3354 emit_move_insn (reg, addr);
3355 else
3356 {
3357 /* We can leave the scratch register untouched if the target
3358 register is a valid base register. */
3359 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
3360 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
3361 scratch = reg;
3362
3363 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
3364 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
3365
3366 if (addend != 1)
3367 emit_move_insn (scratch,
3368 gen_rtx_CONST (Pmode,
3369 gen_rtx_PLUS (Pmode, symref,
3370 GEN_INT (addend - 1))));
3371 else
3372 emit_move_insn (scratch, symref);
3373
3374 /* Increment the address using la in order to avoid clobbering cc. */
3375 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
3376 }
3377 }
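
/* The odd-addend path above therefore emits a sequence equivalent to
   (register names only for illustration):

     larl  %r3, sym+(addend-1)   # even address, a valid larl operand
     la    %r2, 1(%r3)           # add the remaining 1 without
                                 # clobbering the condition code  */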
3378
3379 /* Generate what is necessary to move between REG and MEM using
3380 SCRATCH. The direction is given by TOMEM. */
3381
3382 void
3383 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
3384 {
3385 /* Reload might have pulled a constant out of the literal pool.
3386 Force it back in. */
3387 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
3388 || GET_CODE (mem) == CONST_VECTOR
3389 || GET_CODE (mem) == CONST)
3390 mem = force_const_mem (GET_MODE (reg), mem);
3391
3392 gcc_assert (MEM_P (mem));
3393
3394 /* For a load from memory we can leave the scratch register
3395 untouched if the target register is a valid base register. */
3396 if (!tomem
3397 && REGNO (reg) < FIRST_PSEUDO_REGISTER
3398 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
3399 && GET_MODE (reg) == GET_MODE (scratch))
3400 scratch = reg;
3401
3402 /* Load address into scratch register. Since we can't have a
3403 secondary reload for a secondary reload we have to cover the case
3404 where larl would need a secondary reload here as well. */
3405 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
3406
3407 /* Now we can use a standard load/store to do the move. */
3408 if (tomem)
3409 emit_move_insn (replace_equiv_address (mem, scratch), reg);
3410 else
3411 emit_move_insn (reg, replace_equiv_address (mem, scratch));
3412 }
3413
3414 /* Inform reload about cases where moving X with a mode MODE to a register in
3415 RCLASS requires an extra scratch or immediate register. Return the class
3416 needed for the immediate register. */
3417
3418 static reg_class_t
3419 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
3420 machine_mode mode, secondary_reload_info *sri)
3421 {
3422 enum reg_class rclass = (enum reg_class) rclass_i;
3423
3424 /* Intermediate register needed. */
3425 if (reg_classes_intersect_p (CC_REGS, rclass))
3426 return GENERAL_REGS;
3427
3428 if (TARGET_VX)
3429 {
3430 /* The vst/vl vector move instructions allow only short
3431 displacements. */
3432 if (MEM_P (x)
3433 && GET_CODE (XEXP (x, 0)) == PLUS
3434 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3435 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
3436 && reg_class_subset_p (rclass, VEC_REGS)
3437 && (!reg_class_subset_p (rclass, FP_REGS)
3438 || (GET_MODE_SIZE (mode) > 8
3439 && s390_class_max_nregs (FP_REGS, mode) == 1)))
3440 {
3441 if (in_p)
3442 sri->icode = (TARGET_64BIT ?
3443 CODE_FOR_reloaddi_la_in :
3444 CODE_FOR_reloadsi_la_in);
3445 else
3446 sri->icode = (TARGET_64BIT ?
3447 CODE_FOR_reloaddi_la_out :
3448 CODE_FOR_reloadsi_la_out);
3449 }
3450 }
3451
3452 if (TARGET_Z10)
3453 {
3454 HOST_WIDE_INT offset;
3455 rtx symref;
3456
3457 /* On z10 several optimizer steps may generate larl operands with
3458 an odd addend. */
3459 if (in_p
3460 && s390_loadrelative_operand_p (x, &symref, &offset)
3461 && mode == Pmode
3462 && !SYMBOL_REF_ALIGN1_P (symref)
3463 && (offset & 1) == 1)
3464 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
3465 : CODE_FOR_reloadsi_larl_odd_addend_z10);
3466
3467 /* Handle all the (mem (symref)) accesses we cannot use the z10
3468 instructions for. */
3469 if (MEM_P (x)
3470 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
3471 && (mode == QImode
3472 || !reg_classes_intersect_p (GENERAL_REGS, rclass)
3473 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
3474 || !s390_check_symref_alignment (XEXP (x, 0),
3475 GET_MODE_SIZE (mode))))
3476 {
3477 #define __SECONDARY_RELOAD_CASE(M,m) \
3478 case M##mode: \
3479 if (TARGET_64BIT) \
3480 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
3481 CODE_FOR_reload##m##di_tomem_z10; \
3482 else \
3483 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
3484 CODE_FOR_reload##m##si_tomem_z10; \
3485 break;
3486
3487 switch (GET_MODE (x))
3488 {
3489 __SECONDARY_RELOAD_CASE (QI, qi);
3490 __SECONDARY_RELOAD_CASE (HI, hi);
3491 __SECONDARY_RELOAD_CASE (SI, si);
3492 __SECONDARY_RELOAD_CASE (DI, di);
3493 __SECONDARY_RELOAD_CASE (TI, ti);
3494 __SECONDARY_RELOAD_CASE (SF, sf);
3495 __SECONDARY_RELOAD_CASE (DF, df);
3496 __SECONDARY_RELOAD_CASE (TF, tf);
3497 __SECONDARY_RELOAD_CASE (SD, sd);
3498 __SECONDARY_RELOAD_CASE (DD, dd);
3499 __SECONDARY_RELOAD_CASE (TD, td);
3500 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
3501 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
3502 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
3503 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
3504 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
3505 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
3506 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
3507 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
3508 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
3509 __SECONDARY_RELOAD_CASE (V1SI, v1si);
3510 __SECONDARY_RELOAD_CASE (V2SI, v2si);
3511 __SECONDARY_RELOAD_CASE (V4SI, v4si);
3512 __SECONDARY_RELOAD_CASE (V1DI, v1di);
3513 __SECONDARY_RELOAD_CASE (V2DI, v2di);
3514 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
3515 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
3516 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
3517 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
3518 __SECONDARY_RELOAD_CASE (V1DF, v1df);
3519 __SECONDARY_RELOAD_CASE (V2DF, v2df);
3520 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
3521 default:
3522 gcc_unreachable ();
3523 }
3524 #undef __SECONDARY_RELOAD_CASE
3525 }
3526 }
3527
3528 /* We need a scratch register when loading a PLUS expression which
3529 is not a legitimate operand of the LOAD ADDRESS instruction. */
3530 /* LRA can deal with the transformation of a plus operand very
3531 well, so we don't need to prompt LRA in this case. */
3532 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
3533 sri->icode = (TARGET_64BIT ?
3534 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
3535
3536 /* When performing a multiword move from or to memory, we have to make sure the
3537 second chunk in memory is addressable without causing a displacement
3538 overflow. If that would be the case we calculate the address in
3539 a scratch register. */
3540 if (MEM_P (x)
3541 && GET_CODE (XEXP (x, 0)) == PLUS
3542 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3543 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
3544 + GET_MODE_SIZE (mode) - 1))
3545 {
3546 /* For GENERAL_REGS a displacement overflow is no problem if it occurs
3547 in an s_operand address, since we may fall back to lm/stm. So we only
3548 have to care about overflows in the b+i+d case. */
3549 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
3550 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
3551 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
3552 /* For FP_REGS no lm/stm is available so this check is triggered
3553 for displacement overflows in b+i+d and b+d like addresses. */
3554 || (reg_classes_intersect_p (FP_REGS, rclass)
3555 && s390_class_max_nregs (FP_REGS, mode) > 1))
3556 {
3557 if (in_p)
3558 sri->icode = (TARGET_64BIT ?
3559 CODE_FOR_reloaddi_la_in :
3560 CODE_FOR_reloadsi_la_in);
3561 else
3562 sri->icode = (TARGET_64BIT ?
3563 CODE_FOR_reloaddi_la_out :
3564 CODE_FOR_reloadsi_la_out);
3565 }
3566 }
3567
3568 /* A scratch address register is needed when a symbolic constant is
3569 copied to r0 when compiling with -fPIC. In other cases the target
3570 register might be used as temporary (see legitimize_pic_address). */
3571 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
3572 sri->icode = (TARGET_64BIT ?
3573 CODE_FOR_reloaddi_PIC_addr :
3574 CODE_FOR_reloadsi_PIC_addr);
3575
3576 /* Either scratch or no register needed. */
3577 return NO_REGS;
3578 }
3579
3580 /* Generate code to load SRC, which is a PLUS that is not a
3581 legitimate operand for the LA instruction, into TARGET.
3582 SCRATCH may be used as a scratch register. */
3583
3584 void
3585 s390_expand_plus_operand (rtx target, rtx src,
3586 rtx scratch)
3587 {
3588 rtx sum1, sum2;
3589 struct s390_address ad;
3590
3591 /* src must be a PLUS; get its two operands. */
3592 gcc_assert (GET_CODE (src) == PLUS);
3593 gcc_assert (GET_MODE (src) == Pmode);
3594
3595 /* Check if either of the two operands is already scheduled
3596 for replacement by reload. This can happen e.g. when
3597 float registers occur in an address. */
3598 sum1 = find_replacement (&XEXP (src, 0));
3599 sum2 = find_replacement (&XEXP (src, 1));
3600 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3601
3602 /* If the address is already strictly valid, there's nothing to do. */
3603 if (!s390_decompose_address (src, &ad)
3604 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3605 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
3606 {
3607 /* Otherwise, one of the operands cannot be an address register;
3608 we reload its value into the scratch register. */
3609 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
3610 {
3611 emit_move_insn (scratch, sum1);
3612 sum1 = scratch;
3613 }
3614 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
3615 {
3616 emit_move_insn (scratch, sum2);
3617 sum2 = scratch;
3618 }
3619
3620 /* According to the way these invalid addresses are generated
3621 in reload.c, it should never happen (at least on s390) that
3622 *neither* of the PLUS components, after find_replacement
3623 was applied, is an address register. */
3624 if (sum1 == scratch && sum2 == scratch)
3625 {
3626 debug_rtx (src);
3627 gcc_unreachable ();
3628 }
3629
3630 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3631 }
3632
3633 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
3634 is only ever performed on addresses, so we can mark the
3635 sum as legitimate for LA in any case. */
3636 s390_load_address (target, src);
3637 }
3638
3639
3640 /* Return true if ADDR is a valid memory address.
3641 STRICT specifies whether strict register checking applies. */
3642
3643 static bool
3644 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
3645 {
3646 struct s390_address ad;
3647
3648 if (TARGET_Z10
3649 && larl_operand (addr, VOIDmode)
3650 && (mode == VOIDmode
3651 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
3652 return true;
3653
3654 if (!s390_decompose_address (addr, &ad))
3655 return false;
3656
3657 if (strict)
3658 {
3659 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3660 return false;
3661
3662 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
3663 return false;
3664 }
3665 else
3666 {
3667 if (ad.base
3668 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
3669 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
3670 return false;
3671
3672 if (ad.indx
3673 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
3674 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
3675 return false;
3676 }
3677 return true;
3678 }
3679
3680 /* Return true if OP is a valid operand for the LA instruction.
3681 In 31-bit, we need to prove that the result is used as an
3682 address, as LA performs only a 31-bit addition. */
3683
3684 bool
3685 legitimate_la_operand_p (rtx op)
3686 {
3687 struct s390_address addr;
3688 if (!s390_decompose_address (op, &addr))
3689 return false;
3690
3691 return (TARGET_64BIT || addr.pointer);
3692 }
3693
3694 /* Return true if it is valid *and* preferable to use LA to
3695 compute the sum of OP1 and OP2. */
3696
3697 bool
3698 preferred_la_operand_p (rtx op1, rtx op2)
3699 {
3700 struct s390_address addr;
3701
3702 if (op2 != const0_rtx)
3703 op1 = gen_rtx_PLUS (Pmode, op1, op2);
3704
3705 if (!s390_decompose_address (op1, &addr))
3706 return false;
3707 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
3708 return false;
3709 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
3710 return false;
3711
3712 /* Avoid LA instructions with an index register on z196; it is
3713 preferable to use regular add instructions when possible.
3714 Starting with zEC12, la with an index register is "uncracked"
3715 again. */
3716 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
3717 return false;
3718
3719 if (!TARGET_64BIT && !addr.pointer)
3720 return false;
3721
3722 if (addr.pointer)
3723 return true;
3724
3725 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
3726 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
3727 return true;
3728
3729 return false;
3730 }
3731
3732 /* Emit a forced load-address operation to load SRC into DST.
3733 This will use the LOAD ADDRESS instruction even in situations
3734 where legitimate_la_operand_p (SRC) returns false. */
3735
3736 void
3737 s390_load_address (rtx dst, rtx src)
3738 {
3739 if (TARGET_64BIT)
3740 emit_move_insn (dst, src);
3741 else
3742 emit_insn (gen_force_la_31 (dst, src));
3743 }
3744
3745 /* Return a legitimate reference for ORIG (an address) using the
3746 register REG. If REG is 0, a new pseudo is generated.
3747
3748 There are two types of references that must be handled:
3749
3750 1. Global data references must load the address from the GOT, via
3751 the PIC reg. An insn is emitted to do this load, and the reg is
3752 returned.
3753
3754 2. Static data references, constant pool addresses, and code labels
3755 compute the address as an offset from the GOT, whose base is in
3756 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
3757 differentiate them from global data objects. The returned
3758 address is the PIC reg + an unspec constant.
3759
3760 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
3761 reg also appears in the address. */
3762
3763 rtx
3764 legitimize_pic_address (rtx orig, rtx reg)
3765 {
3766 rtx addr = orig;
3767 rtx addend = const0_rtx;
3768 rtx new_rtx = orig;
3769
3770 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
3771
3772 if (GET_CODE (addr) == CONST)
3773 addr = XEXP (addr, 0);
3774
3775 if (GET_CODE (addr) == PLUS)
3776 {
3777 addend = XEXP (addr, 1);
3778 addr = XEXP (addr, 0);
3779 }
3780
3781 if ((GET_CODE (addr) == LABEL_REF
3782 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
3783 || (GET_CODE (addr) == UNSPEC &&
3784 (XINT (addr, 1) == UNSPEC_GOTENT
3785 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3786 && GET_CODE (addend) == CONST_INT)
3787 {
3788 /* This can be locally addressed. */
3789
3790 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
3791 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
3792 gen_rtx_CONST (Pmode, addr) : addr);
3793
3794 if (TARGET_CPU_ZARCH
3795 && larl_operand (const_addr, VOIDmode)
3796 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
3797 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
3798 {
3799 if (INTVAL (addend) & 1)
3800 {
3801 /* LARL can't handle odd offsets, so emit a pair of LARL
3802 and LA. */
3803 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3804
3805 if (!DISP_IN_RANGE (INTVAL (addend)))
3806 {
3807 HOST_WIDE_INT even = INTVAL (addend) - 1;
3808 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
3809 addr = gen_rtx_CONST (Pmode, addr);
3810 addend = const1_rtx;
3811 }
3812
3813 emit_move_insn (temp, addr);
3814 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
3815
3816 if (reg != 0)
3817 {
3818 s390_load_address (reg, new_rtx);
3819 new_rtx = reg;
3820 }
3821 }
3822 else
3823 {
3824 /* If the offset is even, we can just use LARL. This
3825 will happen automatically. */
3826 }
3827 }
3828 else
3829 {
3830 /* No larl - Access local symbols relative to the GOT. */
3831
3832 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3833
3834 if (reload_in_progress || reload_completed)
3835 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3836
3837 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
3838 if (addend != const0_rtx)
3839 addr = gen_rtx_PLUS (Pmode, addr, addend);
3840 addr = gen_rtx_CONST (Pmode, addr);
3841 addr = force_const_mem (Pmode, addr);
3842 emit_move_insn (temp, addr);
3843
3844 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3845 if (reg != 0)
3846 {
3847 s390_load_address (reg, new_rtx);
3848 new_rtx = reg;
3849 }
3850 }
3851 }
3852 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
3853 {
3854 /* A non-local symbol reference without addend.
3855
3856 The symbol ref is wrapped into an UNSPEC to make sure the
3857 proper operand modifier (@GOT or @GOTENT) will be emitted.
3858 This will tell the linker to put the symbol into the GOT.
3859
3860 Additionally the code dereferencing the GOT slot is emitted here.
3861
3862 An addend to the symref needs to be added afterwards.
3863 legitimize_pic_address calls itself recursively to handle
3864 that case. So no need to do it here. */
3865
3866 if (reg == 0)
3867 reg = gen_reg_rtx (Pmode);
3868
3869 if (TARGET_Z10)
3870 {
3871 /* Use load relative if possible.
3872 lgrl <target>, sym@GOTENT */
3873 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3874 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3875 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
3876
3877 emit_move_insn (reg, new_rtx);
3878 new_rtx = reg;
3879 }
3880 else if (flag_pic == 1)
3881 {
3882 /* Assume GOT offset is a valid displacement operand (< 4k
3883 or < 512k with z990). This is handled the same way in
3884 both 31- and 64-bit code (@GOT).
3885 lg <target>, sym@GOT(r12) */
3886
3887 if (reload_in_progress || reload_completed)
3888 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3889
3890 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3891 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3892 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3893 new_rtx = gen_const_mem (Pmode, new_rtx);
3894 emit_move_insn (reg, new_rtx);
3895 new_rtx = reg;
3896 }
3897 else if (TARGET_CPU_ZARCH)
3898 {
3899 /* If the GOT offset might be >= 4k, we determine the position
3900 of the GOT entry via a PC-relative LARL (@GOTENT).
3901 larl temp, sym@GOTENT
3902 lg <target>, 0(temp) */
3903
3904 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3905
3906 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3907 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3908
3909 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3910 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3911 emit_move_insn (temp, new_rtx);
3912
3913 new_rtx = gen_const_mem (Pmode, temp);
3914 emit_move_insn (reg, new_rtx);
3915
3916 new_rtx = reg;
3917 }
3918 else
3919 {
3920 /* If the GOT offset might be >= 4k, we have to load it
3921 from the literal pool (@GOT).
3922
3923 lg temp, lit-litbase(r13)
3924 lg <target>, 0(temp)
3925 lit: .long sym@GOT */
3926
3927 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3928
3929 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3930 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3931
3932 if (reload_in_progress || reload_completed)
3933 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3934
3935 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3936 addr = gen_rtx_CONST (Pmode, addr);
3937 addr = force_const_mem (Pmode, addr);
3938 emit_move_insn (temp, addr);
3939
3940 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3941 new_rtx = gen_const_mem (Pmode, new_rtx);
3942 emit_move_insn (reg, new_rtx);
3943 new_rtx = reg;
3944 }
3945 }
3946 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
3947 {
3948 gcc_assert (XVECLEN (addr, 0) == 1);
3949 switch (XINT (addr, 1))
3950 {
3951 /* These address symbols (or PLT slots) relative to the GOT
3952 (not GOT slots!). In general this will exceed the
3953 	   displacement range so these values belong in the literal
3954 	   pool.  */
3955 case UNSPEC_GOTOFF:
3956 case UNSPEC_PLTOFF:
3957 new_rtx = force_const_mem (Pmode, orig);
3958 break;
3959
3960 /* For -fPIC the GOT size might exceed the displacement
3961 range so make sure the value is in the literal pool. */
3962 case UNSPEC_GOT:
3963 if (flag_pic == 2)
3964 new_rtx = force_const_mem (Pmode, orig);
3965 break;
3966
3967 /* For @GOTENT larl is used. This is handled like local
3968 symbol refs. */
3969 case UNSPEC_GOTENT:
3970 gcc_unreachable ();
3971 break;
3972
3973 /* @PLT is OK as is on 64-bit, must be converted to
3974 GOT-relative @PLTOFF on 31-bit. */
3975 case UNSPEC_PLT:
3976 if (!TARGET_CPU_ZARCH)
3977 {
3978 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3979
3980 if (reload_in_progress || reload_completed)
3981 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3982
3983 addr = XVECEXP (addr, 0, 0);
3984 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
3985 UNSPEC_PLTOFF);
3986 if (addend != const0_rtx)
3987 addr = gen_rtx_PLUS (Pmode, addr, addend);
3988 addr = gen_rtx_CONST (Pmode, addr);
3989 addr = force_const_mem (Pmode, addr);
3990 emit_move_insn (temp, addr);
3991
3992 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3993 if (reg != 0)
3994 {
3995 s390_load_address (reg, new_rtx);
3996 new_rtx = reg;
3997 }
3998 }
3999 else
4000 /* On 64 bit larl can be used. This case is handled like
4001 local symbol refs. */
4002 gcc_unreachable ();
4003 break;
4004
4005 /* Everything else cannot happen. */
4006 default:
4007 gcc_unreachable ();
4008 }
4009 }
4010 else if (addend != const0_rtx)
4011 {
4012 /* Otherwise, compute the sum. */
4013
4014 rtx base = legitimize_pic_address (addr, reg);
4015 new_rtx = legitimize_pic_address (addend,
4016 base == reg ? NULL_RTX : reg);
4017 if (GET_CODE (new_rtx) == CONST_INT)
4018 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4019 else
4020 {
4021 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4022 {
4023 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4024 new_rtx = XEXP (new_rtx, 1);
4025 }
4026 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4027 }
4028
4029 if (GET_CODE (new_rtx) == CONST)
4030 new_rtx = XEXP (new_rtx, 0);
4031 new_rtx = force_operand (new_rtx, 0);
4032 }
4033
4034 return new_rtx;
4035 }
4036
4037 /* Load the thread pointer into a register. */
4038
4039 rtx
4040 s390_get_thread_pointer (void)
4041 {
4042 rtx tp = gen_reg_rtx (Pmode);
4043
4044 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4045 mark_reg_pointer (tp, BITS_PER_WORD);
4046
4047 return tp;
4048 }
4049
4050 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4051 in s390_tls_symbol which always refers to __tls_get_offset.
4052    The returned offset is written to RESULT_REG and a USE rtx is
4053 generated for TLS_CALL. */
4054
4055 static GTY(()) rtx s390_tls_symbol;
4056
4057 static void
4058 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4059 {
4060 rtx insn;
4061
4062 if (!flag_pic)
4063 emit_insn (s390_load_got ());
4064
4065 if (!s390_tls_symbol)
4066 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4067
4068 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4069 gen_rtx_REG (Pmode, RETURN_REGNUM));
4070
4071 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4072 RTL_CONST_CALL_P (insn) = 1;
4073 }
4074
4075 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4076 this (thread-local) address. REG may be used as temporary. */
4077
4078 static rtx
4079 legitimize_tls_address (rtx addr, rtx reg)
4080 {
4081 rtx new_rtx, tls_call, temp, base, r2, insn;
4082
4083 if (GET_CODE (addr) == SYMBOL_REF)
4084 switch (tls_symbolic_operand (addr))
4085 {
4086 case TLS_MODEL_GLOBAL_DYNAMIC:
4087 start_sequence ();
4088 r2 = gen_rtx_REG (Pmode, 2);
4089 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4090 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4091 new_rtx = force_const_mem (Pmode, new_rtx);
4092 emit_move_insn (r2, new_rtx);
4093 s390_emit_tls_call_insn (r2, tls_call);
4094 insn = get_insns ();
4095 end_sequence ();
4096
4097 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4098 temp = gen_reg_rtx (Pmode);
4099 emit_libcall_block (insn, temp, r2, new_rtx);
4100
4101 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4102 if (reg != 0)
4103 {
4104 s390_load_address (reg, new_rtx);
4105 new_rtx = reg;
4106 }
4107 break;
4108
4109 case TLS_MODEL_LOCAL_DYNAMIC:
4110 start_sequence ();
4111 r2 = gen_rtx_REG (Pmode, 2);
4112 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4113 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4114 new_rtx = force_const_mem (Pmode, new_rtx);
4115 emit_move_insn (r2, new_rtx);
4116 s390_emit_tls_call_insn (r2, tls_call);
4117 insn = get_insns ();
4118 end_sequence ();
4119
4120 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4121 temp = gen_reg_rtx (Pmode);
4122 emit_libcall_block (insn, temp, r2, new_rtx);
4123
4124 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4125 base = gen_reg_rtx (Pmode);
4126 s390_load_address (base, new_rtx);
4127
4128 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4129 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4130 new_rtx = force_const_mem (Pmode, new_rtx);
4131 temp = gen_reg_rtx (Pmode);
4132 emit_move_insn (temp, new_rtx);
4133
4134 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4135 if (reg != 0)
4136 {
4137 s390_load_address (reg, new_rtx);
4138 new_rtx = reg;
4139 }
4140 break;
4141
4142 case TLS_MODEL_INITIAL_EXEC:
4143 if (flag_pic == 1)
4144 {
4145 /* Assume GOT offset < 4k. This is handled the same way
4146 in both 31- and 64-bit code. */
4147
4148 if (reload_in_progress || reload_completed)
4149 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4150
4151 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4152 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4153 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4154 new_rtx = gen_const_mem (Pmode, new_rtx);
4155 temp = gen_reg_rtx (Pmode);
4156 emit_move_insn (temp, new_rtx);
4157 }
4158 else if (TARGET_CPU_ZARCH)
4159 {
4160 /* If the GOT offset might be >= 4k, we determine the position
4161 of the GOT entry via a PC-relative LARL. */
4162
4163 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4164 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4165 temp = gen_reg_rtx (Pmode);
4166 emit_move_insn (temp, new_rtx);
4167
4168 new_rtx = gen_const_mem (Pmode, temp);
4169 temp = gen_reg_rtx (Pmode);
4170 emit_move_insn (temp, new_rtx);
4171 }
4172 else if (flag_pic)
4173 {
4174 /* If the GOT offset might be >= 4k, we have to load it
4175 from the literal pool. */
4176
4177 if (reload_in_progress || reload_completed)
4178 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4179
4180 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4181 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4182 new_rtx = force_const_mem (Pmode, new_rtx);
4183 temp = gen_reg_rtx (Pmode);
4184 emit_move_insn (temp, new_rtx);
4185
4186 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4187 new_rtx = gen_const_mem (Pmode, new_rtx);
4188
4189 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4190 temp = gen_reg_rtx (Pmode);
4191 emit_insn (gen_rtx_SET (temp, new_rtx));
4192 }
4193 else
4194 {
4195 /* In position-dependent code, load the absolute address of
4196 the GOT entry from the literal pool. */
4197
4198 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4199 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4200 new_rtx = force_const_mem (Pmode, new_rtx);
4201 temp = gen_reg_rtx (Pmode);
4202 emit_move_insn (temp, new_rtx);
4203
4204 new_rtx = temp;
4205 new_rtx = gen_const_mem (Pmode, new_rtx);
4206 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4207 temp = gen_reg_rtx (Pmode);
4208 emit_insn (gen_rtx_SET (temp, new_rtx));
4209 }
4210
4211 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4212 if (reg != 0)
4213 {
4214 s390_load_address (reg, new_rtx);
4215 new_rtx = reg;
4216 }
4217 break;
4218
4219 case TLS_MODEL_LOCAL_EXEC:
4220 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4221 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4222 new_rtx = force_const_mem (Pmode, new_rtx);
4223 temp = gen_reg_rtx (Pmode);
4224 emit_move_insn (temp, new_rtx);
4225
4226 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4227 if (reg != 0)
4228 {
4229 s390_load_address (reg, new_rtx);
4230 new_rtx = reg;
4231 }
4232 break;
4233
4234 default:
4235 gcc_unreachable ();
4236 }
4237
4238 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
4239 {
4240 switch (XINT (XEXP (addr, 0), 1))
4241 {
4242 case UNSPEC_INDNTPOFF:
4243 gcc_assert (TARGET_CPU_ZARCH);
4244 new_rtx = addr;
4245 break;
4246
4247 default:
4248 gcc_unreachable ();
4249 }
4250 }
4251
4252 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
4253 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4254 {
4255 new_rtx = XEXP (XEXP (addr, 0), 0);
4256 if (GET_CODE (new_rtx) != SYMBOL_REF)
4257 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4258
4259 new_rtx = legitimize_tls_address (new_rtx, reg);
4260 new_rtx = plus_constant (Pmode, new_rtx,
4261 INTVAL (XEXP (XEXP (addr, 0), 1)));
4262 new_rtx = force_operand (new_rtx, 0);
4263 }
4264
4265 else
4266 gcc_unreachable (); /* for now ... */
4267
4268 return new_rtx;
4269 }
4270
4271 /* Emit insns making the address in operands[1] valid for a standard
4272 move to operands[0]. operands[1] is replaced by an address which
4273 should be used instead of the former RTX to emit the move
4274 pattern. */
4275
4276 void
4277 emit_symbolic_move (rtx *operands)
4278 {
4279 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
4280
4281 if (GET_CODE (operands[0]) == MEM)
4282 operands[1] = force_reg (Pmode, operands[1]);
4283 else if (TLS_SYMBOLIC_CONST (operands[1]))
4284 operands[1] = legitimize_tls_address (operands[1], temp);
4285 else if (flag_pic)
4286 operands[1] = legitimize_pic_address (operands[1], temp);
4287 }
4288
4289 /* Try machine-dependent ways of modifying an illegitimate address X
4290 to be legitimate. If we find one, return the new, valid address.
4291
4292 OLDX is the address as it was before break_out_memory_refs was called.
4293 In some cases it is useful to look at this to decide what needs to be done.
4294
4295 MODE is the mode of the operand pointed to by X.
4296
4297 When -fpic is used, special handling is needed for symbolic references.
4298 See comments by legitimize_pic_address for details. */
4299
4300 static rtx
4301 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4302 machine_mode mode ATTRIBUTE_UNUSED)
4303 {
4304 rtx constant_term = const0_rtx;
4305
4306 if (TLS_SYMBOLIC_CONST (x))
4307 {
4308 x = legitimize_tls_address (x, 0);
4309
4310 if (s390_legitimate_address_p (mode, x, FALSE))
4311 return x;
4312 }
4313 else if (GET_CODE (x) == PLUS
4314 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
4315 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
4316 {
4317 return x;
4318 }
4319 else if (flag_pic)
4320 {
4321 if (SYMBOLIC_CONST (x)
4322 || (GET_CODE (x) == PLUS
4323 && (SYMBOLIC_CONST (XEXP (x, 0))
4324 || SYMBOLIC_CONST (XEXP (x, 1)))))
4325 x = legitimize_pic_address (x, 0);
4326
4327 if (s390_legitimate_address_p (mode, x, FALSE))
4328 return x;
4329 }
4330
4331 x = eliminate_constant_term (x, &constant_term);
4332
4333 /* Optimize loading of large displacements by splitting them
4334 into the multiple of 4K and the rest; this allows the
4335 former to be CSE'd if possible.
4336
4337 Don't do this if the displacement is added to a register
4338 pointing into the stack frame, as the offsets will
4339 change later anyway. */
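  /* As a rough illustration (the offset below is chosen arbitrarily):
     a displacement of 0x12345 is split into

       upper = 0x12345 & ~0xfff = 0x12000   (forced into a register, CSE-able)
       lower = 0x12345 &  0xfff = 0x345     (kept as the displacement)

     so the address ends up as (x + upper_reg) + 0x345.  */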
4340
4341 if (GET_CODE (constant_term) == CONST_INT
4342 && !TARGET_LONG_DISPLACEMENT
4343 && !DISP_IN_RANGE (INTVAL (constant_term))
4344 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
4345 {
4346 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
4347 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
4348
4349 rtx temp = gen_reg_rtx (Pmode);
4350 rtx val = force_operand (GEN_INT (upper), temp);
4351 if (val != temp)
4352 emit_move_insn (temp, val);
4353
4354 x = gen_rtx_PLUS (Pmode, x, temp);
4355 constant_term = GEN_INT (lower);
4356 }
4357
4358 if (GET_CODE (x) == PLUS)
4359 {
4360 if (GET_CODE (XEXP (x, 0)) == REG)
4361 {
4362 rtx temp = gen_reg_rtx (Pmode);
4363 rtx val = force_operand (XEXP (x, 1), temp);
4364 if (val != temp)
4365 emit_move_insn (temp, val);
4366
4367 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
4368 }
4369
4370 else if (GET_CODE (XEXP (x, 1)) == REG)
4371 {
4372 rtx temp = gen_reg_rtx (Pmode);
4373 rtx val = force_operand (XEXP (x, 0), temp);
4374 if (val != temp)
4375 emit_move_insn (temp, val);
4376
4377 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
4378 }
4379 }
4380
4381 if (constant_term != const0_rtx)
4382 x = gen_rtx_PLUS (Pmode, x, constant_term);
4383
4384 return x;
4385 }
4386
4387 /* Try a machine-dependent way of reloading an illegitimate address AD
4388 operand. If we find one, push the reload and return the new address.
4389
4390 MODE is the mode of the enclosing MEM. OPNUM is the operand number
4391 and TYPE is the reload type of the current reload. */
4392
4393 rtx
4394 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
4395 int opnum, int type)
4396 {
4397 if (!optimize || TARGET_LONG_DISPLACEMENT)
4398 return NULL_RTX;
4399
4400 if (GET_CODE (ad) == PLUS)
4401 {
4402 rtx tem = simplify_binary_operation (PLUS, Pmode,
4403 XEXP (ad, 0), XEXP (ad, 1));
4404 if (tem)
4405 ad = tem;
4406 }
4407
4408 if (GET_CODE (ad) == PLUS
4409 && GET_CODE (XEXP (ad, 0)) == REG
4410 && GET_CODE (XEXP (ad, 1)) == CONST_INT
4411 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
4412 {
4413 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
4414 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
4415 rtx cst, tem, new_rtx;
4416
4417 cst = GEN_INT (upper);
4418 if (!legitimate_reload_constant_p (cst))
4419 cst = force_const_mem (Pmode, cst);
4420
4421 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
4422 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
4423
4424 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
4425 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
4426 opnum, (enum reload_type) type);
4427 return new_rtx;
4428 }
4429
4430 return NULL_RTX;
4431 }
4432
4433 /* Emit code to move LEN bytes from SRC to DST.  */
4434
4435 bool
4436 s390_expand_movmem (rtx dst, rtx src, rtx len)
4437 {
4438 /* When tuning for z10 or higher we rely on the Glibc functions to
4439      do the right thing.  We generate inline code only for constant
4440      lengths below 64k.  */
4441 if (s390_tune >= PROCESSOR_2097_Z10
4442 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4443 return false;
4444
4445 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4446 {
4447 if (INTVAL (len) > 0)
4448 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
4449 }
4450
4451 else if (TARGET_MVCLE)
4452 {
4453 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
4454 }
4455
4456 else
4457 {
4458 rtx dst_addr, src_addr, count, blocks, temp;
4459 rtx_code_label *loop_start_label = gen_label_rtx ();
4460 rtx_code_label *loop_end_label = gen_label_rtx ();
4461 rtx_code_label *end_label = gen_label_rtx ();
4462 machine_mode mode;
4463
4464 mode = GET_MODE (len);
4465 if (mode == VOIDmode)
4466 mode = Pmode;
4467
4468 dst_addr = gen_reg_rtx (Pmode);
4469 src_addr = gen_reg_rtx (Pmode);
4470 count = gen_reg_rtx (mode);
4471 blocks = gen_reg_rtx (mode);
4472
4473 convert_move (count, len, 1);
4474 emit_cmp_and_jump_insns (count, const0_rtx,
4475 EQ, NULL_RTX, mode, 1, end_label);
4476
4477 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4478 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
4479 dst = change_address (dst, VOIDmode, dst_addr);
4480 src = change_address (src, VOIDmode, src_addr);
4481
4482 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4483 OPTAB_DIRECT);
4484 if (temp != count)
4485 emit_move_insn (count, temp);
4486
4487 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4488 OPTAB_DIRECT);
4489 if (temp != blocks)
4490 emit_move_insn (blocks, temp);
4491
4492 emit_cmp_and_jump_insns (blocks, const0_rtx,
4493 EQ, NULL_RTX, mode, 1, loop_end_label);
4494
4495 emit_label (loop_start_label);
4496
4497 if (TARGET_Z10
4498 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
4499 {
4500 rtx prefetch;
4501
4502 /* Issue a read prefetch for the +3 cache line. */
4503 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
4504 const0_rtx, const0_rtx);
4505 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4506 emit_insn (prefetch);
4507
4508 /* Issue a write prefetch for the +3 cache line. */
4509 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
4510 const1_rtx, const0_rtx);
4511 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4512 emit_insn (prefetch);
4513 }
4514
4515 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
4516 s390_load_address (dst_addr,
4517 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4518 s390_load_address (src_addr,
4519 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
4520
4521 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4522 OPTAB_DIRECT);
4523 if (temp != blocks)
4524 emit_move_insn (blocks, temp);
4525
4526 emit_cmp_and_jump_insns (blocks, const0_rtx,
4527 EQ, NULL_RTX, mode, 1, loop_end_label);
4528
4529 emit_jump (loop_start_label);
4530 emit_label (loop_end_label);
4531
4532 emit_insn (gen_movmem_short (dst, src,
4533 convert_to_mode (Pmode, count, 1)));
4534 emit_label (end_label);
4535 }
4536 return true;
4537 }
4538
4539 /* Emit code to set LEN bytes at DST to VAL.
4540 Make use of clrmem if VAL is zero. */
4541
4542 void
4543 s390_expand_setmem (rtx dst, rtx len, rtx val)
4544 {
4545 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
4546 return;
4547
4548 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
4549
4550 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
4551 {
4552 if (val == const0_rtx && INTVAL (len) <= 256)
4553 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
4554 else
4555 {
4556 /* Initialize memory by storing the first byte. */
4557 emit_move_insn (adjust_address (dst, QImode, 0), val);
4558
4559 if (INTVAL (len) > 1)
4560 {
4561 	  /* Initiate a 1-byte overlap move.
4562 The first byte of DST is propagated through DSTP1.
4563 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
4564 DST is set to size 1 so the rest of the memory location
4565 does not count as source operand. */
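	  /* A minimal sketch of the effect, with LEN = 4 and VAL = 0xAA
	     chosen only for illustration: after the first byte has been
	     stored,

	       DST:  AA ?? ?? ??
	       MVC   DST+1(3),DST

	     copies byte by byte from left to right, so DST[0] -> DST[1],
	     DST[1] -> DST[2], DST[2] -> DST[3], leaving AA AA AA AA.  */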
4566 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
4567 set_mem_size (dst, 1);
4568
4569 emit_insn (gen_movmem_short (dstp1, dst,
4570 GEN_INT (INTVAL (len) - 2)));
4571 }
4572 }
4573 }
4574
4575 else if (TARGET_MVCLE)
4576 {
4577 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
4578 emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
4579 }
4580
4581 else
4582 {
4583 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
4584 rtx_code_label *loop_start_label = gen_label_rtx ();
4585 rtx_code_label *loop_end_label = gen_label_rtx ();
4586 rtx_code_label *end_label = gen_label_rtx ();
4587 machine_mode mode;
4588
4589 mode = GET_MODE (len);
4590 if (mode == VOIDmode)
4591 mode = Pmode;
4592
4593 dst_addr = gen_reg_rtx (Pmode);
4594 count = gen_reg_rtx (mode);
4595 blocks = gen_reg_rtx (mode);
4596
4597 convert_move (count, len, 1);
4598 emit_cmp_and_jump_insns (count, const0_rtx,
4599 EQ, NULL_RTX, mode, 1, end_label);
4600
4601 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4602 dst = change_address (dst, VOIDmode, dst_addr);
4603
4604 if (val == const0_rtx)
4605 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4606 OPTAB_DIRECT);
4607 else
4608 {
4609 dstp1 = adjust_address (dst, VOIDmode, 1);
4610 set_mem_size (dst, 1);
4611
4612 /* Initialize memory by storing the first byte. */
4613 emit_move_insn (adjust_address (dst, QImode, 0), val);
4614
4615 /* If count is 1 we are done. */
4616 emit_cmp_and_jump_insns (count, const1_rtx,
4617 EQ, NULL_RTX, mode, 1, end_label);
4618
4619 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
4620 OPTAB_DIRECT);
4621 }
4622 if (temp != count)
4623 emit_move_insn (count, temp);
4624
4625 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4626 OPTAB_DIRECT);
4627 if (temp != blocks)
4628 emit_move_insn (blocks, temp);
4629
4630 emit_cmp_and_jump_insns (blocks, const0_rtx,
4631 EQ, NULL_RTX, mode, 1, loop_end_label);
4632
4633 emit_label (loop_start_label);
4634
4635 if (TARGET_Z10
4636 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
4637 {
4638 /* Issue a write prefetch for the +4 cache line. */
4639 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
4640 GEN_INT (1024)),
4641 const1_rtx, const0_rtx);
4642 emit_insn (prefetch);
4643 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4644 }
4645
4646 if (val == const0_rtx)
4647 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
4648 else
4649 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
4650 s390_load_address (dst_addr,
4651 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4652
4653 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4654 OPTAB_DIRECT);
4655 if (temp != blocks)
4656 emit_move_insn (blocks, temp);
4657
4658 emit_cmp_and_jump_insns (blocks, const0_rtx,
4659 EQ, NULL_RTX, mode, 1, loop_end_label);
4660
4661 emit_jump (loop_start_label);
4662 emit_label (loop_end_label);
4663
4664 if (val == const0_rtx)
4665 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
4666 else
4667 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
4668 emit_label (end_label);
4669 }
4670 }
4671
4672 /* Emit code to compare LEN bytes at OP0 with those at OP1,
4673 and return the result in TARGET. */
4674
4675 bool
4676 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
4677 {
4678 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
4679 rtx tmp;
4680
4681 /* When tuning for z10 or higher we rely on the Glibc functions to
4682      do the right thing.  We generate inline code only for constant
4683      lengths below 64k.  */
4684 if (s390_tune >= PROCESSOR_2097_Z10
4685 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4686 return false;
4687
4688 /* As the result of CMPINT is inverted compared to what we need,
4689 we have to swap the operands. */
4690 tmp = op0; op0 = op1; op1 = tmp;
4691
4692 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4693 {
4694 if (INTVAL (len) > 0)
4695 {
4696 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
4697 emit_insn (gen_cmpint (target, ccreg));
4698 }
4699 else
4700 emit_move_insn (target, const0_rtx);
4701 }
4702 else if (TARGET_MVCLE)
4703 {
4704 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
4705 emit_insn (gen_cmpint (target, ccreg));
4706 }
4707 else
4708 {
4709 rtx addr0, addr1, count, blocks, temp;
4710 rtx_code_label *loop_start_label = gen_label_rtx ();
4711 rtx_code_label *loop_end_label = gen_label_rtx ();
4712 rtx_code_label *end_label = gen_label_rtx ();
4713 machine_mode mode;
4714
4715 mode = GET_MODE (len);
4716 if (mode == VOIDmode)
4717 mode = Pmode;
4718
4719 addr0 = gen_reg_rtx (Pmode);
4720 addr1 = gen_reg_rtx (Pmode);
4721 count = gen_reg_rtx (mode);
4722 blocks = gen_reg_rtx (mode);
4723
4724 convert_move (count, len, 1);
4725 emit_cmp_and_jump_insns (count, const0_rtx,
4726 EQ, NULL_RTX, mode, 1, end_label);
4727
4728 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
4729 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
4730 op0 = change_address (op0, VOIDmode, addr0);
4731 op1 = change_address (op1, VOIDmode, addr1);
4732
4733 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4734 OPTAB_DIRECT);
4735 if (temp != count)
4736 emit_move_insn (count, temp);
4737
4738 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4739 OPTAB_DIRECT);
4740 if (temp != blocks)
4741 emit_move_insn (blocks, temp);
4742
4743 emit_cmp_and_jump_insns (blocks, const0_rtx,
4744 EQ, NULL_RTX, mode, 1, loop_end_label);
4745
4746 emit_label (loop_start_label);
4747
4748 if (TARGET_Z10
4749 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
4750 {
4751 rtx prefetch;
4752
4753 /* Issue a read prefetch for the +2 cache line of operand 1. */
4754 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
4755 const0_rtx, const0_rtx);
4756 emit_insn (prefetch);
4757 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4758
4759 /* Issue a read prefetch for the +2 cache line of operand 2. */
4760 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
4761 const0_rtx, const0_rtx);
4762 emit_insn (prefetch);
4763 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4764 }
4765
4766 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
4767 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
4768 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
4769 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
4770 temp = gen_rtx_SET (pc_rtx, temp);
4771 emit_jump_insn (temp);
4772
4773 s390_load_address (addr0,
4774 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
4775 s390_load_address (addr1,
4776 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
4777
4778 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4779 OPTAB_DIRECT);
4780 if (temp != blocks)
4781 emit_move_insn (blocks, temp);
4782
4783 emit_cmp_and_jump_insns (blocks, const0_rtx,
4784 EQ, NULL_RTX, mode, 1, loop_end_label);
4785
4786 emit_jump (loop_start_label);
4787 emit_label (loop_end_label);
4788
4789 emit_insn (gen_cmpmem_short (op0, op1,
4790 convert_to_mode (Pmode, count, 1)));
4791 emit_label (end_label);
4792
4793 emit_insn (gen_cmpint (target, ccreg));
4794 }
4795 return true;
4796 }
4797
4798 /* Emit a conditional jump to LABEL for condition code mask MASK using
4799    comparison operator COMPARISON.  Return the emitted jump insn.  */
4800
4801 static rtx
4802 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
4803 {
4804 rtx temp;
4805
4806 gcc_assert (comparison == EQ || comparison == NE);
4807 gcc_assert (mask > 0 && mask < 15);
4808
4809 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
4810 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
4811 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
4812 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
4813 temp = gen_rtx_SET (pc_rtx, temp);
4814 return emit_jump_insn (temp);
4815 }
4816
4817 /* Emit the instructions to implement strlen of STRING and store the
4818 result in TARGET. The string has the known ALIGNMENT. This
4819 version uses vector instructions and is therefore not appropriate
4820 for targets prior to z13. */
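/* A simplified sketch of the value this expansion computes (hand-written
   pseudo code, for illustration only; the real sequence below additionally
   handles an unaligned start of the string via VLL and attaches branch
   probabilities):

     idx = 0;
     do
       {
         v = load 16 bytes at (string + idx);
         idx += 16;
         pos = position of first zero byte in v, or 16 if there is none;
       }
     while (no zero byte was found);
     return idx - 16 + pos;
*/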
4821
4822 void
4823 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
4824 {
4825 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
4826 int very_likely = REG_BR_PROB_BASE - 1;
4827 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
4828 rtx str_reg = gen_reg_rtx (V16QImode);
4829 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
4830 rtx str_idx_reg = gen_reg_rtx (Pmode);
4831 rtx result_reg = gen_reg_rtx (V16QImode);
4832 rtx is_aligned_label = gen_label_rtx ();
4833 rtx into_loop_label = NULL_RTX;
4834 rtx loop_start_label = gen_label_rtx ();
4835 rtx temp;
4836 rtx len = gen_reg_rtx (QImode);
4837 rtx cond;
4838
4839 s390_load_address (str_addr_base_reg, XEXP (string, 0));
4840 emit_move_insn (str_idx_reg, const0_rtx);
4841
4842 if (INTVAL (alignment) < 16)
4843 {
4844       /* Check whether the address happens to be aligned properly; if so,
4845 	 jump directly to the aligned loop.  */
4846 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
4847 str_addr_base_reg, GEN_INT (15)),
4848 const0_rtx, EQ, NULL_RTX,
4849 Pmode, 1, is_aligned_label);
4850
4851 temp = gen_reg_rtx (Pmode);
4852 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
4853 GEN_INT (15), temp, 1, OPTAB_DIRECT);
4854 gcc_assert (REG_P (temp));
4855 highest_index_to_load_reg =
4856 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
4857 highest_index_to_load_reg, 1, OPTAB_DIRECT);
4858 gcc_assert (REG_P (highest_index_to_load_reg));
4859 emit_insn (gen_vllv16qi (str_reg,
4860 convert_to_mode (SImode, highest_index_to_load_reg, 1),
4861 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
4862
4863 into_loop_label = gen_label_rtx ();
4864 s390_emit_jump (into_loop_label, NULL_RTX);
4865 emit_barrier ();
4866 }
4867
4868 emit_label (is_aligned_label);
4869 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
4870
4871   /* Reaching this point we are only performing 16-byte aligned
4872      loads.  */
4873 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
4874
4875 emit_label (loop_start_label);
4876 LABEL_NUSES (loop_start_label) = 1;
4877
4878 /* Load 16 bytes of the string into VR. */
4879 emit_move_insn (str_reg,
4880 gen_rtx_MEM (V16QImode,
4881 gen_rtx_PLUS (Pmode, str_idx_reg,
4882 str_addr_base_reg)));
4883 if (into_loop_label != NULL_RTX)
4884 {
4885 emit_label (into_loop_label);
4886 LABEL_NUSES (into_loop_label) = 1;
4887 }
4888
4889 /* Increment string index by 16 bytes. */
4890 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
4891 str_idx_reg, 1, OPTAB_DIRECT);
4892
4893 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
4894 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
4895
4896 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
4897 REG_BR_PROB, very_likely);
4898 emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
4899
4900   /* If the string pointer wasn't aligned we have loaded less than 16
4901      bytes and the remaining bytes were filled with zeros (by vll).
4902      Now we have to check whether the resulting index lies within the
4903      bytes that are actually part of the string.  */
4904
4905 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
4906 highest_index_to_load_reg);
4907 s390_load_address (highest_index_to_load_reg,
4908 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
4909 const1_rtx));
4910 if (TARGET_64BIT)
4911 emit_insn (gen_movdicc (str_idx_reg, cond,
4912 highest_index_to_load_reg, str_idx_reg));
4913 else
4914 emit_insn (gen_movsicc (str_idx_reg, cond,
4915 highest_index_to_load_reg, str_idx_reg));
4916
4917 add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
4918 very_unlikely);
4919
4920 expand_binop (Pmode, add_optab, str_idx_reg,
4921 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
4922 /* FIXME: len is already zero extended - so avoid the llgcr emitted
4923 here. */
4924 temp = expand_binop (Pmode, add_optab, str_idx_reg,
4925 convert_to_mode (Pmode, len, 1),
4926 target, 1, OPTAB_DIRECT);
4927 if (temp != target)
4928 emit_move_insn (target, temp);
4929 }
4930
4931 /* Expand conditional increment or decrement using alc/slb instructions.
4932 Should generate code setting DST to either SRC or SRC + INCREMENT,
4933 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
4934 Returns true if successful, false otherwise.
4935
4936 That makes it possible to implement some if-constructs without jumps e.g.:
4937 (borrow = CC0 | CC1 and carry = CC2 | CC3)
4938 unsigned int a, b, c;
4939 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
4940 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
4941 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
4942 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
4943
4944 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
4945 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
4946 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
4947 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
4948 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
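/* Put differently: for the simplest of the cases above, the expansion
   below produces the branch-free equivalent of

     unsigned int a, b, c;
     c = c + (a < b);

   by materializing the comparison result as a carry (or borrow for the
   decrement cases) and folding it into the addition - an informal
   characterization; the table above lists the exact CC modes used.  */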
4949
4950 bool
4951 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
4952 rtx dst, rtx src, rtx increment)
4953 {
4954 machine_mode cmp_mode;
4955 machine_mode cc_mode;
4956 rtx op_res;
4957 rtx insn;
4958 rtvec p;
4959 int ret;
4960
4961 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
4962 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
4963 cmp_mode = SImode;
4964 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
4965 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
4966 cmp_mode = DImode;
4967 else
4968 return false;
4969
4970 /* Try ADD LOGICAL WITH CARRY. */
4971 if (increment == const1_rtx)
4972 {
4973 /* Determine CC mode to use. */
4974 if (cmp_code == EQ || cmp_code == NE)
4975 {
4976 if (cmp_op1 != const0_rtx)
4977 {
4978 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4979 NULL_RTX, 0, OPTAB_WIDEN);
4980 cmp_op1 = const0_rtx;
4981 }
4982
4983 cmp_code = cmp_code == EQ ? LEU : GTU;
4984 }
4985
4986 if (cmp_code == LTU || cmp_code == LEU)
4987 {
4988 rtx tem = cmp_op0;
4989 cmp_op0 = cmp_op1;
4990 cmp_op1 = tem;
4991 cmp_code = swap_condition (cmp_code);
4992 }
4993
4994 switch (cmp_code)
4995 {
4996 case GTU:
4997 cc_mode = CCUmode;
4998 break;
4999
5000 case GEU:
5001 cc_mode = CCL3mode;
5002 break;
5003
5004 default:
5005 return false;
5006 }
5007
5008 /* Emit comparison instruction pattern. */
5009 if (!register_operand (cmp_op0, cmp_mode))
5010 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5011
5012 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5013 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5014 /* We use insn_invalid_p here to add clobbers if required. */
5015 ret = insn_invalid_p (emit_insn (insn), false);
5016 gcc_assert (!ret);
5017
5018 /* Emit ALC instruction pattern. */
5019 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5020 gen_rtx_REG (cc_mode, CC_REGNUM),
5021 const0_rtx);
5022
5023 if (src != const0_rtx)
5024 {
5025 if (!register_operand (src, GET_MODE (dst)))
5026 src = force_reg (GET_MODE (dst), src);
5027
5028 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
5029 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
5030 }
5031
5032 p = rtvec_alloc (2);
5033 RTVEC_ELT (p, 0) =
5034 gen_rtx_SET (dst, op_res);
5035 RTVEC_ELT (p, 1) =
5036 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5037 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5038
5039 return true;
5040 }
5041
5042 /* Try SUBTRACT LOGICAL WITH BORROW. */
5043 if (increment == constm1_rtx)
5044 {
5045 /* Determine CC mode to use. */
5046 if (cmp_code == EQ || cmp_code == NE)
5047 {
5048 if (cmp_op1 != const0_rtx)
5049 {
5050 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5051 NULL_RTX, 0, OPTAB_WIDEN);
5052 cmp_op1 = const0_rtx;
5053 }
5054
5055 cmp_code = cmp_code == EQ ? LEU : GTU;
5056 }
5057
5058 if (cmp_code == GTU || cmp_code == GEU)
5059 {
5060 rtx tem = cmp_op0;
5061 cmp_op0 = cmp_op1;
5062 cmp_op1 = tem;
5063 cmp_code = swap_condition (cmp_code);
5064 }
5065
5066 switch (cmp_code)
5067 {
5068 case LEU:
5069 cc_mode = CCUmode;
5070 break;
5071
5072 case LTU:
5073 cc_mode = CCL3mode;
5074 break;
5075
5076 default:
5077 return false;
5078 }
5079
5080 /* Emit comparison instruction pattern. */
5081 if (!register_operand (cmp_op0, cmp_mode))
5082 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5083
5084 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5085 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5086 /* We use insn_invalid_p here to add clobbers if required. */
5087 ret = insn_invalid_p (emit_insn (insn), false);
5088 gcc_assert (!ret);
5089
5090 /* Emit SLB instruction pattern. */
5091 if (!register_operand (src, GET_MODE (dst)))
5092 src = force_reg (GET_MODE (dst), src);
5093
5094 op_res = gen_rtx_MINUS (GET_MODE (dst),
5095 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
5096 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5097 gen_rtx_REG (cc_mode, CC_REGNUM),
5098 const0_rtx));
5099 p = rtvec_alloc (2);
5100 RTVEC_ELT (p, 0) =
5101 gen_rtx_SET (dst, op_res);
5102 RTVEC_ELT (p, 1) =
5103 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5104 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5105
5106 return true;
5107 }
5108
5109 return false;
5110 }
5111
5112 /* Expand code for the insv template. Return true if successful. */
5113
5114 bool
5115 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
5116 {
5117 int bitsize = INTVAL (op1);
5118 int bitpos = INTVAL (op2);
5119 machine_mode mode = GET_MODE (dest);
5120 machine_mode smode;
5121 int smode_bsize, mode_bsize;
5122 rtx op, clobber;
5123
5124 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
5125 return false;
5126
5127 /* Generate INSERT IMMEDIATE (IILL et al). */
5128 /* (set (ze (reg)) (const_int)). */
5129 if (TARGET_ZARCH
5130 && register_operand (dest, word_mode)
5131 && (bitpos % 16) == 0
5132 && (bitsize % 16) == 0
5133 && const_int_operand (src, VOIDmode))
5134 {
5135 HOST_WIDE_INT val = INTVAL (src);
5136 int regpos = bitpos + bitsize;
5137
5138 while (regpos > bitpos)
5139 {
5140 machine_mode putmode;
5141 int putsize;
5142
5143 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
5144 putmode = SImode;
5145 else
5146 putmode = HImode;
5147
5148 putsize = GET_MODE_BITSIZE (putmode);
5149 regpos -= putsize;
5150 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5151 GEN_INT (putsize),
5152 GEN_INT (regpos)),
5153 gen_int_mode (val, putmode));
5154 val >>= putsize;
5155 }
5156 gcc_assert (regpos == bitpos);
5157 return true;
5158 }
5159
5160 smode = smallest_mode_for_size (bitsize, MODE_INT);
5161 smode_bsize = GET_MODE_BITSIZE (smode);
5162 mode_bsize = GET_MODE_BITSIZE (mode);
5163
5164 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
5165 if (bitpos == 0
5166 && (bitsize % BITS_PER_UNIT) == 0
5167 && MEM_P (dest)
5168 && (register_operand (src, word_mode)
5169 || const_int_operand (src, VOIDmode)))
5170 {
5171 /* Emit standard pattern if possible. */
5172 if (smode_bsize == bitsize)
5173 {
5174 emit_move_insn (adjust_address (dest, smode, 0),
5175 gen_lowpart (smode, src));
5176 return true;
5177 }
5178
5179 /* (set (ze (mem)) (const_int)). */
5180 else if (const_int_operand (src, VOIDmode))
5181 {
5182 int size = bitsize / BITS_PER_UNIT;
5183 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
5184 BLKmode,
5185 UNITS_PER_WORD - size);
5186
5187 dest = adjust_address (dest, BLKmode, 0);
5188 set_mem_size (dest, size);
5189 s390_expand_movmem (dest, src_mem, GEN_INT (size));
5190 return true;
5191 }
5192
5193 /* (set (ze (mem)) (reg)). */
5194 else if (register_operand (src, word_mode))
5195 {
5196 if (bitsize <= 32)
5197 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
5198 const0_rtx), src);
5199 else
5200 {
5201 /* Emit st,stcmh sequence. */
5202 int stcmh_width = bitsize - 32;
5203 int size = stcmh_width / BITS_PER_UNIT;
5204
5205 emit_move_insn (adjust_address (dest, SImode, size),
5206 gen_lowpart (SImode, src));
5207 set_mem_size (dest, size);
5208 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5209 GEN_INT (stcmh_width),
5210 const0_rtx),
5211 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
5212 }
5213 return true;
5214 }
5215 }
5216
5217 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
5218 if ((bitpos % BITS_PER_UNIT) == 0
5219 && (bitsize % BITS_PER_UNIT) == 0
5220 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
5221 && MEM_P (src)
5222 && (mode == DImode || mode == SImode)
5223 && register_operand (dest, mode))
5224 {
5225 /* Emit a strict_low_part pattern if possible. */
5226 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
5227 {
5228 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
5229 op = gen_rtx_SET (op, gen_lowpart (smode, src));
5230 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5231 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
5232 return true;
5233 }
5234
5235 /* ??? There are more powerful versions of ICM that are not
5236 completely represented in the md file. */
5237 }
5238
5239 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
5240 if (TARGET_Z10 && (mode == DImode || mode == SImode))
5241 {
5242 machine_mode mode_s = GET_MODE (src);
5243
5244 if (mode_s == VOIDmode)
5245 {
5246 /* Assume const_int etc already in the proper mode. */
5247 src = force_reg (mode, src);
5248 }
5249 else if (mode_s != mode)
5250 {
5251 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
5252 src = force_reg (mode_s, src);
5253 src = gen_lowpart (mode, src);
5254 }
5255
5256 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
5257 op = gen_rtx_SET (op, src);
5258
5259 if (!TARGET_ZEC12)
5260 {
5261 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5262 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
5263 }
5264 emit_insn (op);
5265
5266 return true;
5267 }
5268
5269 return false;
5270 }
5271
5272 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
5273 register that holds VAL of mode MODE shifted by COUNT bits. */
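/* For example (illustrative values only): with MODE == QImode,
   VAL == 0x1234 and COUNT == 16 this yields

     (0x1234 & 0xff) << 16 == 0x340000

   i.e. only the QImode part of VAL, moved to the byte position selected
   by COUNT.  */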
5274
5275 static inline rtx
5276 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
5277 {
5278 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
5279 NULL_RTX, 1, OPTAB_DIRECT);
5280 return expand_simple_binop (SImode, ASHIFT, val, count,
5281 NULL_RTX, 1, OPTAB_DIRECT);
5282 }
5283
5284 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
5285 the result in TARGET. */
5286
5287 void
5288 s390_expand_vec_compare (rtx target, enum rtx_code cond,
5289 rtx cmp_op1, rtx cmp_op2)
5290 {
5291 machine_mode mode = GET_MODE (target);
5292 bool neg_p = false, swap_p = false;
5293 rtx tmp;
5294
5295 if (GET_MODE (cmp_op1) == V2DFmode)
5296 {
5297 switch (cond)
5298 {
5299 /* NE a != b -> !(a == b) */
5300 case NE: cond = EQ; neg_p = true; break;
5301 /* UNGT a u> b -> !(b >= a) */
5302 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
5303 /* UNGE a u>= b -> !(b > a) */
5304 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
5305 /* LE: a <= b -> b >= a */
5306 case LE: cond = GE; swap_p = true; break;
5307 /* UNLE: a u<= b -> !(a > b) */
5308 case UNLE: cond = GT; neg_p = true; break;
5309 /* LT: a < b -> b > a */
5310 case LT: cond = GT; swap_p = true; break;
5311 /* UNLT: a u< b -> !(a >= b) */
5312 case UNLT: cond = GE; neg_p = true; break;
5313 case UNEQ:
5314 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
5315 return;
5316 case LTGT:
5317 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
5318 return;
5319 case ORDERED:
5320 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
5321 return;
5322 case UNORDERED:
5323 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
5324 return;
5325 default: break;
5326 }
5327 }
5328 else
5329 {
5330 switch (cond)
5331 {
5332 /* NE: a != b -> !(a == b) */
5333 case NE: cond = EQ; neg_p = true; break;
5334 /* GE: a >= b -> !(b > a) */
5335 case GE: cond = GT; neg_p = true; swap_p = true; break;
5336 /* GEU: a >= b -> !(b > a) */
5337 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
5338 /* LE: a <= b -> !(a > b) */
5339 case LE: cond = GT; neg_p = true; break;
5340 /* LEU: a <= b -> !(a > b) */
5341 case LEU: cond = GTU; neg_p = true; break;
5342 /* LT: a < b -> b > a */
5343 case LT: cond = GT; swap_p = true; break;
5344 /* LTU: a < b -> b > a */
5345 case LTU: cond = GTU; swap_p = true; break;
5346 default: break;
5347 }
5348 }
5349
5350 if (swap_p)
5351 {
5352 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
5353 }
5354
5355 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
5356 mode,
5357 cmp_op1, cmp_op2)));
5358 if (neg_p)
5359 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
5360 }
5361
5362 /* Generate a vector comparison expression loading either elements of
5363 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
5364 and CMP_OP2. */
5365
5366 void
5367 s390_expand_vcond (rtx target, rtx then, rtx els,
5368 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
5369 {
5370 rtx tmp;
5371 machine_mode result_mode;
5372 rtx result_target;
5373
5374 /* We always use an integral type vector to hold the comparison
5375 result. */
5376 result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1);
5377 result_target = gen_reg_rtx (result_mode);
5378
5379 /* Alternatively this could be done by reload by lowering the cmp*
5380 predicates. But it appears to be better for scheduling etc. to
5381      have that done early.  */
5382 if (!REG_P (cmp_op1))
5383 cmp_op1 = force_reg (GET_MODE (target), cmp_op1);
5384
5385 if (!REG_P (cmp_op2))
5386 cmp_op2 = force_reg (GET_MODE (target), cmp_op2);
5387
5388 s390_expand_vec_compare (result_target, cond,
5389 cmp_op1, cmp_op2);
5390
5391 /* If the results are supposed to be either -1 or 0 we are done
5392 since this is what our compare instructions generate anyway. */
5393 if (constm1_operand (then, GET_MODE (then))
5394 && const0_operand (els, GET_MODE (els)))
5395 {
5396 emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target),
5397 result_target, 0));
5398 return;
5399 }
5400
5401 /* Otherwise we will do a vsel afterwards. */
5402 /* This gets triggered e.g.
5403 with gcc.c-torture/compile/pr53410-1.c */
5404 if (!REG_P (then))
5405 then = force_reg (GET_MODE (target), then);
5406
5407 if (!REG_P (els))
5408 els = force_reg (GET_MODE (target), els);
5409
5410 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
5411 result_target,
5412 CONST0_RTX (result_mode));
5413
5414 /* We compared the result against zero above so we have to swap then
5415 and els here. */
5416 tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then);
5417
5418 gcc_assert (GET_MODE (target) == GET_MODE (then));
5419 emit_insn (gen_rtx_SET (target, tmp));
5420 }
5421
5422 /* Emit the RTX necessary to initialize the vector TARGET with values
5423 in VALS. */
5424 void
5425 s390_expand_vec_init (rtx target, rtx vals)
5426 {
5427 machine_mode mode = GET_MODE (target);
5428 machine_mode inner_mode = GET_MODE_INNER (mode);
5429 int n_elts = GET_MODE_NUNITS (mode);
5430 bool all_same = true, all_regs = true, all_const_int = true;
5431 rtx x;
5432 int i;
5433
5434 for (i = 0; i < n_elts; ++i)
5435 {
5436 x = XVECEXP (vals, 0, i);
5437
5438 if (!CONST_INT_P (x))
5439 all_const_int = false;
5440
5441 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
5442 all_same = false;
5443
5444 if (!REG_P (x))
5445 all_regs = false;
5446 }
5447
5448 /* Use vector gen mask or vector gen byte mask if possible. */
5449 if (all_same && all_const_int
5450 && (XVECEXP (vals, 0, 0) == const0_rtx
5451 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
5452 NULL, NULL)
5453 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
5454 {
5455 emit_insn (gen_rtx_SET (target,
5456 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
5457 return;
5458 }
5459
5460 if (all_same)
5461 {
5462 emit_insn (gen_rtx_SET (target,
5463 gen_rtx_VEC_DUPLICATE (mode,
5464 XVECEXP (vals, 0, 0))));
5465 return;
5466 }
5467
5468 if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
5469 {
5470 /* Use vector load pair. */
5471 emit_insn (gen_rtx_SET (target,
5472 gen_rtx_VEC_CONCAT (mode,
5473 XVECEXP (vals, 0, 0),
5474 XVECEXP (vals, 0, 1))));
5475 return;
5476 }
5477
5478 /* We are about to set the vector elements one by one. Zero out the
5479 full register first in order to help the data flow framework to
5480      detect it as a full VR set.  */
5481 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
5482
5483 /* Unfortunately the vec_init expander is not allowed to fail. So
5484 we have to implement the fallback ourselves. */
5485 for (i = 0; i < n_elts; i++)
5486 emit_insn (gen_rtx_SET (target,
5487 gen_rtx_UNSPEC (mode,
5488 gen_rtvec (3, XVECEXP (vals, 0, i),
5489 GEN_INT (i), target),
5490 UNSPEC_VEC_SET)));
5491 }
5492
5493 /* Structure to hold the initial parameters for a compare_and_swap operation
5494 in HImode and QImode. */
5495
5496 struct alignment_context
5497 {
5498 rtx memsi; /* SI aligned memory location. */
5499 rtx shift; /* Bit offset with regard to lsb. */
5500 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
5501 rtx modemaski; /* ~modemask */
5502   bool aligned;	   /* True if memory is aligned, false otherwise.  */
5503 };
5504
5505 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
5506    structure AC for transparent simplification, if the memory alignment is known
5507    to be at least 32 bits.  MEM is the memory location for the actual operation
5508 and MODE its mode. */
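/* A worked example for the unaligned path (the address is chosen only for
   illustration): for a QImode access at address 0x1003

     align      = 0x1003 & -4        = 0x1000   (SImode aligned base)
     byteoffset = 0x1003 & 3         = 3
     shift      = ((4 - 1) - 3) * 8  = 0 bits

   so on this big-endian target the accessed byte sits in the least
   significant eight bits of the SImode word at 0x1000 and MODEMASK
   becomes 0xff.  */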
5509
5510 static void
5511 init_alignment_context (struct alignment_context *ac, rtx mem,
5512 machine_mode mode)
5513 {
5514 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
5515 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
5516
5517 if (ac->aligned)
5518 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
5519 else
5520 {
5521 /* Alignment is unknown. */
5522 rtx byteoffset, addr, align;
5523
5524 /* Force the address into a register. */
5525 addr = force_reg (Pmode, XEXP (mem, 0));
5526
5527 /* Align it to SImode. */
5528 align = expand_simple_binop (Pmode, AND, addr,
5529 GEN_INT (-GET_MODE_SIZE (SImode)),
5530 NULL_RTX, 1, OPTAB_DIRECT);
5531 /* Generate MEM. */
5532 ac->memsi = gen_rtx_MEM (SImode, align);
5533 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
5534 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
5535 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
5536
5537 /* Calculate shiftcount. */
5538 byteoffset = expand_simple_binop (Pmode, AND, addr,
5539 GEN_INT (GET_MODE_SIZE (SImode) - 1),
5540 NULL_RTX, 1, OPTAB_DIRECT);
5541 /* As we already have some offset, evaluate the remaining distance. */
5542 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
5543 NULL_RTX, 1, OPTAB_DIRECT);
5544 }
5545
5546 /* Shift is the byte count, but we need the bitcount. */
5547 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
5548 NULL_RTX, 1, OPTAB_DIRECT);
5549
5550 /* Calculate masks. */
5551 ac->modemask = expand_simple_binop (SImode, ASHIFT,
5552 GEN_INT (GET_MODE_MASK (mode)),
5553 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
5554 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
5555 NULL_RTX, 1);
5556 }
5557
5558 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
5559    emit a single insv insn into SEQ2.  Otherwise, put prep insns in SEQ1 and
5560 perform the merge in SEQ2. */
5561
5562 static rtx
5563 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
5564 machine_mode mode, rtx val, rtx ins)
5565 {
5566 rtx tmp;
5567
5568 if (ac->aligned)
5569 {
5570 start_sequence ();
5571 tmp = copy_to_mode_reg (SImode, val);
5572 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
5573 const0_rtx, ins))
5574 {
5575 *seq1 = NULL;
5576 *seq2 = get_insns ();
5577 end_sequence ();
5578 return tmp;
5579 }
5580 end_sequence ();
5581 }
5582
5583 /* Failed to use insv. Generate a two part shift and mask. */
5584 start_sequence ();
5585 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
5586 *seq1 = get_insns ();
5587 end_sequence ();
5588
5589 start_sequence ();
5590 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
5591 *seq2 = get_insns ();
5592 end_sequence ();
5593
5594 return tmp;
5595 }
5596
5597 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
5598 the memory location, CMP the old value to compare MEM with and NEW_RTX the
5599 value to set if CMP == MEM. */
5600
5601 void
5602 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
5603 rtx cmp, rtx new_rtx, bool is_weak)
5604 {
5605 struct alignment_context ac;
5606 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
5607 rtx res = gen_reg_rtx (SImode);
5608 rtx_code_label *csloop = NULL, *csend = NULL;
5609
5610 gcc_assert (MEM_P (mem));
5611
5612 init_alignment_context (&ac, mem, mode);
5613
5614 /* Load full word. Subsequent loads are performed by CS. */
5615 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
5616 NULL_RTX, 1, OPTAB_DIRECT);
5617
5618 /* Prepare insertions of cmp and new_rtx into the loaded value. When
5619 possible, we try to use insv to make this happen efficiently. If
5620 that fails we'll generate code both inside and outside the loop. */
5621 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
5622 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
5623
5624 if (seq0)
5625 emit_insn (seq0);
5626 if (seq1)
5627 emit_insn (seq1);
5628
5629 /* Start CS loop. */
5630 if (!is_weak)
5631 {
5632 /* Begin assuming success. */
5633 emit_move_insn (btarget, const1_rtx);
5634
5635 csloop = gen_label_rtx ();
5636 csend = gen_label_rtx ();
5637 emit_label (csloop);
5638 }
5639
5640 /* val = "<mem>00..0<mem>"
5641 * cmp = "00..0<cmp>00..0"
5642 * new = "00..0<new>00..0"
5643 */
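  /* For instance, with a QImode field at big-endian byte offset 1 of the
     containing word (all numbers purely illustrative):

       val = 0xAA00CCDD     full word with the accessed byte zeroed out
       cmp = 0x00BB0000     expected old byte, shifted into place
       new = 0x00EE0000     replacement byte, shifted into place

     seq2/seq3 merge these into VAL, giving cmpv = 0xAABBCCDD and
     newv = 0xAAEECCDD, which feed the word-sized compare-and-swap.  */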
5644
5645 emit_insn (seq2);
5646 emit_insn (seq3);
5647
5648 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
5649 if (is_weak)
5650 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
5651 else
5652 {
5653 rtx tmp;
5654
5655 /* Jump to end if we're done (likely?). */
5656 s390_emit_jump (csend, cc);
5657
5658       /* Check for changes outside mode, and loop internally if so.
5659 Arrange the moves so that the compare is adjacent to the
5660 branch so that we can generate CRJ. */
5661 tmp = copy_to_reg (val);
5662 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
5663 1, OPTAB_DIRECT);
5664 cc = s390_emit_compare (NE, val, tmp);
5665 s390_emit_jump (csloop, cc);
5666
5667 /* Failed. */
5668 emit_move_insn (btarget, const0_rtx);
5669 emit_label (csend);
5670 }
5671
5672 /* Return the correct part of the bitfield. */
5673 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
5674 NULL_RTX, 1, OPTAB_DIRECT), 1);
5675 }
5676
5677 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
5678 and VAL the value to play with. If AFTER is true then store the value
5679 MEM holds after the operation, if AFTER is false then store the value MEM
5680 holds before the operation. If TARGET is zero then discard that value, else
5681 store it to TARGET. */
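/* As an informal illustration of the loop emitted below: an atomic
   fetch-and-add on a QImode object becomes a compare-and-swap loop on the
   containing SImode word - the word is loaded, the shifted addend is
   applied and masked back to the accessed byte, and CS retries until the
   word could be replaced without interference.  The XOR with MODEMASKI for
   AND and MULT (NAND) sets all bits outside the accessed part to one so
   that those bits survive the AND unchanged.  */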
5682
5683 void
5684 s390_expand_atomic (machine_mode mode, enum rtx_code code,
5685 rtx target, rtx mem, rtx val, bool after)
5686 {
5687 struct alignment_context ac;
5688 rtx cmp;
5689 rtx new_rtx = gen_reg_rtx (SImode);
5690 rtx orig = gen_reg_rtx (SImode);
5691 rtx_code_label *csloop = gen_label_rtx ();
5692
5693 gcc_assert (!target || register_operand (target, VOIDmode));
5694 gcc_assert (MEM_P (mem));
5695
5696 init_alignment_context (&ac, mem, mode);
5697
5698 /* Shift val to the correct bit positions.
5699 Preserve "icm", but prevent "ex icm". */
5700 if (!(ac.aligned && code == SET && MEM_P (val)))
5701 val = s390_expand_mask_and_shift (val, mode, ac.shift);
5702
5703 /* Further preparation insns. */
5704 if (code == PLUS || code == MINUS)
5705 emit_move_insn (orig, val);
5706 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
5707 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
5708 NULL_RTX, 1, OPTAB_DIRECT);
5709
5710 /* Load full word. Subsequent loads are performed by CS. */
5711 cmp = force_reg (SImode, ac.memsi);
5712
5713 /* Start CS loop. */
5714 emit_label (csloop);
5715 emit_move_insn (new_rtx, cmp);
5716
5717 /* Patch new with val at correct position. */
5718 switch (code)
5719 {
5720 case PLUS:
5721 case MINUS:
5722 val = expand_simple_binop (SImode, code, new_rtx, orig,
5723 NULL_RTX, 1, OPTAB_DIRECT);
5724 val = expand_simple_binop (SImode, AND, val, ac.modemask,
5725 NULL_RTX, 1, OPTAB_DIRECT);
5726 /* FALLTHRU */
5727 case SET:
5728 if (ac.aligned && MEM_P (val))
5729 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
5730 0, 0, SImode, val);
5731 else
5732 {
5733 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
5734 NULL_RTX, 1, OPTAB_DIRECT);
5735 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
5736 NULL_RTX, 1, OPTAB_DIRECT);
5737 }
5738 break;
5739 case AND:
5740 case IOR:
5741 case XOR:
5742 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
5743 NULL_RTX, 1, OPTAB_DIRECT);
5744 break;
5745 case MULT: /* NAND */
5746 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
5747 NULL_RTX, 1, OPTAB_DIRECT);
5748 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
5749 NULL_RTX, 1, OPTAB_DIRECT);
5750 break;
5751 default:
5752 gcc_unreachable ();
5753 }
5754
5755 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
5756 ac.memsi, cmp, new_rtx));
5757
5758 /* Return the correct part of the bitfield. */
5759 if (target)
5760 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
5761 after ? new_rtx : cmp, ac.shift,
5762 NULL_RTX, 1, OPTAB_DIRECT), 1);
5763 }
5764
5765 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5766 We need to emit DTP-relative relocations. */
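/* For example, for SIZE == 8 and X a symbol_ref for "foo" this emits
   "\t.quad\tfoo@DTPOFF".  */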
5767
5768 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
5769
5770 static void
5771 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
5772 {
5773 switch (size)
5774 {
5775 case 4:
5776 fputs ("\t.long\t", file);
5777 break;
5778 case 8:
5779 fputs ("\t.quad\t", file);
5780 break;
5781 default:
5782 gcc_unreachable ();
5783 }
5784 output_addr_const (file, x);
5785 fputs ("@DTPOFF", file);
5786 }
5787
5788 /* Return the proper mode for REGNO being represented in the dwarf
5789 unwind table. */
5790 machine_mode
5791 s390_dwarf_frame_reg_mode (int regno)
5792 {
5793 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
5794
5795 /* The rightmost 64 bits of vector registers are call-clobbered. */
5796 if (GET_MODE_SIZE (save_mode) > 8)
5797 save_mode = DImode;
5798
5799 return save_mode;
5800 }
5801
5802 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
5803 /* Implement TARGET_MANGLE_TYPE. */
5804
5805 static const char *
5806 s390_mangle_type (const_tree type)
5807 {
5808 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
5809 && TARGET_LONG_DOUBLE_128)
5810 return "g";
5811
5812 /* For all other types, use normal C++ mangling. */
5813 return NULL;
5814 }
5815 #endif
5816
5817 /* In the name of slightly smaller debug output, and to cater to
5818 general assembler lossage, recognize various UNSPEC sequences
5819 and turn them back into a direct symbol reference. */
5820
5821 static rtx
5822 s390_delegitimize_address (rtx orig_x)
5823 {
5824 rtx x, y;
5825
5826 orig_x = delegitimize_mem_from_attrs (orig_x);
5827 x = orig_x;
5828
5829 /* Extract the symbol ref from:
5830 (plus:SI (reg:SI 12 %r12)
5831 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
5832 UNSPEC_GOTOFF/PLTOFF)))
5833 and
5834 (plus:SI (reg:SI 12 %r12)
5835 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
5836 UNSPEC_GOTOFF/PLTOFF)
5837 (const_int 4 [0x4])))) */
5838 if (GET_CODE (x) == PLUS
5839 && REG_P (XEXP (x, 0))
5840 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
5841 && GET_CODE (XEXP (x, 1)) == CONST)
5842 {
5843 HOST_WIDE_INT offset = 0;
5844
5845 /* The const operand. */
5846 y = XEXP (XEXP (x, 1), 0);
5847
5848 if (GET_CODE (y) == PLUS
5849 && GET_CODE (XEXP (y, 1)) == CONST_INT)
5850 {
5851 offset = INTVAL (XEXP (y, 1));
5852 y = XEXP (y, 0);
5853 }
5854
5855 if (GET_CODE (y) == UNSPEC
5856 && (XINT (y, 1) == UNSPEC_GOTOFF
5857 || XINT (y, 1) == UNSPEC_PLTOFF))
5858 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
5859 }
5860
5861 if (GET_CODE (x) != MEM)
5862 return orig_x;
5863
5864 x = XEXP (x, 0);
5865 if (GET_CODE (x) == PLUS
5866 && GET_CODE (XEXP (x, 1)) == CONST
5867 && GET_CODE (XEXP (x, 0)) == REG
5868 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5869 {
5870 y = XEXP (XEXP (x, 1), 0);
5871 if (GET_CODE (y) == UNSPEC
5872 && XINT (y, 1) == UNSPEC_GOT)
5873 y = XVECEXP (y, 0, 0);
5874 else
5875 return orig_x;
5876 }
5877 else if (GET_CODE (x) == CONST)
5878 {
5879 /* Extract the symbol ref from:
5880 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
5881 UNSPEC_PLT/GOTENT))) */
5882
5883 y = XEXP (x, 0);
5884 if (GET_CODE (y) == UNSPEC
5885 && (XINT (y, 1) == UNSPEC_GOTENT
5886 || XINT (y, 1) == UNSPEC_PLT))
5887 y = XVECEXP (y, 0, 0);
5888 else
5889 return orig_x;
5890 }
5891 else
5892 return orig_x;
5893
5894 if (GET_MODE (orig_x) != Pmode)
5895 {
5896 if (GET_MODE (orig_x) == BLKmode)
5897 return orig_x;
5898 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
5899 if (y == NULL_RTX)
5900 return orig_x;
5901 }
5902 return y;
5903 }
5904
5905 /* Output operand OP to stdio stream FILE.
5906 OP is an address (register + offset) which is not used to address data;
5907 instead the rightmost bits are interpreted as the value. */
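/* For example, an operand of the form (plus (reg %r3) (const_int 2)) is
   printed as "2(%r3)"; only the low twelve bits of the offset are
   emitted.  */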
5908
5909 static void
5910 print_shift_count_operand (FILE *file, rtx op)
5911 {
5912 HOST_WIDE_INT offset;
5913 rtx base;
5914
5915 /* Extract base register and offset. */
5916 if (!s390_decompose_shift_count (op, &base, &offset))
5917 gcc_unreachable ();
5918
5919 /* Sanity check. */
5920 if (base)
5921 {
5922 gcc_assert (GET_CODE (base) == REG);
5923 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
5924 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
5925 }
5926
5927 /* Offsets are restricted to twelve bits. */
5928 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
5929 if (base)
5930 fprintf (file, "(%s)", reg_names[REGNO (base)]);
5931 }
5932
5933 /* Assigns the number of NOP halfwords to be emitted before and after the
5934 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
5935 If hotpatching is disabled for the function, the values are set to zero.
5936 */
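/* For example, a function declared as

     void foo (void) __attribute__ ((hotpatch (1, 2)));

   gets *HW_BEFORE = 1 and *HW_AFTER = 2, overriding any -mhotpatch=
   command line setting for this function.  */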
5937
5938 static void
5939 s390_function_num_hotpatch_hw (tree decl,
5940 int *hw_before,
5941 int *hw_after)
5942 {
5943 tree attr;
5944
5945 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
5946
5947 /* Handle the arguments of the hotpatch attribute. The values
5948 specified via attribute might override the cmdline argument
5949 values. */
5950 if (attr)
5951 {
5952 tree args = TREE_VALUE (attr);
5953
5954 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
5955 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
5956 }
5957 else
5958 {
5959 /* Use the values specified by the cmdline arguments. */
5960 *hw_before = s390_hotpatch_hw_before_label;
5961 *hw_after = s390_hotpatch_hw_after_label;
5962 }
5963 }
5964
5965 /* Write the extra assembler code needed to declare a function properly. */
5966
5967 void
5968 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
5969 tree decl)
5970 {
5971 int hw_before, hw_after;
5972
5973 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
5974 if (hw_before > 0)
5975 {
5976 unsigned int function_alignment;
5977 int i;
5978
5979 /* Add a trampoline code area before the function label and initialize it
5980 with two-byte nop instructions. This area can be overwritten with code
5981 that jumps to a patched version of the function. */
5982 asm_fprintf (asm_out_file, "\tnopr\t%%r7"
5983 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
5984 hw_before);
5985 for (i = 1; i < hw_before; i++)
5986 fputs ("\tnopr\t%r7\n", asm_out_file);
5987
5988 /* Note: The function label must be aligned so that (a) the bytes of the
5989 following nop do not cross a cacheline boundary, and (b) a jump address
5990 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
5991 stored directly before the label without crossing a cacheline
5992 boundary. All this is necessary to make sure the trampoline code can
5993 be changed atomically.
5994 This alignment is done automatically using the FUNCTION_BOUNDARY macro, but
5995 if there are NOPs before the function label, the alignment is placed
5996 before them. So it is necessary to duplicate the alignment after the
5997 NOPs. */
5998 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
5999 if (! DECL_USER_ALIGN (decl))
6000 function_alignment = MAX (function_alignment,
6001 (unsigned int) align_functions);
6002 fputs ("\t# alignment for hotpatch\n", asm_out_file);
6003 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
6004 }
6005
6006 ASM_OUTPUT_LABEL (asm_out_file, fname);
6007 if (hw_after > 0)
6008 asm_fprintf (asm_out_file,
6009 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
6010 hw_after);
6011 }
6012
6013 /* Output machine-dependent UNSPECs occurring in address constant X
6014 in assembler syntax to stdio stream FILE. Returns true if the
6015 constant X could be recognized, false otherwise. */
6016
6017 static bool
6018 s390_output_addr_const_extra (FILE *file, rtx x)
6019 {
6020 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
6021 switch (XINT (x, 1))
6022 {
6023 case UNSPEC_GOTENT:
6024 output_addr_const (file, XVECEXP (x, 0, 0));
6025 fprintf (file, "@GOTENT");
6026 return true;
6027 case UNSPEC_GOT:
6028 output_addr_const (file, XVECEXP (x, 0, 0));
6029 fprintf (file, "@GOT");
6030 return true;
6031 case UNSPEC_GOTOFF:
6032 output_addr_const (file, XVECEXP (x, 0, 0));
6033 fprintf (file, "@GOTOFF");
6034 return true;
6035 case UNSPEC_PLT:
6036 output_addr_const (file, XVECEXP (x, 0, 0));
6037 fprintf (file, "@PLT");
6038 return true;
6039 case UNSPEC_PLTOFF:
6040 output_addr_const (file, XVECEXP (x, 0, 0));
6041 fprintf (file, "@PLTOFF");
6042 return true;
6043 case UNSPEC_TLSGD:
6044 output_addr_const (file, XVECEXP (x, 0, 0));
6045 fprintf (file, "@TLSGD");
6046 return true;
6047 case UNSPEC_TLSLDM:
6048 assemble_name (file, get_some_local_dynamic_name ());
6049 fprintf (file, "@TLSLDM");
6050 return true;
6051 case UNSPEC_DTPOFF:
6052 output_addr_const (file, XVECEXP (x, 0, 0));
6053 fprintf (file, "@DTPOFF");
6054 return true;
6055 case UNSPEC_NTPOFF:
6056 output_addr_const (file, XVECEXP (x, 0, 0));
6057 fprintf (file, "@NTPOFF");
6058 return true;
6059 case UNSPEC_GOTNTPOFF:
6060 output_addr_const (file, XVECEXP (x, 0, 0));
6061 fprintf (file, "@GOTNTPOFF");
6062 return true;
6063 case UNSPEC_INDNTPOFF:
6064 output_addr_const (file, XVECEXP (x, 0, 0));
6065 fprintf (file, "@INDNTPOFF");
6066 return true;
6067 }
6068
6069 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
6070 switch (XINT (x, 1))
6071 {
6072 case UNSPEC_POOL_OFFSET:
6073 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
6074 output_addr_const (file, x);
6075 return true;
6076 }
6077 return false;
6078 }
6079
6080 /* Output address operand ADDR in assembler syntax to
6081 stdio stream FILE. */
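/* For example, an address (plus (reg %r2) (const_int 16)) is printed as
   "16(%r2)"; with an index register present the output has the form
   "disp(index,base)".  */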
6082
6083 void
6084 print_operand_address (FILE *file, rtx addr)
6085 {
6086 struct s390_address ad;
6087
6088 if (s390_loadrelative_operand_p (addr, NULL, NULL))
6089 {
6090 if (!TARGET_Z10)
6091 {
6092 output_operand_lossage ("symbolic memory references are "
6093 "only supported on z10 or later");
6094 return;
6095 }
6096 output_addr_const (file, addr);
6097 return;
6098 }
6099
6100 if (!s390_decompose_address (addr, &ad)
6101 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6102 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
6103 output_operand_lossage ("cannot decompose address");
6104
6105 if (ad.disp)
6106 output_addr_const (file, ad.disp);
6107 else
6108 fprintf (file, "0");
6109
6110 if (ad.base && ad.indx)
6111 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
6112 reg_names[REGNO (ad.base)]);
6113 else if (ad.base)
6114 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
6115 }
6116
6117 /* Output operand X in assembler syntax to stdio stream FILE.
6118 CODE specifies the format flag. The following format flags
6119 are recognized:
6120
6121 'C': print opcode suffix for branch condition.
6122 'D': print opcode suffix for inverse branch condition.
6123 'E': print opcode suffix for branch on index instruction.
6124 'G': print the size of the operand in bytes.
6125 'J': print tls_load/tls_gdcall/tls_ldcall suffix.
6126 'M': print the second word of a TImode operand.
6127 'N': print the second word of a DImode operand.
6128 'O': print only the displacement of a memory reference or address.
6129 'R': print only the base register of a memory reference or address.
6130 'S': print S-type memory reference (base+displacement).
6131 'Y': print shift count operand.
6132
6133 'b': print integer X as if it's an unsigned byte.
6134 'c': print integer X as if it's a signed byte.
6135 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
6136 'f': "end" contiguous bitmask X in SImode.
6137 'h': print integer X as if it's a signed halfword.
6138 'i': print the first nonzero HImode part of X.
6139 'j': print the first HImode part unequal to -1 of X.
6140 'k': print the first nonzero SImode part of X.
6141 'm': print the first SImode part unequal to -1 of X.
6142 'o': print integer X as if it's an unsigned 32-bit word.
6143 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
6144 't': CONST_INT: "start" of contiguous bitmask X in SImode.
6145 CONST_VECTOR: Generate a bitmask for vgbm instruction.
6146 'x': print integer X as if it's an unsigned halfword.
6147 'v': print register number as vector register (v1 instead of f1).
6148 */
6149
6150 void
6151 print_operand (FILE *file, rtx x, int code)
6152 {
6153 HOST_WIDE_INT ival;
6154
6155 switch (code)
6156 {
6157 case 'C':
6158 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
6159 return;
6160
6161 case 'D':
6162 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
6163 return;
6164
6165 case 'E':
6166 if (GET_CODE (x) == LE)
6167 fprintf (file, "l");
6168 else if (GET_CODE (x) == GT)
6169 fprintf (file, "h");
6170 else
6171 output_operand_lossage ("invalid comparison operator "
6172 "for 'E' output modifier");
6173 return;
6174
6175 case 'J':
6176 if (GET_CODE (x) == SYMBOL_REF)
6177 {
6178 fprintf (file, "%s", ":tls_load:");
6179 output_addr_const (file, x);
6180 }
6181 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
6182 {
6183 fprintf (file, "%s", ":tls_gdcall:");
6184 output_addr_const (file, XVECEXP (x, 0, 0));
6185 }
6186 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
6187 {
6188 fprintf (file, "%s", ":tls_ldcall:");
6189 const char *name = get_some_local_dynamic_name ();
6190 gcc_assert (name);
6191 assemble_name (file, name);
6192 }
6193 else
6194 output_operand_lossage ("invalid reference for 'J' output modifier");
6195 return;
6196
6197 case 'G':
6198 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
6199 return;
6200
6201 case 'O':
6202 {
6203 struct s390_address ad;
6204 int ret;
6205
6206 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
6207
6208 if (!ret
6209 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6210 || ad.indx)
6211 {
6212 output_operand_lossage ("invalid address for 'O' output modifier");
6213 return;
6214 }
6215
6216 if (ad.disp)
6217 output_addr_const (file, ad.disp);
6218 else
6219 fprintf (file, "0");
6220 }
6221 return;
6222
6223 case 'R':
6224 {
6225 struct s390_address ad;
6226 int ret;
6227
6228 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
6229
6230 if (!ret
6231 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6232 || ad.indx)
6233 {
6234 output_operand_lossage ("invalid address for 'R' output modifier");
6235 return;
6236 }
6237
6238 if (ad.base)
6239 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
6240 else
6241 fprintf (file, "0");
6242 }
6243 return;
6244
6245 case 'S':
6246 {
6247 struct s390_address ad;
6248 int ret;
6249
6250 if (!MEM_P (x))
6251 {
6252 output_operand_lossage ("memory reference expected for "
6253 "'S' output modifier");
6254 return;
6255 }
6256 ret = s390_decompose_address (XEXP (x, 0), &ad);
6257
6258 if (!ret
6259 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6260 || ad.indx)
6261 {
6262 output_operand_lossage ("invalid address for 'S' output modifier");
6263 return;
6264 }
6265
6266 if (ad.disp)
6267 output_addr_const (file, ad.disp);
6268 else
6269 fprintf (file, "0");
6270
6271 if (ad.base)
6272 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
6273 }
6274 return;
6275
6276 case 'N':
6277 if (GET_CODE (x) == REG)
6278 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
6279 else if (GET_CODE (x) == MEM)
6280 x = change_address (x, VOIDmode,
6281 plus_constant (Pmode, XEXP (x, 0), 4));
6282 else
6283 output_operand_lossage ("register or memory expression expected "
6284 "for 'N' output modifier");
6285 break;
6286
6287 case 'M':
6288 if (GET_CODE (x) == REG)
6289 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
6290 else if (GET_CODE (x) == MEM)
6291 x = change_address (x, VOIDmode,
6292 plus_constant (Pmode, XEXP (x, 0), 8));
6293 else
6294 output_operand_lossage ("register or memory expression expected "
6295 "for 'M' output modifier");
6296 break;
6297
6298 case 'Y':
6299 print_shift_count_operand (file, x);
6300 return;
6301 }
6302
6303 switch (GET_CODE (x))
6304 {
6305 case REG:
6306 /* Print FP regs as fx instead of vx when they are accessed
6307 through non-vector mode. */
6308 if (code == 'v'
6309 || VECTOR_NOFP_REG_P (x)
6310 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
6311 || (VECTOR_REG_P (x)
6312 && (GET_MODE_SIZE (GET_MODE (x)) /
6313 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
6314 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
6315 else
6316 fprintf (file, "%s", reg_names[REGNO (x)]);
6317 break;
6318
6319 case MEM:
6320 output_address (XEXP (x, 0));
6321 break;
6322
6323 case CONST:
6324 case CODE_LABEL:
6325 case LABEL_REF:
6326 case SYMBOL_REF:
6327 output_addr_const (file, x);
6328 break;
6329
6330 case CONST_INT:
6331 ival = INTVAL (x);
6332 switch (code)
6333 {
6334 case 0:
6335 break;
6336 case 'b':
6337 ival &= 0xff;
6338 break;
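/* 'c' and 'h' below reinterpret the masked bits as a signed quantity;
   e.g. for 'c', ival = 0xf0 yields (0xf0 ^ 0x80) - 0x80 = -16.  */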
6339 case 'c':
6340 ival = ((ival & 0xff) ^ 0x80) - 0x80;
6341 break;
6342 case 'x':
6343 ival &= 0xffff;
6344 break;
6345 case 'h':
6346 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
6347 break;
6348 case 'i':
6349 ival = s390_extract_part (x, HImode, 0);
6350 break;
6351 case 'j':
6352 ival = s390_extract_part (x, HImode, -1);
6353 break;
6354 case 'k':
6355 ival = s390_extract_part (x, SImode, 0);
6356 break;
6357 case 'm':
6358 ival = s390_extract_part (x, SImode, -1);
6359 break;
6360 case 'o':
6361 ival &= 0xffffffff;
6362 break;
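/* Worked example, assuming the POS returned by s390_contiguous_bitmask_p
   counts from the least significant bit: for ival = 0x0000ff00 we get
   pos = 8, len = 8, so 's'/'t' print the start bit 64 - 8 - 8 = 48 and
   'e'/'f' print the end bit 64 - 1 - 8 = 55, i.e. the MSB-0 bit numbers
   used by instructions such as RISBG.  */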
6363 case 'e': case 'f':
6364 case 's': case 't':
6365 {
6366 int pos, len;
6367 bool ok;
6368
6369 len = (code == 's' || code == 'e' ? 64 : 32);
6370 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
6371 gcc_assert (ok);
6372 if (code == 's' || code == 't')
6373 ival = 64 - pos - len;
6374 else
6375 ival = 64 - 1 - pos;
6376 }
6377 break;
6378 default:
6379 output_operand_lossage ("invalid constant for output modifier '%c'", code);
6380 }
6381 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
6382 break;
6383
6384 case CONST_DOUBLE:
6385 gcc_assert (GET_MODE (x) == VOIDmode);
6386 if (code == 'b')
6387 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
6388 else if (code == 'x')
6389 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
6390 else if (code == 'h')
6391 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
6392 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
6393 else
6394 {
6395 if (code == 0)
6396 output_operand_lossage ("invalid constant - try using "
6397 "an output modifier");
6398 else
6399 output_operand_lossage ("invalid constant for output modifier '%c'",
6400 code);
6401 }
6402 break;
6403 case CONST_VECTOR:
6404 switch (code)
6405 {
6406 case 'e':
6407 case 's':
6408 {
6409 int start, stop, inner_len;
6410 bool ok;
6411
6412 inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x));
6413 ok = s390_contiguous_bitmask_vector_p (x, &start, &stop);
6414 gcc_assert (ok);
6415 if (code == 's' || code == 't')
6416 ival = inner_len - stop - 1;
6417 else
6418 ival = inner_len - start - 1;
6419 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
6420 }
6421 break;
6422 case 't':
6423 {
6424 unsigned mask;
6425 bool ok = s390_bytemask_vector_p (x, &mask);
6426 gcc_assert (ok);
6427 fprintf (file, "%u", mask);
6428 }
6429 break;
6430
6431 default:
6432 output_operand_lossage ("invalid constant vector for output "
6433 "modifier '%c'", code);
6434 }
6435 break;
6436
6437 default:
6438 if (code == 0)
6439 output_operand_lossage ("invalid expression - try using "
6440 "an output modifier");
6441 else
6442 output_operand_lossage ("invalid expression for output "
6443 "modifier '%c'", code);
6444 break;
6445 }
6446 }
6447
6448 /* Target hook for assembling integer objects. We need to define it
6449 here to work around a bug in some versions of GAS, which couldn't
6450 handle values smaller than INT_MIN when printed in decimal. */
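/* For example, (const_int -2147483649) emitted as an aligned 8-byte
   object becomes something like "\t.quad\t0xffffffff7fffffff" rather
   than the decimal form that such assemblers mis-parse.  */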
6451
6452 static bool
6453 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
6454 {
6455 if (size == 8 && aligned_p
6456 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
6457 {
6458 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
6459 INTVAL (x));
6460 return true;
6461 }
6462 return default_assemble_integer (x, size, aligned_p);
6463 }
6464
6465 /* Returns true if register REGNO is used for forming
6466 a memory address in expression X. */
6467
6468 static bool
6469 reg_used_in_mem_p (int regno, rtx x)
6470 {
6471 enum rtx_code code = GET_CODE (x);
6472 int i, j;
6473 const char *fmt;
6474
6475 if (code == MEM)
6476 {
6477 if (refers_to_regno_p (regno, XEXP (x, 0)))
6478 return true;
6479 }
6480 else if (code == SET
6481 && GET_CODE (SET_DEST (x)) == PC)
6482 {
6483 if (refers_to_regno_p (regno, SET_SRC (x)))
6484 return true;
6485 }
6486
6487 fmt = GET_RTX_FORMAT (code);
6488 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
6489 {
6490 if (fmt[i] == 'e'
6491 && reg_used_in_mem_p (regno, XEXP (x, i)))
6492 return true;
6493
6494 else if (fmt[i] == 'E')
6495 for (j = 0; j < XVECLEN (x, i); j++)
6496 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
6497 return true;
6498 }
6499 return false;
6500 }
6501
6502 /* Returns true if expression DEP_RTX sets an address register
6503 used by instruction INSN to address memory. */
6504
6505 static bool
6506 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
6507 {
6508 rtx target, pat;
6509
6510 if (NONJUMP_INSN_P (dep_rtx))
6511 dep_rtx = PATTERN (dep_rtx);
6512
6513 if (GET_CODE (dep_rtx) == SET)
6514 {
6515 target = SET_DEST (dep_rtx);
6516 if (GET_CODE (target) == STRICT_LOW_PART)
6517 target = XEXP (target, 0);
6518 while (GET_CODE (target) == SUBREG)
6519 target = SUBREG_REG (target);
6520
6521 if (GET_CODE (target) == REG)
6522 {
6523 int regno = REGNO (target);
6524
6525 if (s390_safe_attr_type (insn) == TYPE_LA)
6526 {
6527 pat = PATTERN (insn);
6528 if (GET_CODE (pat) == PARALLEL)
6529 {
6530 gcc_assert (XVECLEN (pat, 0) == 2);
6531 pat = XVECEXP (pat, 0, 0);
6532 }
6533 gcc_assert (GET_CODE (pat) == SET);
6534 return refers_to_regno_p (regno, SET_SRC (pat));
6535 }
6536 else if (get_attr_atype (insn) == ATYPE_AGEN)
6537 return reg_used_in_mem_p (regno, PATTERN (insn));
6538 }
6539 }
6540 return false;
6541 }
6542
6543 /* Return 1 if DEP_INSN sets a register that INSN uses in the agen unit. */
6544
6545 int
6546 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
6547 {
6548 rtx dep_rtx = PATTERN (dep_insn);
6549 int i;
6550
6551 if (GET_CODE (dep_rtx) == SET
6552 && addr_generation_dependency_p (dep_rtx, insn))
6553 return 1;
6554 else if (GET_CODE (dep_rtx) == PARALLEL)
6555 {
6556 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
6557 {
6558 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
6559 return 1;
6560 }
6561 }
6562 return 0;
6563 }
6564
6565
6566 /* A C statement (sans semicolon) to update the integer scheduling priority
6567 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
6568 reduce the priority to execute INSN later. Do not define this macro if
6569 you do not need to adjust the scheduling priorities of insns.
6570
6571 A STD instruction should be scheduled earlier,
6572 in order to use the bypass. */
6573 static int
6574 s390_adjust_priority (rtx_insn *insn, int priority)
6575 {
6576 if (! INSN_P (insn))
6577 return priority;
6578
6579 if (s390_tune != PROCESSOR_2084_Z990
6580 && s390_tune != PROCESSOR_2094_Z9_109
6581 && s390_tune != PROCESSOR_2097_Z10
6582 && s390_tune != PROCESSOR_2817_Z196
6583 && s390_tune != PROCESSOR_2827_ZEC12
6584 && s390_tune != PROCESSOR_2964_Z13)
6585 return priority;
6586
6587 switch (s390_safe_attr_type (insn))
6588 {
6589 case TYPE_FSTOREDF:
6590 case TYPE_FSTORESF:
6591 priority = priority << 3;
6592 break;
6593 case TYPE_STORE:
6594 case TYPE_STM:
6595 priority = priority << 1;
6596 break;
6597 default:
6598 break;
6599 }
6600 return priority;
6601 }
6602
6603
6604 /* The number of instructions that can be issued per cycle. */
6605
6606 static int
6607 s390_issue_rate (void)
6608 {
6609 switch (s390_tune)
6610 {
6611 case PROCESSOR_2084_Z990:
6612 case PROCESSOR_2094_Z9_109:
6613 case PROCESSOR_2817_Z196:
6614 return 3;
6615 case PROCESSOR_2097_Z10:
6616 return 2;
6617 /* Starting with EC12 we use the sched_reorder hook to take care
6618 of instruction dispatch constraints. The algorithm only
6619 picks the best instruction and assumes only a single
6620 instruction gets issued per cycle. */
6621 case PROCESSOR_2827_ZEC12:
6622 default:
6623 return 1;
6624 }
6625 }
6626
6627 static int
6628 s390_first_cycle_multipass_dfa_lookahead (void)
6629 {
6630 return 4;
6631 }
6632
6633 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
6634 Fix up MEMs as required. */
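/* Schematically, a literal pool reference such as

     (mem (symbol_ref ".LC0"))

   (with .LC0 living in the constant pool) is rewritten into

     (mem (unspec [(symbol_ref ".LC0") (reg <base>)] UNSPEC_LTREF))

   so that the dependency on the literal pool base register is explicit.
   The symbol name is just an example.  */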
6635
6636 static void
6637 annotate_constant_pool_refs (rtx *x)
6638 {
6639 int i, j;
6640 const char *fmt;
6641
6642 gcc_assert (GET_CODE (*x) != SYMBOL_REF
6643 || !CONSTANT_POOL_ADDRESS_P (*x));
6644
6645 /* Literal pool references can only occur inside a MEM ... */
6646 if (GET_CODE (*x) == MEM)
6647 {
6648 rtx memref = XEXP (*x, 0);
6649
6650 if (GET_CODE (memref) == SYMBOL_REF
6651 && CONSTANT_POOL_ADDRESS_P (memref))
6652 {
6653 rtx base = cfun->machine->base_reg;
6654 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
6655 UNSPEC_LTREF);
6656
6657 *x = replace_equiv_address (*x, addr);
6658 return;
6659 }
6660
6661 if (GET_CODE (memref) == CONST
6662 && GET_CODE (XEXP (memref, 0)) == PLUS
6663 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
6664 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
6665 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
6666 {
6667 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
6668 rtx sym = XEXP (XEXP (memref, 0), 0);
6669 rtx base = cfun->machine->base_reg;
6670 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
6671 UNSPEC_LTREF);
6672
6673 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
6674 return;
6675 }
6676 }
6677
6678 /* ... or a load-address type pattern. */
6679 if (GET_CODE (*x) == SET)
6680 {
6681 rtx addrref = SET_SRC (*x);
6682
6683 if (GET_CODE (addrref) == SYMBOL_REF
6684 && CONSTANT_POOL_ADDRESS_P (addrref))
6685 {
6686 rtx base = cfun->machine->base_reg;
6687 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
6688 UNSPEC_LTREF);
6689
6690 SET_SRC (*x) = addr;
6691 return;
6692 }
6693
6694 if (GET_CODE (addrref) == CONST
6695 && GET_CODE (XEXP (addrref, 0)) == PLUS
6696 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
6697 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
6698 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
6699 {
6700 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
6701 rtx sym = XEXP (XEXP (addrref, 0), 0);
6702 rtx base = cfun->machine->base_reg;
6703 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
6704 UNSPEC_LTREF);
6705
6706 SET_SRC (*x) = plus_constant (Pmode, addr, off);
6707 return;
6708 }
6709 }
6710
6711 /* Annotate LTREL_BASE as well. */
6712 if (GET_CODE (*x) == UNSPEC
6713 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
6714 {
6715 rtx base = cfun->machine->base_reg;
6716 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
6717 UNSPEC_LTREL_BASE);
6718 return;
6719 }
6720
6721 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6722 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6723 {
6724 if (fmt[i] == 'e')
6725 {
6726 annotate_constant_pool_refs (&XEXP (*x, i));
6727 }
6728 else if (fmt[i] == 'E')
6729 {
6730 for (j = 0; j < XVECLEN (*x, i); j++)
6731 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
6732 }
6733 }
6734 }
6735
6736 /* Split all branches that exceed the maximum distance.
6737 Returns true if this created a new literal pool entry. */
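/* Schematically: a conditional branch whose target is out of range is
   rewritten so that the target address (for non-PIC code), or its
   literal-pool offset plus the pool base (for PIC code), is first loaded
   into the return register, and the branch then goes through that
   register instead of using a PC-relative displacement.  */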
6738
6739 static int
6740 s390_split_branches (void)
6741 {
6742 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
6743 int new_literal = 0, ret;
6744 rtx_insn *insn;
6745 rtx pat, target;
6746 rtx *label;
6747
6748 /* We need correct insn addresses. */
6749
6750 shorten_branches (get_insns ());
6751
6752 /* Find all branches that exceed 64KB, and split them. */
6753
6754 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6755 {
6756 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
6757 continue;
6758
6759 pat = PATTERN (insn);
6760 if (GET_CODE (pat) == PARALLEL)
6761 pat = XVECEXP (pat, 0, 0);
6762 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
6763 continue;
6764
6765 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
6766 {
6767 label = &SET_SRC (pat);
6768 }
6769 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
6770 {
6771 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
6772 label = &XEXP (SET_SRC (pat), 1);
6773 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
6774 label = &XEXP (SET_SRC (pat), 2);
6775 else
6776 continue;
6777 }
6778 else
6779 continue;
6780
6781 if (get_attr_length (insn) <= 4)
6782 continue;
6783
6784 /* We are going to use the return register as scratch register,
6785 make sure it will be saved/restored by the prologue/epilogue. */
6786 cfun_frame_layout.save_return_addr_p = 1;
6787
6788 if (!flag_pic)
6789 {
6790 new_literal = 1;
6791 rtx mem = force_const_mem (Pmode, *label);
6792 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
6793 insn);
6794 INSN_ADDRESSES_NEW (set_insn, -1);
6795 annotate_constant_pool_refs (&PATTERN (set_insn));
6796
6797 target = temp_reg;
6798 }
6799 else
6800 {
6801 new_literal = 1;
6802 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
6803 UNSPEC_LTREL_OFFSET);
6804 target = gen_rtx_CONST (Pmode, target);
6805 target = force_const_mem (Pmode, target);
6806 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
6807 insn);
6808 INSN_ADDRESSES_NEW (set_insn, -1);
6809 annotate_constant_pool_refs (&PATTERN (set_insn));
6810
6811 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
6812 cfun->machine->base_reg),
6813 UNSPEC_LTREL_BASE);
6814 target = gen_rtx_PLUS (Pmode, temp_reg, target);
6815 }
6816
6817 ret = validate_change (insn, label, target, 0);
6818 gcc_assert (ret);
6819 }
6820
6821 return new_literal;
6822 }
6823
6824
6825 /* Find an annotated literal pool symbol referenced in RTX X,
6826 and store it at REF. Will abort if X contains references to
6827 more than one such pool symbol; multiple references to the same
6828 symbol are allowed, however.
6829
6830 The rtx pointed to by REF must be initialized to NULL_RTX
6831 by the caller before calling this routine. */
6832
6833 static void
6834 find_constant_pool_ref (rtx x, rtx *ref)
6835 {
6836 int i, j;
6837 const char *fmt;
6838
6839 /* Ignore LTREL_BASE references. */
6840 if (GET_CODE (x) == UNSPEC
6841 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6842 return;
6843 /* Likewise POOL_ENTRY insns. */
6844 if (GET_CODE (x) == UNSPEC_VOLATILE
6845 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
6846 return;
6847
6848 gcc_assert (GET_CODE (x) != SYMBOL_REF
6849 || !CONSTANT_POOL_ADDRESS_P (x));
6850
6851 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
6852 {
6853 rtx sym = XVECEXP (x, 0, 0);
6854 gcc_assert (GET_CODE (sym) == SYMBOL_REF
6855 && CONSTANT_POOL_ADDRESS_P (sym));
6856
6857 if (*ref == NULL_RTX)
6858 *ref = sym;
6859 else
6860 gcc_assert (*ref == sym);
6861
6862 return;
6863 }
6864
6865 fmt = GET_RTX_FORMAT (GET_CODE (x));
6866 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6867 {
6868 if (fmt[i] == 'e')
6869 {
6870 find_constant_pool_ref (XEXP (x, i), ref);
6871 }
6872 else if (fmt[i] == 'E')
6873 {
6874 for (j = 0; j < XVECLEN (x, i); j++)
6875 find_constant_pool_ref (XVECEXP (x, i, j), ref);
6876 }
6877 }
6878 }
6879
6880 /* Replace every reference to the annotated literal pool
6881 symbol REF in X by its base plus OFFSET. */
6882
6883 static void
6884 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
6885 {
6886 int i, j;
6887 const char *fmt;
6888
6889 gcc_assert (*x != ref);
6890
6891 if (GET_CODE (*x) == UNSPEC
6892 && XINT (*x, 1) == UNSPEC_LTREF
6893 && XVECEXP (*x, 0, 0) == ref)
6894 {
6895 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
6896 return;
6897 }
6898
6899 if (GET_CODE (*x) == PLUS
6900 && GET_CODE (XEXP (*x, 1)) == CONST_INT
6901 && GET_CODE (XEXP (*x, 0)) == UNSPEC
6902 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
6903 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
6904 {
6905 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
6906 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
6907 return;
6908 }
6909
6910 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6911 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6912 {
6913 if (fmt[i] == 'e')
6914 {
6915 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
6916 }
6917 else if (fmt[i] == 'E')
6918 {
6919 for (j = 0; j < XVECLEN (*x, i); j++)
6920 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
6921 }
6922 }
6923 }
6924
6925 /* Check whether X contains an UNSPEC_LTREL_BASE.
6926 Return its constant pool symbol if found, NULL_RTX otherwise. */
6927
6928 static rtx
6929 find_ltrel_base (rtx x)
6930 {
6931 int i, j;
6932 const char *fmt;
6933
6934 if (GET_CODE (x) == UNSPEC
6935 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6936 return XVECEXP (x, 0, 0);
6937
6938 fmt = GET_RTX_FORMAT (GET_CODE (x));
6939 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6940 {
6941 if (fmt[i] == 'e')
6942 {
6943 rtx fnd = find_ltrel_base (XEXP (x, i));
6944 if (fnd)
6945 return fnd;
6946 }
6947 else if (fmt[i] == 'E')
6948 {
6949 for (j = 0; j < XVECLEN (x, i); j++)
6950 {
6951 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
6952 if (fnd)
6953 return fnd;
6954 }
6955 }
6956 }
6957
6958 return NULL_RTX;
6959 }
6960
6961 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
6962
6963 static void
6964 replace_ltrel_base (rtx *x)
6965 {
6966 int i, j;
6967 const char *fmt;
6968
6969 if (GET_CODE (*x) == UNSPEC
6970 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
6971 {
6972 *x = XVECEXP (*x, 0, 1);
6973 return;
6974 }
6975
6976 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6977 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6978 {
6979 if (fmt[i] == 'e')
6980 {
6981 replace_ltrel_base (&XEXP (*x, i));
6982 }
6983 else if (fmt[i] == 'E')
6984 {
6985 for (j = 0; j < XVECLEN (*x, i); j++)
6986 replace_ltrel_base (&XVECEXP (*x, i, j));
6987 }
6988 }
6989 }
6990
6991
6992 /* We keep a list of constants which we have to add to internal
6993 constant tables in the middle of large functions. */
6994
6995 #define NR_C_MODES 31
6996 machine_mode constant_modes[NR_C_MODES] =
6997 {
6998 TFmode, TImode, TDmode,
6999 V16QImode, V8HImode, V4SImode, V2DImode, V4SFmode, V2DFmode, V1TFmode,
7000 DFmode, DImode, DDmode,
7001 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
7002 SFmode, SImode, SDmode,
7003 V4QImode, V2HImode, V1SImode, V1SFmode,
7004 HImode,
7005 V2QImode, V1HImode,
7006 QImode,
7007 V1QImode
7008 };
7009
7010 struct constant
7011 {
7012 struct constant *next;
7013 rtx value;
7014 rtx_code_label *label;
7015 };
7016
7017 struct constant_pool
7018 {
7019 struct constant_pool *next;
7020 rtx_insn *first_insn;
7021 rtx_insn *pool_insn;
7022 bitmap insns;
7023 rtx_insn *emit_pool_after;
7024
7025 struct constant *constants[NR_C_MODES];
7026 struct constant *execute;
7027 rtx_code_label *label;
7028 int size;
7029 };
7030
7031 /* Allocate new constant_pool structure. */
7032
7033 static struct constant_pool *
7034 s390_alloc_pool (void)
7035 {
7036 struct constant_pool *pool;
7037 int i;
7038
7039 pool = (struct constant_pool *) xmalloc (sizeof *pool);
7040 pool->next = NULL;
7041 for (i = 0; i < NR_C_MODES; i++)
7042 pool->constants[i] = NULL;
7043
7044 pool->execute = NULL;
7045 pool->label = gen_label_rtx ();
7046 pool->first_insn = NULL;
7047 pool->pool_insn = NULL;
7048 pool->insns = BITMAP_ALLOC (NULL);
7049 pool->size = 0;
7050 pool->emit_pool_after = NULL;
7051
7052 return pool;
7053 }
7054
7055 /* Create new constant pool covering instructions starting at INSN
7056 and chain it to the end of POOL_LIST. */
7057
7058 static struct constant_pool *
7059 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
7060 {
7061 struct constant_pool *pool, **prev;
7062
7063 pool = s390_alloc_pool ();
7064 pool->first_insn = insn;
7065
7066 for (prev = pool_list; *prev; prev = &(*prev)->next)
7067 ;
7068 *prev = pool;
7069
7070 return pool;
7071 }
7072
7073 /* End range of instructions covered by POOL at INSN and emit
7074 placeholder insn representing the pool. */
7075
7076 static void
7077 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
7078 {
7079 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
7080
7081 if (!insn)
7082 insn = get_last_insn ();
7083
7084 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
7085 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
7086 }
7087
7088 /* Add INSN to the list of insns covered by POOL. */
7089
7090 static void
7091 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
7092 {
7093 bitmap_set_bit (pool->insns, INSN_UID (insn));
7094 }
7095
7096 /* Return pool out of POOL_LIST that covers INSN. */
7097
7098 static struct constant_pool *
7099 s390_find_pool (struct constant_pool *pool_list, rtx insn)
7100 {
7101 struct constant_pool *pool;
7102
7103 for (pool = pool_list; pool; pool = pool->next)
7104 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
7105 break;
7106
7107 return pool;
7108 }
7109
7110 /* Add constant VAL of mode MODE to the constant pool POOL. */
7111
7112 static void
7113 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
7114 {
7115 struct constant *c;
7116 int i;
7117
7118 for (i = 0; i < NR_C_MODES; i++)
7119 if (constant_modes[i] == mode)
7120 break;
7121 gcc_assert (i != NR_C_MODES);
7122
7123 for (c = pool->constants[i]; c != NULL; c = c->next)
7124 if (rtx_equal_p (val, c->value))
7125 break;
7126
7127 if (c == NULL)
7128 {
7129 c = (struct constant *) xmalloc (sizeof *c);
7130 c->value = val;
7131 c->label = gen_label_rtx ();
7132 c->next = pool->constants[i];
7133 pool->constants[i] = c;
7134 pool->size += GET_MODE_SIZE (mode);
7135 }
7136 }
7137
7138 /* Return an rtx that represents the offset of X from the start of
7139 pool POOL. */
7140
7141 static rtx
7142 s390_pool_offset (struct constant_pool *pool, rtx x)
7143 {
7144 rtx label;
7145
7146 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
7147 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
7148 UNSPEC_POOL_OFFSET);
7149 return gen_rtx_CONST (GET_MODE (x), x);
7150 }
7151
7152 /* Find constant VAL of mode MODE in the constant pool POOL.
7153 Return an RTX describing the distance from the start of
7154 the pool to the location of the new constant. */
7155
7156 static rtx
7157 s390_find_constant (struct constant_pool *pool, rtx val,
7158 machine_mode mode)
7159 {
7160 struct constant *c;
7161 int i;
7162
7163 for (i = 0; i < NR_C_MODES; i++)
7164 if (constant_modes[i] == mode)
7165 break;
7166 gcc_assert (i != NR_C_MODES);
7167
7168 for (c = pool->constants[i]; c != NULL; c = c->next)
7169 if (rtx_equal_p (val, c->value))
7170 break;
7171
7172 gcc_assert (c);
7173
7174 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
7175 }
7176
7177 /* Check whether INSN is an execute. Return the label_ref to its
7178 execute target template if so, NULL_RTX otherwise. */
7179
7180 static rtx
7181 s390_execute_label (rtx insn)
7182 {
7183 if (NONJUMP_INSN_P (insn)
7184 && GET_CODE (PATTERN (insn)) == PARALLEL
7185 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
7186 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
7187 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
7188
7189 return NULL_RTX;
7190 }
7191
7192 /* Add execute target for INSN to the constant pool POOL. */
7193
7194 static void
7195 s390_add_execute (struct constant_pool *pool, rtx insn)
7196 {
7197 struct constant *c;
7198
7199 for (c = pool->execute; c != NULL; c = c->next)
7200 if (INSN_UID (insn) == INSN_UID (c->value))
7201 break;
7202
7203 if (c == NULL)
7204 {
7205 c = (struct constant *) xmalloc (sizeof *c);
7206 c->value = insn;
7207 c->label = gen_label_rtx ();
7208 c->next = pool->execute;
7209 pool->execute = c;
7210 pool->size += 6;
7211 }
7212 }
7213
7214 /* Find execute target for INSN in the constant pool POOL.
7215 Return an RTX describing the distance from the start of
7216 the pool to the location of the execute target. */
7217
7218 static rtx
7219 s390_find_execute (struct constant_pool *pool, rtx insn)
7220 {
7221 struct constant *c;
7222
7223 for (c = pool->execute; c != NULL; c = c->next)
7224 if (INSN_UID (insn) == INSN_UID (c->value))
7225 break;
7226
7227 gcc_assert (c);
7228
7229 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
7230 }
7231
7232 /* For an execute INSN, extract the execute target template. */
7233
7234 static rtx
7235 s390_execute_target (rtx insn)
7236 {
7237 rtx pattern = PATTERN (insn);
7238 gcc_assert (s390_execute_label (insn));
7239
7240 if (XVECLEN (pattern, 0) == 2)
7241 {
7242 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
7243 }
7244 else
7245 {
7246 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
7247 int i;
7248
7249 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
7250 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
7251
7252 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
7253 }
7254
7255 return pattern;
7256 }
7257
7258 /* Indicate that INSN cannot be duplicated. This is the case for
7259 execute insns that carry a unique label. */
7260
7261 static bool
7262 s390_cannot_copy_insn_p (rtx_insn *insn)
7263 {
7264 rtx label = s390_execute_label (insn);
7265 return label && label != const0_rtx;
7266 }
7267
7268 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
7269 do not emit the pool base label. */
7270
7271 static void
7272 s390_dump_pool (struct constant_pool *pool, bool remote_label)
7273 {
7274 struct constant *c;
7275 rtx_insn *insn = pool->pool_insn;
7276 int i;
7277
7278 /* Switch to rodata section. */
7279 if (TARGET_CPU_ZARCH)
7280 {
7281 insn = emit_insn_after (gen_pool_section_start (), insn);
7282 INSN_ADDRESSES_NEW (insn, -1);
7283 }
7284
7285 /* Ensure minimum pool alignment. */
7286 if (TARGET_CPU_ZARCH)
7287 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
7288 else
7289 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
7290 INSN_ADDRESSES_NEW (insn, -1);
7291
7292 /* Emit pool base label. */
7293 if (!remote_label)
7294 {
7295 insn = emit_label_after (pool->label, insn);
7296 INSN_ADDRESSES_NEW (insn, -1);
7297 }
7298
7299 /* Dump constants in descending alignment requirement order,
7300 ensuring proper alignment for every constant. */
7301 for (i = 0; i < NR_C_MODES; i++)
7302 for (c = pool->constants[i]; c; c = c->next)
7303 {
7304 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
7305 rtx value = copy_rtx (c->value);
7306 if (GET_CODE (value) == CONST
7307 && GET_CODE (XEXP (value, 0)) == UNSPEC
7308 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
7309 && XVECLEN (XEXP (value, 0), 0) == 1)
7310 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
7311
7312 insn = emit_label_after (c->label, insn);
7313 INSN_ADDRESSES_NEW (insn, -1);
7314
7315 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
7316 gen_rtvec (1, value),
7317 UNSPECV_POOL_ENTRY);
7318 insn = emit_insn_after (value, insn);
7319 INSN_ADDRESSES_NEW (insn, -1);
7320 }
7321
7322 /* Ensure minimum alignment for instructions. */
7323 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
7324 INSN_ADDRESSES_NEW (insn, -1);
7325
7326 /* Output in-pool execute template insns. */
7327 for (c = pool->execute; c; c = c->next)
7328 {
7329 insn = emit_label_after (c->label, insn);
7330 INSN_ADDRESSES_NEW (insn, -1);
7331
7332 insn = emit_insn_after (s390_execute_target (c->value), insn);
7333 INSN_ADDRESSES_NEW (insn, -1);
7334 }
7335
7336 /* Switch back to previous section. */
7337 if (TARGET_CPU_ZARCH)
7338 {
7339 insn = emit_insn_after (gen_pool_section_end (), insn);
7340 INSN_ADDRESSES_NEW (insn, -1);
7341 }
7342
7343 insn = emit_barrier_after (insn);
7344 INSN_ADDRESSES_NEW (insn, -1);
7345
7346 /* Remove placeholder insn. */
7347 remove_insn (pool->pool_insn);
7348 }
7349
7350 /* Free all memory used by POOL. */
7351
7352 static void
7353 s390_free_pool (struct constant_pool *pool)
7354 {
7355 struct constant *c, *next;
7356 int i;
7357
7358 for (i = 0; i < NR_C_MODES; i++)
7359 for (c = pool->constants[i]; c; c = next)
7360 {
7361 next = c->next;
7362 free (c);
7363 }
7364
7365 for (c = pool->execute; c; c = next)
7366 {
7367 next = c->next;
7368 free (c);
7369 }
7370
7371 BITMAP_FREE (pool->insns);
7372 free (pool);
7373 }
7374
7375
7376 /* Collect main literal pool. Return NULL on overflow. */
7377
7378 static struct constant_pool *
7379 s390_mainpool_start (void)
7380 {
7381 struct constant_pool *pool;
7382 rtx_insn *insn;
7383
7384 pool = s390_alloc_pool ();
7385
7386 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7387 {
7388 if (NONJUMP_INSN_P (insn)
7389 && GET_CODE (PATTERN (insn)) == SET
7390 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
7391 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
7392 {
7393 /* There might be two main_pool instructions if base_reg
7394 is call-clobbered; one for shrink-wrapped code and one
7395 for the rest. We want to keep the first. */
7396 if (pool->pool_insn)
7397 {
7398 insn = PREV_INSN (insn);
7399 delete_insn (NEXT_INSN (insn));
7400 continue;
7401 }
7402 pool->pool_insn = insn;
7403 }
7404
7405 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
7406 {
7407 s390_add_execute (pool, insn);
7408 }
7409 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
7410 {
7411 rtx pool_ref = NULL_RTX;
7412 find_constant_pool_ref (PATTERN (insn), &pool_ref);
7413 if (pool_ref)
7414 {
7415 rtx constant = get_pool_constant (pool_ref);
7416 machine_mode mode = get_pool_mode (pool_ref);
7417 s390_add_constant (pool, constant, mode);
7418 }
7419 }
7420
7421 /* If hot/cold partitioning is enabled we have to make sure that
7422 the literal pool is emitted in the same section where the
7423 initialization of the literal pool base pointer takes place.
7424 emit_pool_after is only used in the non-overflow case on non-Z
7425 CPUs where we can emit the literal pool at the end of the
7426 function body within the text section. */
7427 if (NOTE_P (insn)
7428 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
7429 && !pool->emit_pool_after)
7430 pool->emit_pool_after = PREV_INSN (insn);
7431 }
7432
7433 gcc_assert (pool->pool_insn || pool->size == 0);
7434
7435 if (pool->size >= 4096)
7436 {
7437 /* We're going to chunkify the pool, so remove the main
7438 pool placeholder insn. */
7439 remove_insn (pool->pool_insn);
7440
7441 s390_free_pool (pool);
7442 pool = NULL;
7443 }
7444
7445 /* If the function ends with the section where the literal pool
7446 should be emitted, set the marker to its end. */
7447 if (pool && !pool->emit_pool_after)
7448 pool->emit_pool_after = get_last_insn ();
7449
7450 return pool;
7451 }
7452
7453 /* POOL holds the main literal pool as collected by s390_mainpool_start.
7454 Modify the current function to output the pool constants as well as
7455 the pool register setup instruction. */
7456
7457 static void
7458 s390_mainpool_finish (struct constant_pool *pool)
7459 {
7460 rtx base_reg = cfun->machine->base_reg;
7461
7462 /* If the pool is empty, we're done. */
7463 if (pool->size == 0)
7464 {
7465 /* We don't actually need a base register after all. */
7466 cfun->machine->base_reg = NULL_RTX;
7467
7468 if (pool->pool_insn)
7469 remove_insn (pool->pool_insn);
7470 s390_free_pool (pool);
7471 return;
7472 }
7473
7474 /* We need correct insn addresses. */
7475 shorten_branches (get_insns ());
7476
7477 /* On zSeries, we use a LARL to load the pool register. The pool is
7478 located in the .rodata section, so we emit it after the function. */
7479 if (TARGET_CPU_ZARCH)
7480 {
7481 rtx set = gen_main_base_64 (base_reg, pool->label);
7482 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
7483 INSN_ADDRESSES_NEW (insn, -1);
7484 remove_insn (pool->pool_insn);
7485
7486 insn = get_last_insn ();
7487 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
7488 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
7489
7490 s390_dump_pool (pool, 0);
7491 }
7492
7493 /* On S/390, if the total size of the function's code plus literal pool
7494 does not exceed 4096 bytes, we use BASR to set up a function base
7495 pointer, and emit the literal pool at the end of the function. */
7496 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
7497 + pool->size + 8 /* alignment slop */ < 4096)
7498 {
7499 rtx set = gen_main_base_31_small (base_reg, pool->label);
7500 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
7501 INSN_ADDRESSES_NEW (insn, -1);
7502 remove_insn (pool->pool_insn);
7503
7504 insn = emit_label_after (pool->label, insn);
7505 INSN_ADDRESSES_NEW (insn, -1);
7506
7507 /* emit_pool_after will be set by s390_mainpool_start to the
7508 last insn of the section where the literal pool should be
7509 emitted. */
7510 insn = pool->emit_pool_after;
7511
7512 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
7513 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
7514
7515 s390_dump_pool (pool, 1);
7516 }
7517
7518 /* Otherwise, we emit an inline literal pool and use BASR to branch
7519 over it, setting up the pool register at the same time. */
7520 else
7521 {
7522 rtx_code_label *pool_end = gen_label_rtx ();
7523
7524 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
7525 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
7526 JUMP_LABEL (insn) = pool_end;
7527 INSN_ADDRESSES_NEW (insn, -1);
7528 remove_insn (pool->pool_insn);
7529
7530 insn = emit_label_after (pool->label, insn);
7531 INSN_ADDRESSES_NEW (insn, -1);
7532
7533 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
7534 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
7535
7536 insn = emit_label_after (pool_end, pool->pool_insn);
7537 INSN_ADDRESSES_NEW (insn, -1);
7538
7539 s390_dump_pool (pool, 1);
7540 }
7541
7542
7543 /* Replace all literal pool references. */
7544
7545 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
7546 {
7547 if (INSN_P (insn))
7548 replace_ltrel_base (&PATTERN (insn));
7549
7550 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
7551 {
7552 rtx addr, pool_ref = NULL_RTX;
7553 find_constant_pool_ref (PATTERN (insn), &pool_ref);
7554 if (pool_ref)
7555 {
7556 if (s390_execute_label (insn))
7557 addr = s390_find_execute (pool, insn);
7558 else
7559 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
7560 get_pool_mode (pool_ref));
7561
7562 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
7563 INSN_CODE (insn) = -1;
7564 }
7565 }
7566 }
7567
7568
7569 /* Free the pool. */
7570 s390_free_pool (pool);
7571 }
7572
7573 /* POOL holds the main literal pool as collected by s390_mainpool_start.
7574 We have decided we cannot use this pool, so revert all changes
7575 to the current function that were done by s390_mainpool_start. */
7576 static void
7577 s390_mainpool_cancel (struct constant_pool *pool)
7578 {
7579 /* We didn't actually change the instruction stream, so simply
7580 free the pool memory. */
7581 s390_free_pool (pool);
7582 }
7583
7584
7585 /* Chunkify the literal pool. */
7586
7587 #define S390_POOL_CHUNK_MIN 0xc00
7588 #define S390_POOL_CHUNK_MAX 0xe00
7589
7590 static struct constant_pool *
7591 s390_chunkify_start (void)
7592 {
7593 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
7594 int extra_size = 0;
7595 bitmap far_labels;
7596 rtx pending_ltrel = NULL_RTX;
7597 rtx_insn *insn;
7598
7599 rtx (*gen_reload_base) (rtx, rtx) =
7600 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
7601
7602
7603 /* We need correct insn addresses. */
7604
7605 shorten_branches (get_insns ());
7606
7607 /* Scan all insns and move literals to pool chunks. */
7608
7609 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7610 {
7611 bool section_switch_p = false;
7612
7613 /* Check for pending LTREL_BASE. */
7614 if (INSN_P (insn))
7615 {
7616 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
7617 if (ltrel_base)
7618 {
7619 gcc_assert (ltrel_base == pending_ltrel);
7620 pending_ltrel = NULL_RTX;
7621 }
7622 }
7623
7624 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
7625 {
7626 if (!curr_pool)
7627 curr_pool = s390_start_pool (&pool_list, insn);
7628
7629 s390_add_execute (curr_pool, insn);
7630 s390_add_pool_insn (curr_pool, insn);
7631 }
7632 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
7633 {
7634 rtx pool_ref = NULL_RTX;
7635 find_constant_pool_ref (PATTERN (insn), &pool_ref);
7636 if (pool_ref)
7637 {
7638 rtx constant = get_pool_constant (pool_ref);
7639 machine_mode mode = get_pool_mode (pool_ref);
7640
7641 if (!curr_pool)
7642 curr_pool = s390_start_pool (&pool_list, insn);
7643
7644 s390_add_constant (curr_pool, constant, mode);
7645 s390_add_pool_insn (curr_pool, insn);
7646
7647 /* Don't split the pool chunk between a LTREL_OFFSET load
7648 and the corresponding LTREL_BASE. */
7649 if (GET_CODE (constant) == CONST
7650 && GET_CODE (XEXP (constant, 0)) == UNSPEC
7651 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
7652 {
7653 gcc_assert (!pending_ltrel);
7654 pending_ltrel = pool_ref;
7655 }
7656 }
7657 }
7658
7659 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
7660 {
7661 if (curr_pool)
7662 s390_add_pool_insn (curr_pool, insn);
7663 /* An LTREL_BASE must follow within the same basic block. */
7664 gcc_assert (!pending_ltrel);
7665 }
7666
7667 if (NOTE_P (insn))
7668 switch (NOTE_KIND (insn))
7669 {
7670 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
7671 section_switch_p = true;
7672 break;
7673 case NOTE_INSN_VAR_LOCATION:
7674 case NOTE_INSN_CALL_ARG_LOCATION:
7675 continue;
7676 default:
7677 break;
7678 }
7679
7680 if (!curr_pool
7681 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
7682 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
7683 continue;
7684
7685 if (TARGET_CPU_ZARCH)
7686 {
7687 if (curr_pool->size < S390_POOL_CHUNK_MAX)
7688 continue;
7689
7690 s390_end_pool (curr_pool, NULL);
7691 curr_pool = NULL;
7692 }
7693 else
7694 {
7695 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
7696 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
7697 + extra_size;
7698
7699 /* We will later have to insert base register reload insns.
7700 Those will have an effect on code size, which we need to
7701 consider here. This calculation makes rather pessimistic
7702 worst-case assumptions. */
7703 if (LABEL_P (insn))
7704 extra_size += 6;
7705
7706 if (chunk_size < S390_POOL_CHUNK_MIN
7707 && curr_pool->size < S390_POOL_CHUNK_MIN
7708 && !section_switch_p)
7709 continue;
7710
7711 /* Pool chunks can only be inserted after BARRIERs ... */
7712 if (BARRIER_P (insn))
7713 {
7714 s390_end_pool (curr_pool, insn);
7715 curr_pool = NULL;
7716 extra_size = 0;
7717 }
7718
7719 /* ... so if we don't find one in time, create one. */
7720 else if (chunk_size > S390_POOL_CHUNK_MAX
7721 || curr_pool->size > S390_POOL_CHUNK_MAX
7722 || section_switch_p)
7723 {
7724 rtx_insn *label, *jump, *barrier, *next, *prev;
7725
7726 if (!section_switch_p)
7727 {
7728 /* We can insert the barrier only after a 'real' insn. */
7729 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
7730 continue;
7731 if (get_attr_length (insn) == 0)
7732 continue;
7733 /* Don't separate LTREL_BASE from the corresponding
7734 LTREL_OFFSET load. */
7735 if (pending_ltrel)
7736 continue;
7737 next = insn;
7738 do
7739 {
7740 insn = next;
7741 next = NEXT_INSN (insn);
7742 }
7743 while (next
7744 && NOTE_P (next)
7745 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
7746 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
7747 }
7748 else
7749 {
7750 gcc_assert (!pending_ltrel);
7751
7752 /* The old pool has to end before the section switch
7753 note in order to make it part of the current
7754 section. */
7755 insn = PREV_INSN (insn);
7756 }
7757
7758 label = gen_label_rtx ();
7759 prev = insn;
7760 if (prev && NOTE_P (prev))
7761 prev = prev_nonnote_insn (prev);
7762 if (prev)
7763 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
7764 INSN_LOCATION (prev));
7765 else
7766 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
7767 barrier = emit_barrier_after (jump);
7768 insn = emit_label_after (label, barrier);
7769 JUMP_LABEL (jump) = label;
7770 LABEL_NUSES (label) = 1;
7771
7772 INSN_ADDRESSES_NEW (jump, -1);
7773 INSN_ADDRESSES_NEW (barrier, -1);
7774 INSN_ADDRESSES_NEW (insn, -1);
7775
7776 s390_end_pool (curr_pool, barrier);
7777 curr_pool = NULL;
7778 extra_size = 0;
7779 }
7780 }
7781 }
7782
7783 if (curr_pool)
7784 s390_end_pool (curr_pool, NULL);
7785 gcc_assert (!pending_ltrel);
7786
7787 /* Find all labels that are branched into
7788 from an insn belonging to a different chunk. */
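/* At such labels the base register may still hold the base of the other
   chunk's literal pool, so a base register reload insn is emitted for
   each of them further below.  */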
7789
7790 far_labels = BITMAP_ALLOC (NULL);
7791
7792 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7793 {
7794 rtx_jump_table_data *table;
7795
7796 /* Labels marked with LABEL_PRESERVE_P can be the target
7797 of non-local jumps, so we have to mark them.
7798 The same holds for named labels.
7799
7800 Don't do that, however, if it is the label before
7801 a jump table. */
7802
7803 if (LABEL_P (insn)
7804 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
7805 {
7806 rtx_insn *vec_insn = NEXT_INSN (insn);
7807 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
7808 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
7809 }
7810 /* Check potential targets in a table jump (casesi_jump). */
7811 else if (tablejump_p (insn, NULL, &table))
7812 {
7813 rtx vec_pat = PATTERN (table);
7814 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
7815
7816 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
7817 {
7818 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
7819
7820 if (s390_find_pool (pool_list, label)
7821 != s390_find_pool (pool_list, insn))
7822 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7823 }
7824 }
7825 /* If we have a direct jump (conditional or unconditional),
7826 check all potential targets. */
7827 else if (JUMP_P (insn))
7828 {
7829 rtx pat = PATTERN (insn);
7830
7831 if (GET_CODE (pat) == PARALLEL)
7832 pat = XVECEXP (pat, 0, 0);
7833
7834 if (GET_CODE (pat) == SET)
7835 {
7836 rtx label = JUMP_LABEL (insn);
7837 if (label && !ANY_RETURN_P (label))
7838 {
7839 if (s390_find_pool (pool_list, label)
7840 != s390_find_pool (pool_list, insn))
7841 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7842 }
7843 }
7844 }
7845 }
7846
7847 /* Insert base register reload insns before every pool. */
7848
7849 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7850 {
7851 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7852 curr_pool->label);
7853 rtx_insn *insn = curr_pool->first_insn;
7854 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
7855 }
7856
7857 /* Insert base register reload insns at every far label. */
7858
7859 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7860 if (LABEL_P (insn)
7861 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
7862 {
7863 struct constant_pool *pool = s390_find_pool (pool_list, insn);
7864 if (pool)
7865 {
7866 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7867 pool->label);
7868 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
7869 }
7870 }
7871
7872
7873 BITMAP_FREE (far_labels);
7874
7875
7876 /* Recompute insn addresses. */
7877
7878 init_insn_lengths ();
7879 shorten_branches (get_insns ());
7880
7881 return pool_list;
7882 }
7883
7884 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7885 After we have decided to use this list, finish implementing
7886 all changes to the current function as required. */
7887
7888 static void
7889 s390_chunkify_finish (struct constant_pool *pool_list)
7890 {
7891 struct constant_pool *curr_pool = NULL;
7892 rtx_insn *insn;
7893
7894
7895 /* Replace all literal pool references. */
7896
7897 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7898 {
7899 if (INSN_P (insn))
7900 replace_ltrel_base (&PATTERN (insn));
7901
7902 curr_pool = s390_find_pool (pool_list, insn);
7903 if (!curr_pool)
7904 continue;
7905
7906 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
7907 {
7908 rtx addr, pool_ref = NULL_RTX;
7909 find_constant_pool_ref (PATTERN (insn), &pool_ref);
7910 if (pool_ref)
7911 {
7912 if (s390_execute_label (insn))
7913 addr = s390_find_execute (curr_pool, insn);
7914 else
7915 addr = s390_find_constant (curr_pool,
7916 get_pool_constant (pool_ref),
7917 get_pool_mode (pool_ref));
7918
7919 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
7920 INSN_CODE (insn) = -1;
7921 }
7922 }
7923 }
7924
7925 /* Dump out all literal pools. */
7926
7927 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7928 s390_dump_pool (curr_pool, 0);
7929
7930 /* Free pool list. */
7931
7932 while (pool_list)
7933 {
7934 struct constant_pool *next = pool_list->next;
7935 s390_free_pool (pool_list);
7936 pool_list = next;
7937 }
7938 }
7939
7940 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7941 We have decided we cannot use this list, so revert all changes
7942 to the current function that were done by s390_chunkify_start. */
7943
7944 static void
7945 s390_chunkify_cancel (struct constant_pool *pool_list)
7946 {
7947 struct constant_pool *curr_pool = NULL;
7948 rtx_insn *insn;
7949
7950 /* Remove all pool placeholder insns. */
7951
7952 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7953 {
7954 /* Did we insert an extra barrier? Remove it. */
7955 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
7956 rtx_insn *jump = barrier ? PREV_INSN (barrier) : NULL;
7957 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
7958
7959 if (jump && JUMP_P (jump)
7960 && barrier && BARRIER_P (barrier)
7961 && label && LABEL_P (label)
7962 && GET_CODE (PATTERN (jump)) == SET
7963 && SET_DEST (PATTERN (jump)) == pc_rtx
7964 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
7965 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
7966 {
7967 remove_insn (jump);
7968 remove_insn (barrier);
7969 remove_insn (label);
7970 }
7971
7972 remove_insn (curr_pool->pool_insn);
7973 }
7974
7975 /* Remove all base register reload insns. */
7976
7977 for (insn = get_insns (); insn; )
7978 {
7979 rtx_insn *next_insn = NEXT_INSN (insn);
7980
7981 if (NONJUMP_INSN_P (insn)
7982 && GET_CODE (PATTERN (insn)) == SET
7983 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
7984 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
7985 remove_insn (insn);
7986
7987 insn = next_insn;
7988 }
7989
7990 /* Free pool list. */
7991
7992 while (pool_list)
7993 {
7994 struct constant_pool *next = pool_list->next;
7995 s390_free_pool (pool_list);
7996 pool_list = next;
7997 }
7998 }
7999
8000 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
8001
8002 void
8003 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
8004 {
8005 REAL_VALUE_TYPE r;
8006
8007 switch (GET_MODE_CLASS (mode))
8008 {
8009 case MODE_FLOAT:
8010 case MODE_DECIMAL_FLOAT:
8011 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
8012
8013 REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
8014 assemble_real (r, mode, align);
8015 break;
8016
8017 case MODE_INT:
8018 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
8019 mark_symbol_refs_as_used (exp);
8020 break;
8021
8022 case MODE_VECTOR_INT:
8023 case MODE_VECTOR_FLOAT:
8024 {
8025 int i;
8026 machine_mode inner_mode;
8027 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
8028
8029 inner_mode = GET_MODE_INNER (GET_MODE (exp));
8030 for (i = 0; i < XVECLEN (exp, 0); i++)
8031 s390_output_pool_entry (XVECEXP (exp, 0, i),
8032 inner_mode,
8033 i == 0
8034 ? align
8035 : GET_MODE_BITSIZE (inner_mode));
8036 }
8037 break;
8038
8039 default:
8040 gcc_unreachable ();
8041 }
8042 }
8043
8044
8045 /* Return an RTL expression representing the value of the return address
8046 for the frame COUNT steps up from the current frame. FRAME is the
8047 frame pointer of that frame. */
8048
8049 rtx
8050 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
8051 {
8052 int offset;
8053 rtx addr;
8054
8055 /* Without backchain, we fail for all but the current frame. */
8056
8057 if (!TARGET_BACKCHAIN && count > 0)
8058 return NULL_RTX;
8059
8060 /* For the current frame, we need to make sure the initial
8061 value of RETURN_REGNUM is actually saved. */
8062
8063 if (count == 0)
8064 {
8065 /* On non-z architectures branch splitting could overwrite r14. */
8066 if (TARGET_CPU_ZARCH)
8067 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
8068 else
8069 {
8070 cfun_frame_layout.save_return_addr_p = true;
8071 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
8072 }
8073 }
8074
8075 if (TARGET_PACKED_STACK)
8076 offset = -2 * UNITS_PER_LONG;
8077 else
8078 offset = RETURN_REGNUM * UNITS_PER_LONG;
8079
8080 addr = plus_constant (Pmode, frame, offset);
8081 addr = memory_address (Pmode, addr);
8082 return gen_rtx_MEM (Pmode, addr);
8083 }
8084
8085 /* Return an RTL expression representing the back chain stored in
8086 the current stack frame. */
8087
8088 rtx
8089 s390_back_chain_rtx (void)
8090 {
8091 rtx chain;
8092
8093 gcc_assert (TARGET_BACKCHAIN);
8094
8095 if (TARGET_PACKED_STACK)
8096 chain = plus_constant (Pmode, stack_pointer_rtx,
8097 STACK_POINTER_OFFSET - UNITS_PER_LONG);
8098 else
8099 chain = stack_pointer_rtx;
8100
8101 chain = gen_rtx_MEM (Pmode, chain);
8102 return chain;
8103 }
8104
8105 /* Find first call clobbered register unused in a function.
8106 This could be used as base register in a leaf function
8107 or for holding the return address before epilogue. */
8108
8109 static int
8110 find_unused_clobbered_reg (void)
8111 {
8112 int i;
8113 for (i = 0; i < 6; i++)
8114 if (!df_regs_ever_live_p (i))
8115 return i;
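/* All of r0 .. r5 are in use; fall back to r0.  */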
8116 return 0;
8117 }
8118
8119
8120 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
8121 clobbered hard regs in SETREG. */
8122
8123 static void
8124 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
8125 {
8126 char *regs_ever_clobbered = (char *)data;
8127 unsigned int i, regno;
8128 machine_mode mode = GET_MODE (setreg);
8129
8130 if (GET_CODE (setreg) == SUBREG)
8131 {
8132 rtx inner = SUBREG_REG (setreg);
8133 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
8134 return;
8135 regno = subreg_regno (setreg);
8136 }
8137 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
8138 regno = REGNO (setreg);
8139 else
8140 return;
8141
8142 for (i = regno;
8143 i < regno + HARD_REGNO_NREGS (regno, mode);
8144 i++)
8145 regs_ever_clobbered[i] = 1;
8146 }
8147
8148 /* Walks through all basic blocks of the current function looking
8149 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
8150 of the passed char array REGS_EVER_CLOBBERED are set to one for
8151 each of those regs. */
8152
8153 static void
8154 s390_regs_ever_clobbered (char regs_ever_clobbered[])
8155 {
8156 basic_block cur_bb;
8157 rtx_insn *cur_insn;
8158 unsigned int i;
8159
8160 memset (regs_ever_clobbered, 0, 32);
8161
8162 /* For non-leaf functions we have to consider all call clobbered regs to be
8163 clobbered. */
8164 if (!crtl->is_leaf)
8165 {
8166 for (i = 0; i < 32; i++)
8167 regs_ever_clobbered[i] = call_really_used_regs[i];
8168 }
8169
8170 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
8171 this work is done by liveness analysis (mark_regs_live_at_end).
8172 Special care is needed for functions containing landing pads. Landing pads
8173 may use the eh registers, but the code which sets these registers is not
8174 contained in that function. Hence s390_regs_ever_clobbered is not able to
8175 deal with this automatically. */
8176 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
8177 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
8178 if (crtl->calls_eh_return
8179 || (cfun->machine->has_landing_pad_p
8180 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
8181 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
8182
8183 /* For nonlocal gotos all call-saved registers have to be saved.
8184 This flag is also set for the unwinding code in libgcc.
8185 See expand_builtin_unwind_init. For regs_ever_live this is done by
8186 reload. */
8187 if (crtl->saves_all_registers)
8188 for (i = 0; i < 32; i++)
8189 if (!call_really_used_regs[i])
8190 regs_ever_clobbered[i] = 1;
8191
8192 FOR_EACH_BB_FN (cur_bb, cfun)
8193 {
8194 FOR_BB_INSNS (cur_bb, cur_insn)
8195 {
8196 rtx pat;
8197
8198 if (!INSN_P (cur_insn))
8199 continue;
8200
8201 pat = PATTERN (cur_insn);
8202
8203 /* Ignore GPR restore insns. */
8204 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
8205 {
8206 if (GET_CODE (pat) == SET
8207 && GENERAL_REG_P (SET_DEST (pat)))
8208 {
8209 /* lgdr */
8210 if (GET_MODE (SET_SRC (pat)) == DImode
8211 && FP_REG_P (SET_SRC (pat)))
8212 continue;
8213
8214 /* l / lg */
8215 if (GET_CODE (SET_SRC (pat)) == MEM)
8216 continue;
8217 }
8218
8219 /* lm / lmg */
8220 if (GET_CODE (pat) == PARALLEL
8221 && load_multiple_operation (pat, VOIDmode))
8222 continue;
8223 }
8224
8225 note_stores (pat,
8226 s390_reg_clobbered_rtx,
8227 regs_ever_clobbered);
8228 }
8229 }
8230 }
8231
8232 /* Determine the frame area which actually has to be accessed
8233 in the function epilogue. The values are stored at the
8234 given pointers AREA_BOTTOM (address of the lowest used stack
8235 address) and AREA_TOP (address of the first item which does
8236 not belong to the stack frame). */
8237
8238 static void
8239 s390_frame_area (int *area_bottom, int *area_top)
8240 {
8241 int b, t;
8242
8243 b = INT_MAX;
8244 t = INT_MIN;
8245
8246 if (cfun_frame_layout.first_restore_gpr != -1)
8247 {
8248 b = (cfun_frame_layout.gprs_offset
8249 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
8250 t = b + (cfun_frame_layout.last_restore_gpr
8251 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
8252 }
8253
8254 if (TARGET_64BIT && cfun_save_high_fprs_p)
8255 {
8256 b = MIN (b, cfun_frame_layout.f8_offset);
8257 t = MAX (t, (cfun_frame_layout.f8_offset
8258 + cfun_frame_layout.high_fprs * 8));
8259 }
8260
8261 if (!TARGET_64BIT)
8262 {
8263 if (cfun_fpr_save_p (FPR4_REGNUM))
8264 {
8265 b = MIN (b, cfun_frame_layout.f4_offset);
8266 t = MAX (t, cfun_frame_layout.f4_offset + 8);
8267 }
8268 if (cfun_fpr_save_p (FPR6_REGNUM))
8269 {
8270 b = MIN (b, cfun_frame_layout.f4_offset + 8);
8271 t = MAX (t, cfun_frame_layout.f4_offset + 16);
8272 }
8273 }
8274 *area_bottom = b;
8275 *area_top = t;
8276 }
8277 /* Update gpr_save_slots in the frame layout trying to make use of
8278 FPRs as GPR save slots.
8279 This is a helper routine of s390_register_info. */
8280
8281 static void
8282 s390_register_info_gprtofpr ()
8283 {
8284 int save_reg_slot = FPR0_REGNUM;
8285 int i, j;
8286
8287 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
8288 return;
8289
8290 for (i = 15; i >= 6; i--)
8291 {
8292 if (cfun_gpr_save_slot (i) == 0)
8293 continue;
8294
8295 /* Advance to the next FP register which can be used as a
8296 GPR save slot. */
8297 while ((!call_really_used_regs[save_reg_slot]
8298 || df_regs_ever_live_p (save_reg_slot)
8299 || cfun_fpr_save_p (save_reg_slot))
8300 && FP_REGNO_P (save_reg_slot))
8301 save_reg_slot++;
8302 if (!FP_REGNO_P (save_reg_slot))
8303 {
8304 /* We only want to use ldgr/lgdr if we can get rid of
8305 stm/lm entirely. So undo the gpr slot allocation in
8306 case we ran out of FPR save slots. */
8307 for (j = 6; j <= 15; j++)
8308 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
8309 cfun_gpr_save_slot (j) = -1;
8310 break;
8311 }
8312 cfun_gpr_save_slot (i) = save_reg_slot++;
8313 }
8314 }
8315
8316 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
8317 stdarg.
8318 This is a helper routine for s390_register_info. */
8319
8320 static void
8321 s390_register_info_stdarg_fpr ()
8322 {
8323 int i;
8324 int min_fpr;
8325 int max_fpr;
8326
8327 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
8328 f0, f2, f4 and f6 for 64 bit. */
8329 if (!cfun->stdarg
8330 || !TARGET_HARD_FLOAT
8331 || !cfun->va_list_fpr_size
8332 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
8333 return;
8334
8335 min_fpr = crtl->args.info.fprs;
8336 max_fpr = min_fpr + cfun->va_list_fpr_size;
8337 if (max_fpr > FP_ARG_NUM_REG)
8338 max_fpr = FP_ARG_NUM_REG;
8339
8340 for (i = min_fpr; i < max_fpr; i++)
8341 cfun_set_fpr_save (i + FPR0_REGNUM);
8342 }
8343
8344 /* Reserve the GPR save slots for GPRs which need to be saved due to
8345 stdarg.
8346 This is a helper routine for s390_register_info. */
8347
8348 static void
8349 s390_register_info_stdarg_gpr ()
8350 {
8351 int i;
8352 int min_gpr;
8353 int max_gpr;
8354
8355 if (!cfun->stdarg
8356 || !cfun->va_list_gpr_size
8357 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
8358 return;
8359
8360 min_gpr = crtl->args.info.gprs;
8361 max_gpr = min_gpr + cfun->va_list_gpr_size;
8362 if (max_gpr > GP_ARG_NUM_REG)
8363 max_gpr = GP_ARG_NUM_REG;
8364
8365 for (i = min_gpr; i < max_gpr; i++)
8366 cfun_gpr_save_slot (2 + i) = -1;
8367 }
8368
8369 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
8370 for registers which need to be saved in function prologue.
8371 This function can be used until the insns emitted for save/restore
8372 of the regs are visible in the RTL stream. */
8373
8374 static void
8375 s390_register_info ()
8376 {
8377 int i, j;
8378 char clobbered_regs[32];
8379
8380 gcc_assert (!epilogue_completed);
8381
8382 if (reload_completed)
8383 /* After reload we rely on our own routine to determine which
8384 registers need saving. */
8385 s390_regs_ever_clobbered (clobbered_regs);
8386 else
8387 /* During reload we use regs_ever_live as a base since reload
8388 does changes in there which we otherwise would not be aware
8389 of. */
8390 for (i = 0; i < 32; i++)
8391 clobbered_regs[i] = df_regs_ever_live_p (i);
8392
8393 for (i = 0; i < 32; i++)
8394 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
8395
8396 /* Mark the call-saved FPRs which need to be saved.
8397 This needs to be done before checking the special GPRs since the
8398 stack pointer usage depends on whether high FPRs have to be saved
8399 or not. */
8400 cfun_frame_layout.fpr_bitmap = 0;
8401 cfun_frame_layout.high_fprs = 0;
8402 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
8403 if (clobbered_regs[i] && !call_really_used_regs[i])
8404 {
8405 cfun_set_fpr_save (i);
8406 if (i >= FPR8_REGNUM)
8407 cfun_frame_layout.high_fprs++;
8408 }
8409
8410 if (flag_pic)
8411 clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
8412 |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
8413
8414 clobbered_regs[BASE_REGNUM]
8415 |= (cfun->machine->base_reg
8416 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
8417
8418 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
8419 |= !!frame_pointer_needed;
8420
8421 /* On pre-z900 machines this decision might not be made until
8422 machine-dependent reorg.
8423 save_return_addr_p will only be set on non-zarch machines so
8424 there is no risk that r14 goes into an FPR instead of a stack
8425 slot. */
8426 clobbered_regs[RETURN_REGNUM]
8427 |= (!crtl->is_leaf
8428 || TARGET_TPF_PROFILING
8429 || cfun->machine->split_branches_pending_p
8430 || cfun_frame_layout.save_return_addr_p
8431 || crtl->calls_eh_return);
8432
8433 clobbered_regs[STACK_POINTER_REGNUM]
8434 |= (!crtl->is_leaf
8435 || TARGET_TPF_PROFILING
8436 || cfun_save_high_fprs_p
8437 || get_frame_size () > 0
8438 || (reload_completed && cfun_frame_layout.frame_size > 0)
8439 || cfun->calls_alloca);
8440
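/* gpr_save_slots encoding as used below: 0 means the GPR does not need to
   be saved, -1 requests a save slot on the stack, and an FPR number
   (assigned by s390_register_info_gprtofpr) means the GPR is saved in
   that FPR.  */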
8441 memset (cfun_frame_layout.gpr_save_slots, 0, 16);
8442
8443 for (i = 6; i < 16; i++)
8444 if (clobbered_regs[i])
8445 cfun_gpr_save_slot (i) = -1;
8446
8447 s390_register_info_stdarg_fpr ();
8448 s390_register_info_gprtofpr ();
8449
8450 /* First find the range of GPRs to be restored. Vararg regs don't
8451 need to be restored so we do it before assigning slots to the
8452 vararg GPRs. */
8453 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
8454 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
8455 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
8456 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
8457
8458 /* stdarg functions might need to save GPRs 2 to 6. This might
8459 override the GPR->FPR save decision made above for r6 since
8460 vararg regs must go to the stack. */
8461 s390_register_info_stdarg_gpr ();
8462
8463 /* Now the range of GPRs which need saving. */
8464 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
8465 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
8466 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
8467 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
8468 }
8469
8470 /* This function is called by s390_optimize_prologue in order to get
8471 rid of unnecessary GPR save/restore instructions. The register info
8472 for the GPRs is re-computed and the ranges are re-calculated. */
8473
8474 static void
8475 s390_optimize_register_info ()
8476 {
8477 char clobbered_regs[32];
8478 int i, j;
8479
8480 gcc_assert (epilogue_completed);
8481 gcc_assert (!cfun->machine->split_branches_pending_p);
8482
8483 s390_regs_ever_clobbered (clobbered_regs);
8484
8485 for (i = 0; i < 32; i++)
8486 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
8487
8488 /* There is still special treatment needed for cases invisible to
8489 s390_regs_ever_clobbered. */
8490 clobbered_regs[RETURN_REGNUM]
8491 |= (TARGET_TPF_PROFILING
8492 /* When expanding builtin_return_addr in ESA mode we do not
8493 know whether r14 will later be needed as scratch reg when
8494 doing branch splitting. So the builtin always accesses the
8495 r14 save slot and we need to stick to the save/restore
8496 decision for r14 even if it turns out that it didn't get
8497 clobbered. */
8498 || cfun_frame_layout.save_return_addr_p
8499 || crtl->calls_eh_return);
8500
8501 memset (cfun_frame_layout.gpr_save_slots, 0, 6);
8502
8503 for (i = 6; i < 16; i++)
8504 if (!clobbered_regs[i])
8505 cfun_gpr_save_slot (i) = 0;
8506
8507 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
8508 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
8509 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
8510 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
8511
8512 s390_register_info_stdarg_gpr ();
8513
8514 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
8515 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
8516 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
8517 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
8518 }
8519
8520 /* Fill cfun->machine with info about frame of current function. */
8521
8522 static void
8523 s390_frame_info (void)
8524 {
8525 HOST_WIDE_INT lowest_offset;
8526
8527 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
8528 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
8529
8530 /* The va_arg builtin uses a constant distance of 16 *
8531 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
8532 pointer. So even if we are going to save the stack pointer in an
8533 FPR we need the stack space in order to keep the offsets
8534 correct. */
8535 if (cfun->stdarg && cfun_save_arg_fprs_p)
8536 {
8537 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
8538
8539 if (cfun_frame_layout.first_save_gpr_slot == -1)
8540 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
8541 }
8542
8543 cfun_frame_layout.frame_size = get_frame_size ();
8544 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
8545 fatal_error (input_location,
8546 "total size of local variables exceeds architecture limit");
8547
8548 if (!TARGET_PACKED_STACK)
8549 {
8550 /* Fixed stack layout. */
8551 cfun_frame_layout.backchain_offset = 0;
8552 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
8553 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
8554 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
8555 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
8556 * UNITS_PER_LONG);
8557 }
8558 else if (TARGET_BACKCHAIN)
8559 {
8560 /* Kernel stack layout - packed stack, backchain, no float */
8561 gcc_assert (TARGET_SOFT_FLOAT);
8562 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
8563 - UNITS_PER_LONG);
8564
8565 /* The distance between the backchain and the return address
8566 save slot must not change. So we always need a slot for the
8567 stack pointer which resides in between. */
8568 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
8569
8570 cfun_frame_layout.gprs_offset
8571 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
8572
8573 /* FPRs will not be saved. Nevertheless pick sane values to
8574 keep area calculations valid. */
8575 cfun_frame_layout.f0_offset =
8576 cfun_frame_layout.f4_offset =
8577 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
8578 }
8579 else
8580 {
8581 int num_fprs;
8582
8583 /* Packed stack layout without backchain. */
8584
8585 /* With stdarg FPRs need their dedicated slots. */
8586 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
8587 : (cfun_fpr_save_p (FPR4_REGNUM) +
8588 cfun_fpr_save_p (FPR6_REGNUM)));
8589 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
8590
8591 num_fprs = (cfun->stdarg ? 2
8592 : (cfun_fpr_save_p (FPR0_REGNUM)
8593 + cfun_fpr_save_p (FPR2_REGNUM)));
8594 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
8595
8596 cfun_frame_layout.gprs_offset
8597 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
8598
8599 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
8600 - cfun_frame_layout.high_fprs * 8);
8601 }
8602
8603 if (cfun_save_high_fprs_p)
8604 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
8605
8606 if (!crtl->is_leaf)
8607 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
8608
8609 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
8610 sized area at the bottom of the stack. This is required also for
8611 leaf functions. When GCC generates a local stack reference it
8612 will always add STACK_POINTER_OFFSET to all these references. */
8613 if (crtl->is_leaf
8614 && !TARGET_TPF_PROFILING
8615 && cfun_frame_layout.frame_size == 0
8616 && !cfun->calls_alloca)
8617 return;
8618
8619 /* Calculate the number of bytes we have used in our own register
8620 save area. With the packed stack layout we can re-use the
8621 remaining bytes for normal stack elements. */
8622
8623 if (TARGET_PACKED_STACK)
8624 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
8625 cfun_frame_layout.f4_offset),
8626 cfun_frame_layout.gprs_offset);
8627 else
8628 lowest_offset = 0;
8629
8630 if (TARGET_BACKCHAIN)
8631 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
8632
8633 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
8634
8635 /* If, under 31 bit, an odd number of GPRs has to be saved, we have to
8636 adjust the frame size to maintain 8-byte alignment of stack
8637 frames. */
8638 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
8639 STACK_BOUNDARY / BITS_PER_UNIT - 1)
8640 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
8641 }
8642
8643 /* Generate frame layout. Fills in register and frame data for the current
8644 function in cfun->machine. This routine can be called multiple times;
8645 it will re-do the complete frame layout every time. */
8646
8647 static void
8648 s390_init_frame_layout (void)
8649 {
8650 HOST_WIDE_INT frame_size;
8651 int base_used;
8652
8653 gcc_assert (!reload_completed);
8654
8655 /* On S/390 machines, we may need to perform branch splitting, which
8656 will require both base and return address register. We have no
8657 choice but to assume we're going to need them until right at the
8658 end of the machine dependent reorg phase. */
8659 if (!TARGET_CPU_ZARCH)
8660 cfun->machine->split_branches_pending_p = true;
8661
8662 do
8663 {
8664 frame_size = cfun_frame_layout.frame_size;
8665
8666 /* Try to predict whether we'll need the base register. */
8667 base_used = cfun->machine->split_branches_pending_p
8668 || crtl->uses_const_pool
8669 || (!DISP_IN_RANGE (frame_size)
8670 && !CONST_OK_FOR_K (frame_size));
8671
8672 /* Decide which register to use as literal pool base. In small
8673 leaf functions, try to use an unused call-clobbered register
8674 as base register to avoid save/restore overhead. */
8675 if (!base_used)
8676 cfun->machine->base_reg = NULL_RTX;
8677 else if (crtl->is_leaf && !df_regs_ever_live_p (5))
8678 cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
8679 else
8680 cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
8681
8682 s390_register_info ();
8683 s390_frame_info ();
8684 }
8685 while (frame_size != cfun_frame_layout.frame_size);
8686 }
8687
8688 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
8689 the TX is nonescaping. A transaction is considered escaping if
8690 there is at least one path from tbegin returning CC0 to the
8691 function exit block without a tend.
8692
8693 The check so far has some limitations:
8694 - only single tbegin/tend BBs are supported
8695 - the first cond jump after tbegin must separate the CC0 path from ~CC0
8696 - the case where CC is copied to a GPR and the CC0 check is done
8697 on the GPR is not supported
8698 */
8699
8700 static void
8701 s390_optimize_nonescaping_tx (void)
8702 {
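/* Mask bit for condition code 0 in a CCRAWmode comparison; this matches
   the 4-bit branch condition mask, in which the value 8 (1 << 3)
   selects CC 0.  */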
8703 const unsigned int CC0 = 1 << 3;
8704 basic_block tbegin_bb = NULL;
8705 basic_block tend_bb = NULL;
8706 basic_block bb;
8707 rtx_insn *insn;
8708 bool result = true;
8709 int bb_index;
8710 rtx_insn *tbegin_insn = NULL;
8711
8712 if (!cfun->machine->tbegin_p)
8713 return;
8714
8715 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
8716 {
8717 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
8718
8719 if (!bb)
8720 continue;
8721
8722 FOR_BB_INSNS (bb, insn)
8723 {
8724 rtx ite, cc, pat, target;
8725 unsigned HOST_WIDE_INT mask;
8726
8727 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
8728 continue;
8729
8730 pat = PATTERN (insn);
8731
8732 if (GET_CODE (pat) == PARALLEL)
8733 pat = XVECEXP (pat, 0, 0);
8734
8735 if (GET_CODE (pat) != SET
8736 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
8737 continue;
8738
8739 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
8740 {
8741 rtx_insn *tmp;
8742
8743 tbegin_insn = insn;
8744
8745 /* Just return if the tbegin doesn't have clobbers. */
8746 if (GET_CODE (PATTERN (insn)) != PARALLEL)
8747 return;
8748
8749 if (tbegin_bb != NULL)
8750 return;
8751
8752 /* Find the next conditional jump. */
8753 for (tmp = NEXT_INSN (insn);
8754 tmp != NULL_RTX;
8755 tmp = NEXT_INSN (tmp))
8756 {
8757 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
8758 return;
8759 if (!JUMP_P (tmp))
8760 continue;
8761
8762 ite = SET_SRC (PATTERN (tmp));
8763 if (GET_CODE (ite) != IF_THEN_ELSE)
8764 continue;
8765
8766 cc = XEXP (XEXP (ite, 0), 0);
8767 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
8768 || GET_MODE (cc) != CCRAWmode
8769 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
8770 return;
8771
8772 if (bb->succs->length () != 2)
8773 return;
8774
8775 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
8776 if (GET_CODE (XEXP (ite, 0)) == NE)
8777 mask ^= 0xf;
8778
8779 if (mask == CC0)
8780 target = XEXP (ite, 1);
8781 else if (mask == (CC0 ^ 0xf))
8782 target = XEXP (ite, 2);
8783 else
8784 return;
8785
8786 {
8787 edge_iterator ei;
8788 edge e1, e2;
8789
8790 ei = ei_start (bb->succs);
8791 e1 = ei_safe_edge (ei);
8792 ei_next (&ei);
8793 e2 = ei_safe_edge (ei);
8794
8795 if (e2->flags & EDGE_FALLTHRU)
8796 {
8797 e2 = e1;
8798 e1 = ei_safe_edge (ei);
8799 }
8800
8801 if (!(e1->flags & EDGE_FALLTHRU))
8802 return;
8803
8804 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
8805 }
8806 if (tmp == BB_END (bb))
8807 break;
8808 }
8809 }
8810
8811 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
8812 {
8813 if (tend_bb != NULL)
8814 return;
8815 tend_bb = bb;
8816 }
8817 }
8818 }
8819
8820 /* Either we successfully remove the FPR clobbers here or we are not
8821 able to do anything for this TX. Both cases don't qualify for
8822 another look. */
8823 cfun->machine->tbegin_p = false;
8824
8825 if (tbegin_bb == NULL || tend_bb == NULL)
8826 return;
8827
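/* The transaction is nonescaping if the tend block post-dominates the
   block reached on the CC0 (success) path of the tbegin, i.e. every
   path from a successful tbegin to the function exit passes a tend.  */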
8828 calculate_dominance_info (CDI_POST_DOMINATORS);
8829 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
8830 free_dominance_info (CDI_POST_DOMINATORS);
8831
8832 if (!result)
8833 return;
8834
8835 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
8836 gen_rtvec (2,
8837 XVECEXP (PATTERN (tbegin_insn), 0, 0),
8838 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
8839 INSN_CODE (tbegin_insn) = -1;
8840 df_insn_rescan (tbegin_insn);
8841
8842 return;
8843 }
8844
8845 /* Return true if it is legal to put a value with MODE into REGNO. */
8846
8847 bool
8848 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
8849 {
8850 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
8851 return false;
8852
8853 switch (REGNO_REG_CLASS (regno))
8854 {
8855 case VEC_REGS:
8856 return ((GET_MODE_CLASS (mode) == MODE_INT
8857 && s390_class_max_nregs (VEC_REGS, mode) == 1)
8858 || mode == DFmode
8859 || s390_vector_mode_supported_p (mode));
8860 break;
8861 case FP_REGS:
8862 if (TARGET_VX
8863 && ((GET_MODE_CLASS (mode) == MODE_INT
8864 && s390_class_max_nregs (FP_REGS, mode) == 1)
8865 || mode == DFmode
8866 || s390_vector_mode_supported_p (mode)))
8867 return true;
8868
8869 if (REGNO_PAIR_OK (regno, mode))
8870 {
8871 if (mode == SImode || mode == DImode)
8872 return true;
8873
8874 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
8875 return true;
8876 }
8877 break;
8878 case ADDR_REGS:
8879 if (FRAME_REGNO_P (regno) && mode == Pmode)
8880 return true;
8881
8882 /* fallthrough */
8883 case GENERAL_REGS:
8884 if (REGNO_PAIR_OK (regno, mode))
8885 {
8886 if (TARGET_ZARCH
8887 || (mode != TFmode && mode != TCmode && mode != TDmode))
8888 return true;
8889 }
8890 break;
8891 case CC_REGS:
8892 if (GET_MODE_CLASS (mode) == MODE_CC)
8893 return true;
8894 break;
8895 case ACCESS_REGS:
8896 if (REGNO_PAIR_OK (regno, mode))
8897 {
8898 if (mode == SImode || mode == Pmode)
8899 return true;
8900 }
8901 break;
8902 default:
8903 return false;
8904 }
8905
8906 return false;
8907 }
8908
8909 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
8910
8911 bool
8912 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
8913 {
8914 /* Once we've decided upon a register to use as base register, it must
8915 no longer be used for any other purpose. */
8916 if (cfun->machine->base_reg)
8917 if (REGNO (cfun->machine->base_reg) == old_reg
8918 || REGNO (cfun->machine->base_reg) == new_reg)
8919 return false;
8920
8921 /* Prevent regrename from using call-saved regs which haven't
8922 actually been saved. This is necessary since regrename assumes
8923 the backend save/restore decisions are based on
8924 df_regs_ever_live. Since we have our own routine we have to tell
8925 regrename manually about it. */
8926 if (GENERAL_REGNO_P (new_reg)
8927 && !call_really_used_regs[new_reg]
8928 && cfun_gpr_save_slot (new_reg) == 0)
8929 return false;
8930
8931 return true;
8932 }
8933
8934 /* Return nonzero if register REGNO can be used as a scratch register
8935 in peephole2. */
8936
8937 static bool
8938 s390_hard_regno_scratch_ok (unsigned int regno)
8939 {
8940 /* See s390_hard_regno_rename_ok. */
8941 if (GENERAL_REGNO_P (regno)
8942 && !call_really_used_regs[regno]
8943 && cfun_gpr_save_slot (regno) == 0)
8944 return false;
8945
8946 return true;
8947 }
8948
8949 /* Maximum number of registers to represent a value of mode MODE
8950 in a register of class RCLASS. */
8951
8952 int
8953 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
8954 {
8955 int reg_size;
8956 bool reg_pair_required_p = false;
8957
8958 switch (rclass)
8959 {
8960 case FP_REGS:
8961 case VEC_REGS:
8962 reg_size = TARGET_VX ? 16 : 8;
8963
8964 /* TF and TD modes would fit into a VR but we put them into a
8965 register pair since we do not have 128bit FP instructions on
8966 full VRs. */
8967 if (TARGET_VX
8968 && SCALAR_FLOAT_MODE_P (mode)
8969 && GET_MODE_SIZE (mode) >= 16)
8970 reg_pair_required_p = true;
8971
8972 /* Even if complex types would fit into a single FPR/VR we force
8973 them into a register pair to deal with the parts more easily.
8974 (FIXME: What about complex ints?) */
8975 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
8976 reg_pair_required_p = true;
8977 break;
8978 case ACCESS_REGS:
8979 reg_size = 4;
8980 break;
8981 default:
8982 reg_size = UNITS_PER_WORD;
8983 break;
8984 }
8985
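/* For a register pair each half of the value is rounded up to the
   register size separately; otherwise the whole mode size is rounded
   up to the register size.  */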
8986 if (reg_pair_required_p)
8987 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
8988
8989 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
8990 }
8991
8992 /* Return TRUE if changing mode from FROM to TO should not be allowed
8993 for register class CLASS. */
8994
8995 int
8996 s390_cannot_change_mode_class (machine_mode from_mode,
8997 machine_mode to_mode,
8998 enum reg_class rclass)
8999 {
9000 machine_mode small_mode;
9001 machine_mode big_mode;
9002
9003 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
9004 return 0;
9005
9006 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
9007 {
9008 small_mode = from_mode;
9009 big_mode = to_mode;
9010 }
9011 else
9012 {
9013 small_mode = to_mode;
9014 big_mode = from_mode;
9015 }
9016
9017 /* Values residing in VRs are little-endian style. All modes are
9018 placed left-aligned in a VR. This means that we cannot allow
9019 switching between modes with differing sizes. Also if the vector
9020 facility is available we still place TFmode values in VR register
9021 pairs, since the instructions we have operating on TFmode
9022 deal only with register pairs. Therefore we have to allow DFmode
9023 subregs of TFmodes to enable the TFmode splitters. */
9024 if (reg_classes_intersect_p (VEC_REGS, rclass)
9025 && (GET_MODE_SIZE (small_mode) < 8
9026 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
9027 return 1;
9028
9029 /* Likewise for access registers, since they have only half the
9030 word size on 64-bit. */
9031 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
9032 return 1;
9033
9034 return 0;
9035 }
9036
9037 /* Return true if we use LRA instead of reload pass. */
9038 static bool
9039 s390_lra_p (void)
9040 {
9041 return s390_lra_flag;
9042 }
9043
9044 /* Return true if register FROM can be eliminated via register TO. */
9045
9046 static bool
9047 s390_can_eliminate (const int from, const int to)
9048 {
9049 /* On zSeries machines, we have not marked the base register as fixed.
9050 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
9051 If a function requires the base register, we say here that this
9052 elimination cannot be performed. This will cause reload to free
9053 up the base register (as if it were fixed). On the other hand,
9054 if the current function does *not* require the base register, we
9055 say here the elimination succeeds, which in turn allows reload
9056 to allocate the base register for any other purpose. */
9057 if (from == BASE_REGNUM && to == BASE_REGNUM)
9058 {
9059 if (TARGET_CPU_ZARCH)
9060 {
9061 s390_init_frame_layout ();
9062 return cfun->machine->base_reg == NULL_RTX;
9063 }
9064
9065 return false;
9066 }
9067
9068 /* Everything else must point into the stack frame. */
9069 gcc_assert (to == STACK_POINTER_REGNUM
9070 || to == HARD_FRAME_POINTER_REGNUM);
9071
9072 gcc_assert (from == FRAME_POINTER_REGNUM
9073 || from == ARG_POINTER_REGNUM
9074 || from == RETURN_ADDRESS_POINTER_REGNUM);
9075
9076 /* Make sure we actually saved the return address. */
9077 if (from == RETURN_ADDRESS_POINTER_REGNUM)
9078 if (!crtl->calls_eh_return
9079 && !cfun->stdarg
9080 && !cfun_frame_layout.save_return_addr_p)
9081 return false;
9082
9083 return true;
9084 }
9085
9086 /* Return offset between register FROM and TO initially after prolog. */
9087
9088 HOST_WIDE_INT
9089 s390_initial_elimination_offset (int from, int to)
9090 {
9091 HOST_WIDE_INT offset;
9092
9093 /* ??? Why are we called for non-eliminable pairs? */
9094 if (!s390_can_eliminate (from, to))
9095 return 0;
9096
9097 switch (from)
9098 {
9099 case FRAME_POINTER_REGNUM:
9100 offset = (get_frame_size()
9101 + STACK_POINTER_OFFSET
9102 + crtl->outgoing_args_size);
9103 break;
9104
9105 case ARG_POINTER_REGNUM:
9106 s390_init_frame_layout ();
9107 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
9108 break;
9109
9110 case RETURN_ADDRESS_POINTER_REGNUM:
9111 s390_init_frame_layout ();
9112
9113 if (cfun_frame_layout.first_save_gpr_slot == -1)
9114 {
9115 /* If it turns out that for stdarg nothing went into the reg
9116 save area we also do not need the return address
9117 pointer. */
9118 if (cfun->stdarg && !cfun_save_arg_fprs_p)
9119 return 0;
9120
9121 gcc_unreachable ();
9122 }
9123
9124 /* In order to make the following work it is not necessary for
9125 r14 to have a save slot. It is sufficient if one other GPR
9126 got one. Since the GPRs are always stored without gaps we
9127 are able to calculate where the r14 save slot would
9128 reside. */
9129 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
9130 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
9131 UNITS_PER_LONG);
9132 break;
9133
9134 case BASE_REGNUM:
9135 offset = 0;
9136 break;
9137
9138 default:
9139 gcc_unreachable ();
9140 }
9141
9142 return offset;
9143 }
9144
9145 /* Emit insn to save fpr REGNUM at offset OFFSET relative
9146 to register BASE. Return generated insn. */
9147
9148 static rtx
9149 save_fpr (rtx base, int offset, int regnum)
9150 {
9151 rtx addr;
9152 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
9153
9154 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
9155 set_mem_alias_set (addr, get_varargs_alias_set ());
9156 else
9157 set_mem_alias_set (addr, get_frame_alias_set ());
9158
9159 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
9160 }
9161
9162 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
9163 to register BASE. Return generated insn. */
9164
9165 static rtx
9166 restore_fpr (rtx base, int offset, int regnum)
9167 {
9168 rtx addr;
9169 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
9170 set_mem_alias_set (addr, get_frame_alias_set ());
9171
9172 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
9173 }
9174
9175 /* Return true if REGNO is a global register, but not one
9176 of the special ones that need to be saved/restored anyway. */
9177
9178 static inline bool
9179 global_not_special_regno_p (int regno)
9180 {
9181 return (global_regs[regno]
9182 /* These registers are special and need to be
9183 restored in any case. */
9184 && !(regno == STACK_POINTER_REGNUM
9185 || regno == RETURN_REGNUM
9186 || regno == BASE_REGNUM
9187 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9188 }
9189
9190 /* Generate insn to save registers FIRST to LAST into
9191 the register save area located at offset OFFSET
9192 relative to register BASE. */
9193
9194 static rtx
9195 save_gprs (rtx base, int offset, int first, int last)
9196 {
9197 rtx addr, insn, note;
9198 int i;
9199
9200 addr = plus_constant (Pmode, base, offset);
9201 addr = gen_rtx_MEM (Pmode, addr);
9202
9203 set_mem_alias_set (addr, get_frame_alias_set ());
9204
9205 /* Special-case single register. */
9206 if (first == last)
9207 {
9208 if (TARGET_64BIT)
9209 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
9210 else
9211 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
9212
9213 if (!global_not_special_regno_p (first))
9214 RTX_FRAME_RELATED_P (insn) = 1;
9215 return insn;
9216 }
9217
9218
9219 insn = gen_store_multiple (addr,
9220 gen_rtx_REG (Pmode, first),
9221 GEN_INT (last - first + 1));
9222
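/* GPRs 2 .. 6 hold incoming arguments; when they are spilled for a
   stdarg function their save slots belong to the varargs save area and
   therefore get the varargs alias set.  */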
9223 if (first <= 6 && cfun->stdarg)
9224 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9225 {
9226 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
9227
9228 if (first + i <= 6)
9229 set_mem_alias_set (mem, get_varargs_alias_set ());
9230 }
9231
9232 /* We need to set the FRAME_RELATED flag on all SETs
9233 inside the store-multiple pattern.
9234
9235 However, we must not emit DWARF records for registers 2..5
9236 if they are stored for use by variable arguments ...
9237
9238 ??? Unfortunately, it is not enough to simply not set the
9239 FRAME_RELATED flags for those SETs, because the first SET
9240 of the PARALLEL is always treated as if it had the flag
9241 set, even if it does not. Therefore we emit a new pattern
9242 without those registers as REG_FRAME_RELATED_EXPR note. */
9243
9244 if (first >= 6 && !global_not_special_regno_p (first))
9245 {
9246 rtx pat = PATTERN (insn);
9247
9248 for (i = 0; i < XVECLEN (pat, 0); i++)
9249 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
9250 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
9251 0, i)))))
9252 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
9253
9254 RTX_FRAME_RELATED_P (insn) = 1;
9255 }
9256 else if (last >= 6)
9257 {
9258 int start;
9259
9260 for (start = first >= 6 ? first : 6; start <= last; start++)
9261 if (!global_not_special_regno_p (start))
9262 break;
9263
9264 if (start > last)
9265 return insn;
9266
9267 addr = plus_constant (Pmode, base,
9268 offset + (start - first) * UNITS_PER_LONG);
9269
9270 if (start == last)
9271 {
9272 if (TARGET_64BIT)
9273 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
9274 gen_rtx_REG (Pmode, start));
9275 else
9276 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
9277 gen_rtx_REG (Pmode, start));
9278 note = PATTERN (note);
9279
9280 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
9281 RTX_FRAME_RELATED_P (insn) = 1;
9282
9283 return insn;
9284 }
9285
9286 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
9287 gen_rtx_REG (Pmode, start),
9288 GEN_INT (last - start + 1));
9289 note = PATTERN (note);
9290
9291 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
9292
9293 for (i = 0; i < XVECLEN (note, 0); i++)
9294 if (GET_CODE (XVECEXP (note, 0, i)) == SET
9295 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
9296 0, i)))))
9297 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
9298
9299 RTX_FRAME_RELATED_P (insn) = 1;
9300 }
9301
9302 return insn;
9303 }
9304
9305 /* Generate insn to restore registers FIRST to LAST from
9306 the register save area located at offset OFFSET
9307 relative to register BASE. */
9308
9309 static rtx
9310 restore_gprs (rtx base, int offset, int first, int last)
9311 {
9312 rtx addr, insn;
9313
9314 addr = plus_constant (Pmode, base, offset);
9315 addr = gen_rtx_MEM (Pmode, addr);
9316 set_mem_alias_set (addr, get_frame_alias_set ());
9317
9318 /* Special-case single register. */
9319 if (first == last)
9320 {
9321 if (TARGET_64BIT)
9322 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
9323 else
9324 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
9325
9326 RTX_FRAME_RELATED_P (insn) = 1;
9327 return insn;
9328 }
9329
9330 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
9331 addr,
9332 GEN_INT (last - first + 1));
9333 RTX_FRAME_RELATED_P (insn) = 1;
9334 return insn;
9335 }
9336
9337 /* Return insn sequence to load the GOT register. */
9338
9339 static GTY(()) rtx got_symbol;
9340 rtx_insn *
9341 s390_load_got (void)
9342 {
9343 rtx_insn *insns;
9344
9345 /* We cannot use pic_offset_table_rtx here since we use this
9346 function also for non-pic if __tls_get_offset is called and in
9347 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
9348 aren't usable. */
9349 rtx got_rtx = gen_rtx_REG (Pmode, 12);
9350
9351 if (!got_symbol)
9352 {
9353 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9354 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
9355 }
9356
9357 start_sequence ();
9358
9359 if (TARGET_CPU_ZARCH)
9360 {
9361 emit_move_insn (got_rtx, got_symbol);
9362 }
9363 else
9364 {
9365 rtx offset;
9366
9367 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
9368 UNSPEC_LTREL_OFFSET);
9369 offset = gen_rtx_CONST (Pmode, offset);
9370 offset = force_const_mem (Pmode, offset);
9371
9372 emit_move_insn (got_rtx, offset);
9373
9374 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
9375 UNSPEC_LTREL_BASE);
9376 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
9377
9378 emit_move_insn (got_rtx, offset);
9379 }
9380
9381 insns = get_insns ();
9382 end_sequence ();
9383 return insns;
9384 }
9385
9386 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
9387 and the change to the stack pointer. */
9388
9389 static void
9390 s390_emit_stack_tie (void)
9391 {
9392 rtx mem = gen_frame_mem (BLKmode,
9393 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
9394
9395 emit_insn (gen_stack_tie (mem));
9396 }
9397
9398 /* Copy GPRS into FPR save slots. */
9399
9400 static void
9401 s390_save_gprs_to_fprs (void)
9402 {
9403 int i;
9404
9405 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9406 return;
9407
9408 for (i = 6; i < 16; i++)
9409 {
9410 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
9411 {
9412 rtx_insn *insn =
9413 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
9414 gen_rtx_REG (DImode, i));
9415 RTX_FRAME_RELATED_P (insn) = 1;
9416 }
9417 }
9418 }
9419
9420 /* Restore GPRs from FPR save slots. */
9421
9422 static void
9423 s390_restore_gprs_from_fprs (void)
9424 {
9425 int i;
9426
9427 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9428 return;
9429
9430 for (i = 6; i < 16; i++)
9431 {
9432 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
9433 {
9434 rtx_insn *insn =
9435 emit_move_insn (gen_rtx_REG (DImode, i),
9436 gen_rtx_REG (DImode, cfun_gpr_save_slot (i)));
9437 df_set_regs_ever_live (i, true);
9438 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
9439 if (i == STACK_POINTER_REGNUM)
9440 add_reg_note (insn, REG_CFA_DEF_CFA,
9441 plus_constant (Pmode, stack_pointer_rtx,
9442 STACK_POINTER_OFFSET));
9443 RTX_FRAME_RELATED_P (insn) = 1;
9444 }
9445 }
9446 }
9447
9448
9449 /* A pass run immediately before shrink-wrapping and prologue and epilogue
9450 generation. */
9451
9452 namespace {
9453
9454 const pass_data pass_data_s390_early_mach =
9455 {
9456 RTL_PASS, /* type */
9457 "early_mach", /* name */
9458 OPTGROUP_NONE, /* optinfo_flags */
9459 TV_MACH_DEP, /* tv_id */
9460 0, /* properties_required */
9461 0, /* properties_provided */
9462 0, /* properties_destroyed */
9463 0, /* todo_flags_start */
9464 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
9465 };
9466
9467 class pass_s390_early_mach : public rtl_opt_pass
9468 {
9469 public:
9470 pass_s390_early_mach (gcc::context *ctxt)
9471 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
9472 {}
9473
9474 /* opt_pass methods: */
9475 virtual unsigned int execute (function *);
9476
9477 }; // class pass_s390_early_mach
9478
9479 unsigned int
9480 pass_s390_early_mach::execute (function *fun)
9481 {
9482 rtx_insn *insn;
9483
9484 /* Try to get rid of the FPR clobbers. */
9485 s390_optimize_nonescaping_tx ();
9486
9487 /* Re-compute register info. */
9488 s390_register_info ();
9489
9490 /* If we're using a base register, ensure that it is always valid for
9491 the first non-prologue instruction. */
9492 if (fun->machine->base_reg)
9493 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
9494
9495 /* Annotate all constant pool references to let the scheduler know
9496 they implicitly use the base register. */
9497 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9498 if (INSN_P (insn))
9499 {
9500 annotate_constant_pool_refs (&PATTERN (insn));
9501 df_insn_rescan (insn);
9502 }
9503 return 0;
9504 }
9505
9506 } // anon namespace
9507
9508 /* Expand the prologue into a bunch of separate insns. */
9509
9510 void
9511 s390_emit_prologue (void)
9512 {
9513 rtx insn, addr;
9514 rtx temp_reg;
9515 int i;
9516 int offset;
9517 int next_fpr = 0;
9518
9519 /* Choose the best register to use as a temporary within the prologue.
9520 See below for why TPF must use register 1.
9521
9522 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
9523 && !crtl->is_leaf
9524 && !TARGET_TPF_PROFILING)
9525 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
9526 else
9527 temp_reg = gen_rtx_REG (Pmode, 1);
9528
9529 s390_save_gprs_to_fprs ();
9530
9531 /* Save call saved gprs. */
9532 if (cfun_frame_layout.first_save_gpr != -1)
9533 {
9534 insn = save_gprs (stack_pointer_rtx,
9535 cfun_frame_layout.gprs_offset +
9536 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
9537 - cfun_frame_layout.first_save_gpr_slot),
9538 cfun_frame_layout.first_save_gpr,
9539 cfun_frame_layout.last_save_gpr);
9540 emit_insn (insn);
9541 }
9542
9543 /* Dummy insn to mark literal pool slot. */
9544
9545 if (cfun->machine->base_reg)
9546 emit_insn (gen_main_pool (cfun->machine->base_reg));
9547
9548 offset = cfun_frame_layout.f0_offset;
9549
9550 /* Save f0 and f2. */
9551 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
9552 {
9553 if (cfun_fpr_save_p (i))
9554 {
9555 save_fpr (stack_pointer_rtx, offset, i);
9556 offset += 8;
9557 }
9558 else if (!TARGET_PACKED_STACK || cfun->stdarg)
9559 offset += 8;
9560 }
9561
9562 /* Save f4 and f6. */
9563 offset = cfun_frame_layout.f4_offset;
9564 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
9565 {
9566 if (cfun_fpr_save_p (i))
9567 {
9568 insn = save_fpr (stack_pointer_rtx, offset, i);
9569 offset += 8;
9570
9571 /* If f4 and f6 are call clobbered they are saved due to
9572 stdarg and therefore are not frame related. */
9573 if (!call_really_used_regs[i])
9574 RTX_FRAME_RELATED_P (insn) = 1;
9575 }
9576 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
9577 offset += 8;
9578 }
9579
9580 if (TARGET_PACKED_STACK
9581 && cfun_save_high_fprs_p
9582 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
9583 {
9584 offset = (cfun_frame_layout.f8_offset
9585 + (cfun_frame_layout.high_fprs - 1) * 8);
9586
9587 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
9588 if (cfun_fpr_save_p (i))
9589 {
9590 insn = save_fpr (stack_pointer_rtx, offset, i);
9591
9592 RTX_FRAME_RELATED_P (insn) = 1;
9593 offset -= 8;
9594 }
9595 if (offset >= cfun_frame_layout.f8_offset)
9596 next_fpr = i;
9597 }
9598
9599 if (!TARGET_PACKED_STACK)
9600 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
9601
9602 if (flag_stack_usage_info)
9603 current_function_static_stack_size = cfun_frame_layout.frame_size;
9604
9605 /* Decrement stack pointer. */
9606
9607 if (cfun_frame_layout.frame_size > 0)
9608 {
9609 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
9610 rtx real_frame_off;
9611
9612 if (s390_stack_size)
9613 {
9614 HOST_WIDE_INT stack_guard;
9615
9616 if (s390_stack_guard)
9617 stack_guard = s390_stack_guard;
9618 else
9619 {
9620 /* If no value for the stack guard is provided, the smallest power of 2
9621 no smaller than the current frame size is chosen. */
9622 stack_guard = 1;
9623 while (stack_guard < cfun_frame_layout.frame_size)
9624 stack_guard <<= 1;
9625 }
9626
9627 if (cfun_frame_layout.frame_size >= s390_stack_size)
9628 {
9629 warning (0, "frame size of function %qs is %wd"
9630 " bytes exceeding user provided stack limit of "
9631 "%d bytes. "
9632 "An unconditional trap is added.",
9633 current_function_name(), cfun_frame_layout.frame_size,
9634 s390_stack_size);
9635 emit_insn (gen_trap ());
9636 }
9637 else
9638 {
9639 /* stack_guard has to be smaller than s390_stack_size.
9640 Otherwise we would emit an AND with zero which would
9641 not match the test under mask pattern. */
9642 if (stack_guard >= s390_stack_size)
9643 {
9644 warning (0, "frame size of function %qs is %wd"
9645 " bytes which is more than half the stack size. "
9646 "The dynamic check would not be reliable. "
9647 "No check emitted for this function.",
9648 current_function_name(),
9649 cfun_frame_layout.frame_size);
9650 }
9651 else
9652 {
9653 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
9654 & ~(stack_guard - 1));
9655
9656 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
9657 GEN_INT (stack_check_mask));
9658 if (TARGET_64BIT)
9659 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
9660 t, const0_rtx),
9661 t, const0_rtx, const0_rtx));
9662 else
9663 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
9664 t, const0_rtx),
9665 t, const0_rtx, const0_rtx));
9666 }
9667 }
9668 }
9669
9670 if (s390_warn_framesize > 0
9671 && cfun_frame_layout.frame_size >= s390_warn_framesize)
9672 warning (0, "frame size of %qs is %wd bytes",
9673 current_function_name (), cfun_frame_layout.frame_size);
9674
9675 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
9676 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
9677
9678 /* Save incoming stack pointer into temp reg. */
9679 if (TARGET_BACKCHAIN || next_fpr)
9680 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
9681
9682 /* Subtract frame size from stack pointer. */
9683
9684 if (DISP_IN_RANGE (INTVAL (frame_off)))
9685 {
9686 insn = gen_rtx_SET (stack_pointer_rtx,
9687 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9688 frame_off));
9689 insn = emit_insn (insn);
9690 }
9691 else
9692 {
9693 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
9694 frame_off = force_const_mem (Pmode, frame_off);
9695
9696 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
9697 annotate_constant_pool_refs (&PATTERN (insn));
9698 }
9699
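      /* The stack pointer adjustment above may have been emitted via a
	 literal pool reference; attach a REG_FRAME_RELATED_EXPR note with
	 the plain constant offset so the unwinder sees the logical
	 decrement.  */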
9700 RTX_FRAME_RELATED_P (insn) = 1;
9701 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
9702 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9703 gen_rtx_SET (stack_pointer_rtx,
9704 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9705 real_frame_off)));
9706
9707 /* Set backchain. */
9708
9709 if (TARGET_BACKCHAIN)
9710 {
9711 if (cfun_frame_layout.backchain_offset)
9712 addr = gen_rtx_MEM (Pmode,
9713 plus_constant (Pmode, stack_pointer_rtx,
9714 cfun_frame_layout.backchain_offset));
9715 else
9716 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9717 set_mem_alias_set (addr, get_frame_alias_set ());
9718 insn = emit_insn (gen_move_insn (addr, temp_reg));
9719 }
9720
9721 /* If we support non-call exceptions (e.g. for Java),
9722 we need to make sure the backchain pointer is set up
9723 before any possibly trapping memory access. */
9724 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
9725 {
9726 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
9727 emit_clobber (addr);
9728 }
9729 }
9730
9731 /* Save fprs 8 - 15 (64 bit ABI). */
9732
9733 if (cfun_save_high_fprs_p && next_fpr)
9734 {
9735 /* If the stack might be accessed through a different register
9736 we have to make sure that the stack pointer decrement is not
9737 moved below the use of the stack slots. */
9738 s390_emit_stack_tie ();
9739
9740 insn = emit_insn (gen_add2_insn (temp_reg,
9741 GEN_INT (cfun_frame_layout.f8_offset)));
9742
9743 offset = 0;
9744
9745 for (i = FPR8_REGNUM; i <= next_fpr; i++)
9746 if (cfun_fpr_save_p (i))
9747 {
9748 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
9749 cfun_frame_layout.frame_size
9750 + cfun_frame_layout.f8_offset
9751 + offset);
9752
9753 insn = save_fpr (temp_reg, offset, i);
9754 offset += 8;
9755 RTX_FRAME_RELATED_P (insn) = 1;
9756 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9757 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
9758 gen_rtx_REG (DFmode, i)));
9759 }
9760 }
9761
9762 /* Set frame pointer, if needed. */
9763
9764 if (frame_pointer_needed)
9765 {
9766 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9767 RTX_FRAME_RELATED_P (insn) = 1;
9768 }
9769
9770 /* Set up got pointer, if needed. */
9771
9772 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9773 {
9774 rtx_insn *insns = s390_load_got ();
9775
9776 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
9777 annotate_constant_pool_refs (&PATTERN (insn));
9778
9779 emit_insn (insns);
9780 }
9781
9782 if (TARGET_TPF_PROFILING)
9783 {
9784 /* Generate a BAS instruction to serve as a function
9785 entry intercept to facilitate the use of tracing
9786 algorithms located at the branch target. */
9787 emit_insn (gen_prologue_tpf ());
9788
9789 /* Emit a blockage here so that all code
9790 lies between the profiling mechanisms. */
9791 emit_insn (gen_blockage ());
9792 }
9793 }
9794
9795 /* Expand the epilogue into a bunch of separate insns. */
9796
9797 void
9798 s390_emit_epilogue (bool sibcall)
9799 {
9800 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
9801 int area_bottom, area_top, offset = 0;
9802 int next_offset;
9803 rtvec p;
9804 int i;
9805
9806 if (TARGET_TPF_PROFILING)
9807 {
9808
9809       /* Generate a BAS instruction to serve as a function
9810 	 exit intercept to facilitate the use of tracing
9811 	 algorithms located at the branch target.  */
9812
9813 /* Emit a blockage here so that all code
9814 lies between the profiling mechanisms. */
9815 emit_insn (gen_blockage ());
9816
9817 emit_insn (gen_epilogue_tpf ());
9818 }
9819
9820 /* Check whether to use frame or stack pointer for restore. */
9821
9822 frame_pointer = (frame_pointer_needed
9823 ? hard_frame_pointer_rtx : stack_pointer_rtx);
9824
9825 s390_frame_area (&area_bottom, &area_top);
9826
9827 /* Check whether we can access the register save area.
9828 If not, increment the frame pointer as required. */
9829
9830 if (area_top <= area_bottom)
9831 {
9832 /* Nothing to restore. */
9833 }
9834 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
9835 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
9836 {
9837 /* Area is in range. */
9838 offset = cfun_frame_layout.frame_size;
9839 }
9840 else
9841 {
9842 rtx insn, frame_off, cfa;
9843
9844 offset = area_bottom < 0 ? -area_bottom : 0;
9845 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
9846
9847 cfa = gen_rtx_SET (frame_pointer,
9848 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
9849 if (DISP_IN_RANGE (INTVAL (frame_off)))
9850 {
9851 insn = gen_rtx_SET (frame_pointer,
9852 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
9853 insn = emit_insn (insn);
9854 }
9855 else
9856 {
9857 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
9858 frame_off = force_const_mem (Pmode, frame_off);
9859
9860 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
9861 annotate_constant_pool_refs (&PATTERN (insn));
9862 }
9863 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
9864 RTX_FRAME_RELATED_P (insn) = 1;
9865 }
9866
9867 /* Restore call saved fprs. */
9868
9869 if (TARGET_64BIT)
9870 {
9871 if (cfun_save_high_fprs_p)
9872 {
9873 next_offset = cfun_frame_layout.f8_offset;
9874 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
9875 {
9876 if (cfun_fpr_save_p (i))
9877 {
9878 restore_fpr (frame_pointer,
9879 offset + next_offset, i);
9880 cfa_restores
9881 = alloc_reg_note (REG_CFA_RESTORE,
9882 gen_rtx_REG (DFmode, i), cfa_restores);
9883 next_offset += 8;
9884 }
9885 }
9886 }
9887
9888 }
9889 else
9890 {
9891 next_offset = cfun_frame_layout.f4_offset;
9892 /* f4, f6 */
9893 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
9894 {
9895 if (cfun_fpr_save_p (i))
9896 {
9897 restore_fpr (frame_pointer,
9898 offset + next_offset, i);
9899 cfa_restores
9900 = alloc_reg_note (REG_CFA_RESTORE,
9901 gen_rtx_REG (DFmode, i), cfa_restores);
9902 next_offset += 8;
9903 }
9904 else if (!TARGET_PACKED_STACK)
9905 next_offset += 8;
9906 }
9907
9908 }
9909
9910 /* Return register. */
9911
9912 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
9913
9914 /* Restore call saved gprs. */
9915
9916 if (cfun_frame_layout.first_restore_gpr != -1)
9917 {
9918 rtx insn, addr;
9919 int i;
9920
9921       /* Check for global registers and save them
9922 	 to the stack locations from which they get restored.  */
9923
9924 for (i = cfun_frame_layout.first_restore_gpr;
9925 i <= cfun_frame_layout.last_restore_gpr;
9926 i++)
9927 {
9928 if (global_not_special_regno_p (i))
9929 {
9930 addr = plus_constant (Pmode, frame_pointer,
9931 offset + cfun_frame_layout.gprs_offset
9932 + (i - cfun_frame_layout.first_save_gpr_slot)
9933 * UNITS_PER_LONG);
9934 addr = gen_rtx_MEM (Pmode, addr);
9935 set_mem_alias_set (addr, get_frame_alias_set ());
9936 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
9937 }
9938 else
9939 cfa_restores
9940 = alloc_reg_note (REG_CFA_RESTORE,
9941 gen_rtx_REG (Pmode, i), cfa_restores);
9942 }
9943
9944 if (! sibcall)
9945 {
9946 	  /* Fetch the return address from the stack before the load multiple;
9947 	     this is good for scheduling.
9948
9949 Only do this if we already decided that r14 needs to be
9950 saved to a stack slot. (And not just because r14 happens to
9951 be in between two GPRs which need saving.) Otherwise it
9952 would be difficult to take that decision back in
9953 s390_optimize_prologue. */
9954 if (cfun_gpr_save_slot (RETURN_REGNUM) == -1)
9955 {
9956 int return_regnum = find_unused_clobbered_reg();
9957 if (!return_regnum)
9958 return_regnum = 4;
9959 return_reg = gen_rtx_REG (Pmode, return_regnum);
9960
9961 addr = plus_constant (Pmode, frame_pointer,
9962 offset + cfun_frame_layout.gprs_offset
9963 + (RETURN_REGNUM
9964 - cfun_frame_layout.first_save_gpr_slot)
9965 * UNITS_PER_LONG);
9966 addr = gen_rtx_MEM (Pmode, addr);
9967 set_mem_alias_set (addr, get_frame_alias_set ());
9968 emit_move_insn (return_reg, addr);
9969
9970 	      /* Once we have done this optimization we have to make sure
9971 		 s390_optimize_prologue does not try to remove the
9972 		 store of r14 since we will not be able to find the
9973 		 load issued here.  */
9974 cfun_frame_layout.save_return_addr_p = true;
9975 }
9976 }
9977
9978 insn = restore_gprs (frame_pointer,
9979 offset + cfun_frame_layout.gprs_offset
9980 + (cfun_frame_layout.first_restore_gpr
9981 - cfun_frame_layout.first_save_gpr_slot)
9982 * UNITS_PER_LONG,
9983 cfun_frame_layout.first_restore_gpr,
9984 cfun_frame_layout.last_restore_gpr);
9985 insn = emit_insn (insn);
9986 REG_NOTES (insn) = cfa_restores;
9987 add_reg_note (insn, REG_CFA_DEF_CFA,
9988 plus_constant (Pmode, stack_pointer_rtx,
9989 STACK_POINTER_OFFSET));
9990 RTX_FRAME_RELATED_P (insn) = 1;
9991 }
9992
9993 s390_restore_gprs_from_fprs ();
9994
9995 if (! sibcall)
9996 {
9997
9998 /* Return to caller. */
9999
10000 p = rtvec_alloc (2);
10001
10002 RTVEC_ELT (p, 0) = ret_rtx;
10003 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
10004 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
10005 }
10006 }
10007
10008 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
10009
10010 static void
10011 s300_set_up_by_prologue (hard_reg_set_container *regs)
10012 {
10013 if (cfun->machine->base_reg
10014 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
10015 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
10016 }
10017
10018 /* Return true if the function can use simple_return to return outside
10019 of a shrink-wrapped region. At present shrink-wrapping is supported
10020 in all cases. */
10021
10022 bool
10023 s390_can_use_simple_return_insn (void)
10024 {
10025 return true;
10026 }
10027
10028 /* Return true if the epilogue is guaranteed to contain only a return
10029 instruction and if a direct return can therefore be used instead.
10030 One of the main advantages of using direct return instructions
10031 is that we can then use conditional returns. */
10032
10033 bool
10034 s390_can_use_return_insn (void)
10035 {
10036 int i;
10037
10038 if (!reload_completed)
10039 return false;
10040
10041 if (crtl->profile)
10042 return false;
10043
10044 if (TARGET_TPF_PROFILING)
10045 return false;
10046
10047 for (i = 0; i < 16; i++)
10048 if (cfun_gpr_save_slot (i))
10049 return false;
10050
10051 /* For 31 bit this is not covered by the frame_size check below
10052 since f4, f6 are saved in the register save area without needing
10053 additional stack space. */
10054 if (!TARGET_64BIT
10055 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
10056 return false;
10057
10058 if (cfun->machine->base_reg
10059 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
10060 return false;
10061
10062 return cfun_frame_layout.frame_size == 0;
10063 }
10064
10065 /* The VX ABI differs for vararg functions. Therefore we need the
10066 prototype of the callee to be available when passing vector type
10067 values. */
10068 static const char *
10069 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
10070 {
10071 return ((TARGET_VX_ABI
10072 && typelist == 0
10073 && VECTOR_TYPE_P (TREE_TYPE (val))
10074 && (funcdecl == NULL_TREE
10075 || (TREE_CODE (funcdecl) == FUNCTION_DECL
10076 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
10077 ? N_("Vector argument passed to unprototyped function")
10078 : NULL);
10079 }
10080
10081
10082 /* Return the size in bytes of a function argument of
10083 type TYPE and/or mode MODE. At least one of TYPE or
10084 MODE must be specified. */
10085
10086 static int
10087 s390_function_arg_size (machine_mode mode, const_tree type)
10088 {
10089 if (type)
10090 return int_size_in_bytes (type);
10091
10092 /* No type info available for some library calls ... */
10093 if (mode != BLKmode)
10094 return GET_MODE_SIZE (mode);
10095
10096   /* If we have neither type nor mode, abort.  */
10097 gcc_unreachable ();
10098 }
10099
10100 /* Return true if a function argument of type TYPE and mode MODE
10101 is to be passed in a vector register, if available. */
10102
10103 bool
10104 s390_function_arg_vector (machine_mode mode, const_tree type)
10105 {
10106 if (!TARGET_VX_ABI)
10107 return false;
10108
10109 if (s390_function_arg_size (mode, type) > 16)
10110 return false;
10111
10112 /* No type info available for some library calls ... */
10113 if (!type)
10114 return VECTOR_MODE_P (mode);
10115
10116 /* The ABI says that record types with a single member are treated
10117 just like that member would be. */
10118 while (TREE_CODE (type) == RECORD_TYPE)
10119 {
10120 tree field, single = NULL_TREE;
10121
10122 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
10123 {
10124 if (TREE_CODE (field) != FIELD_DECL)
10125 continue;
10126
10127 if (single == NULL_TREE)
10128 single = TREE_TYPE (field);
10129 else
10130 return false;
10131 }
10132
10133 if (single == NULL_TREE)
10134 return false;
10135 else
10136 {
10137 	  /* If the field declaration adds extra bytes due to
10138 	     e.g. padding, this is not accepted as a vector type.  */
10139 if (int_size_in_bytes (single) <= 0
10140 || int_size_in_bytes (single) != int_size_in_bytes (type))
10141 return false;
10142 type = single;
10143 }
10144 }
10145
10146 return VECTOR_TYPE_P (type);
10147 }
10148
10149 /* Return true if a function argument of type TYPE and mode MODE
10150 is to be passed in a floating-point register, if available. */
10151
10152 static bool
10153 s390_function_arg_float (machine_mode mode, const_tree type)
10154 {
10155 if (s390_function_arg_size (mode, type) > 8)
10156 return false;
10157
10158 /* Soft-float changes the ABI: no floating-point registers are used. */
10159 if (TARGET_SOFT_FLOAT)
10160 return false;
10161
10162 /* No type info available for some library calls ... */
10163 if (!type)
10164 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
10165
10166 /* The ABI says that record types with a single member are treated
10167 just like that member would be. */
10168 while (TREE_CODE (type) == RECORD_TYPE)
10169 {
10170 tree field, single = NULL_TREE;
10171
10172 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
10173 {
10174 if (TREE_CODE (field) != FIELD_DECL)
10175 continue;
10176
10177 if (single == NULL_TREE)
10178 single = TREE_TYPE (field);
10179 else
10180 return false;
10181 }
10182
10183 if (single == NULL_TREE)
10184 return false;
10185 else
10186 type = single;
10187 }
10188
10189 return TREE_CODE (type) == REAL_TYPE;
10190 }
10191
10192 /* Return true if a function argument of type TYPE and mode MODE
10193 is to be passed in an integer register, or a pair of integer
10194 registers, if available. */
10195
10196 static bool
10197 s390_function_arg_integer (machine_mode mode, const_tree type)
10198 {
10199 int size = s390_function_arg_size (mode, type);
10200 if (size > 8)
10201 return false;
10202
10203 /* No type info available for some library calls ... */
10204 if (!type)
10205 return GET_MODE_CLASS (mode) == MODE_INT
10206 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
10207
10208 /* We accept small integral (and similar) types. */
10209 if (INTEGRAL_TYPE_P (type)
10210 || POINTER_TYPE_P (type)
10211 || TREE_CODE (type) == NULLPTR_TYPE
10212 || TREE_CODE (type) == OFFSET_TYPE
10213 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
10214 return true;
10215
10216 /* We also accept structs of size 1, 2, 4, 8 that are not
10217 passed in floating-point registers. */
10218 if (AGGREGATE_TYPE_P (type)
10219 && exact_log2 (size) >= 0
10220 && !s390_function_arg_float (mode, type))
10221 return true;
10222
10223 return false;
10224 }
10225
10226 /* Return 1 if a function argument of type TYPE and mode MODE
10227 is to be passed by reference. The ABI specifies that only
10228 structures of size 1, 2, 4, or 8 bytes are passed by value,
10229 all other structures (and complex numbers) are passed by
10230 reference. */
10231
10232 static bool
10233 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
10234 machine_mode mode, const_tree type,
10235 bool named ATTRIBUTE_UNUSED)
10236 {
10237 int size = s390_function_arg_size (mode, type);
10238
10239 if (s390_function_arg_vector (mode, type))
10240 return false;
10241
10242 if (size > 8)
10243 return true;
10244
10245 if (type)
10246 {
10247 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
10248 return true;
10249
10250 if (TREE_CODE (type) == COMPLEX_TYPE
10251 || TREE_CODE (type) == VECTOR_TYPE)
10252 return true;
10253 }
10254
10255 return false;
10256 }
10257
10258 /* Update the data in CUM to advance over an argument of mode MODE and
10259    data type TYPE.  (TYPE is null for libcalls where that information
10260    may not be available.)  The boolean NAMED specifies whether the
10261 argument is a named argument (as opposed to an unnamed argument
10262 matching an ellipsis). */
10263
10264 static void
10265 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
10266 const_tree type, bool named)
10267 {
10268 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10269
10270 if (s390_function_arg_vector (mode, type))
10271 {
10272 /* We are called for unnamed vector stdarg arguments which are
10273 passed on the stack. In this case this hook does not have to
10274 do anything since stack arguments are tracked by common
10275 code. */
10276 if (!named)
10277 return;
10278 cum->vrs += 1;
10279 }
10280 else if (s390_function_arg_float (mode, type))
10281 {
10282 cum->fprs += 1;
10283 }
10284 else if (s390_function_arg_integer (mode, type))
10285 {
10286 int size = s390_function_arg_size (mode, type);
10287 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
10288 }
10289 else
10290 gcc_unreachable ();
10291 }
10292
10293 /* Define where to put the arguments to a function.
10294 Value is zero to push the argument on the stack,
10295 or a hard register in which to store the argument.
10296
10297 MODE is the argument's machine mode.
10298 TYPE is the data type of the argument (as a tree).
10299 This is null for libcalls where that information may
10300 not be available.
10301 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10302 the preceding args and about the function being called.
10303 NAMED is nonzero if this argument is a named parameter
10304 (otherwise it is an extra parameter matching an ellipsis).
10305
10306 On S/390, we use general purpose registers 2 through 6 to
10307 pass integer, pointer, and certain structure arguments, and
10308 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
10309 to pass floating point arguments. All remaining arguments
10310 are pushed to the stack. */
10311
10312 static rtx
10313 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
10314 const_tree type, bool named)
10315 {
10316 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10317
10318
10319 if (s390_function_arg_vector (mode, type))
10320 {
10321       /* Vector arguments that are part of the ellipsis are passed on
10322 	 the stack.  */
10323 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
10324 return NULL_RTX;
10325
10326 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
10327 }
10328 else if (s390_function_arg_float (mode, type))
10329 {
10330 if (cum->fprs + 1 > FP_ARG_NUM_REG)
10331 return NULL_RTX;
10332 else
10333 return gen_rtx_REG (mode, cum->fprs + 16);
10334 }
10335 else if (s390_function_arg_integer (mode, type))
10336 {
10337 int size = s390_function_arg_size (mode, type);
10338 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
10339
10340 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
10341 return NULL_RTX;
10342 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
10343 return gen_rtx_REG (mode, cum->gprs + 2);
10344 else if (n_gprs == 2)
10345 {
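	  /* An 8 byte integer passed in two GPRs on 31 bit: describe it as
	     a PARALLEL of two SImode registers at byte offsets 0 and 4.  */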
10346 rtvec p = rtvec_alloc (2);
10347
10348 RTVEC_ELT (p, 0)
10349 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
10350 const0_rtx);
10351 RTVEC_ELT (p, 1)
10352 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
10353 GEN_INT (4));
10354
10355 return gen_rtx_PARALLEL (mode, p);
10356 }
10357 }
10358
10359 /* After the real arguments, expand_call calls us once again
10360 with a void_type_node type. Whatever we return here is
10361 passed as operand 2 to the call expanders.
10362
10363 We don't need this feature ... */
10364 else if (type == void_type_node)
10365 return const0_rtx;
10366
10367 gcc_unreachable ();
10368 }
10369
10370 /* Return true if return values of type TYPE should be returned
10371 in a memory buffer whose address is passed by the caller as
10372 hidden first argument. */
10373
10374 static bool
10375 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
10376 {
10377 /* We accept small integral (and similar) types. */
10378 if (INTEGRAL_TYPE_P (type)
10379 || POINTER_TYPE_P (type)
10380 || TREE_CODE (type) == OFFSET_TYPE
10381 || TREE_CODE (type) == REAL_TYPE)
10382 return int_size_in_bytes (type) > 8;
10383
10384 /* vector types which fit into a VR. */
10385 if (TARGET_VX_ABI
10386 && VECTOR_TYPE_P (type)
10387 && int_size_in_bytes (type) <= 16)
10388 return false;
10389
10390 /* Aggregates and similar constructs are always returned
10391 in memory. */
10392 if (AGGREGATE_TYPE_P (type)
10393 || TREE_CODE (type) == COMPLEX_TYPE
10394 || VECTOR_TYPE_P (type))
10395 return true;
10396
10397 /* ??? We get called on all sorts of random stuff from
10398 aggregate_value_p. We can't abort, but it's not clear
10399 what's safe to return. Pretend it's a struct I guess. */
10400 return true;
10401 }
10402
10403 /* Function arguments and return values are promoted to word size. */
10404
10405 static machine_mode
10406 s390_promote_function_mode (const_tree type, machine_mode mode,
10407 int *punsignedp,
10408 const_tree fntype ATTRIBUTE_UNUSED,
10409 int for_return ATTRIBUTE_UNUSED)
10410 {
10411 if (INTEGRAL_MODE_P (mode)
10412 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
10413 {
10414 if (type != NULL_TREE && POINTER_TYPE_P (type))
10415 *punsignedp = POINTERS_EXTEND_UNSIGNED;
10416 return Pmode;
10417 }
10418
10419 return mode;
10420 }
10421
10422 /* Define where to return a (scalar) value of type RET_TYPE.
10423 If RET_TYPE is null, define where to return a (scalar)
10424 value of mode MODE from a libcall. */
10425
10426 static rtx
10427 s390_function_and_libcall_value (machine_mode mode,
10428 const_tree ret_type,
10429 const_tree fntype_or_decl,
10430 bool outgoing ATTRIBUTE_UNUSED)
10431 {
10432 /* For vector return types it is important to use the RET_TYPE
10433 argument whenever available since the middle-end might have
10434 changed the mode to a scalar mode. */
10435 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
10436 || (!ret_type && VECTOR_MODE_P (mode)));
10437
10438 /* For normal functions perform the promotion as
10439 promote_function_mode would do. */
10440 if (ret_type)
10441 {
10442 int unsignedp = TYPE_UNSIGNED (ret_type);
10443 mode = promote_function_mode (ret_type, mode, &unsignedp,
10444 fntype_or_decl, 1);
10445 }
10446
10447 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
10448 || SCALAR_FLOAT_MODE_P (mode)
10449 || (TARGET_VX_ABI && vector_ret_type_p));
10450 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
10451
10452 if (TARGET_VX_ABI && vector_ret_type_p)
10453 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
10454 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
10455 return gen_rtx_REG (mode, 16);
10456 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
10457 || UNITS_PER_LONG == UNITS_PER_WORD)
10458 return gen_rtx_REG (mode, 2);
10459 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
10460 {
10461 /* This case is triggered when returning a 64 bit value with
10462 -m31 -mzarch. Although the value would fit into a single
10463 register it has to be forced into a 32 bit register pair in
10464 order to match the ABI. */
10465 rtvec p = rtvec_alloc (2);
10466
10467 RTVEC_ELT (p, 0)
10468 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
10469 RTVEC_ELT (p, 1)
10470 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
10471
10472 return gen_rtx_PARALLEL (mode, p);
10473 }
10474
10475 gcc_unreachable ();
10476 }
10477
10478 /* Define where to return a scalar return value of type RET_TYPE. */
10479
10480 static rtx
10481 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
10482 bool outgoing)
10483 {
10484 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
10485 fn_decl_or_type, outgoing);
10486 }
10487
10488 /* Define where to return a scalar libcall return value of mode
10489 MODE. */
10490
10491 static rtx
10492 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
10493 {
10494 return s390_function_and_libcall_value (mode, NULL_TREE,
10495 NULL_TREE, true);
10496 }
10497
10498
10499 /* Create and return the va_list datatype.
10500
10501 On S/390, va_list is an array type equivalent to
10502
10503 typedef struct __va_list_tag
10504 {
10505 long __gpr;
10506 long __fpr;
10507 void *__overflow_arg_area;
10508 void *__reg_save_area;
10509 } va_list[1];
10510
10511 where __gpr and __fpr hold the number of general purpose
10512 or floating point arguments used up to now, respectively,
10513 __overflow_arg_area points to the stack location of the
10514 next argument passed on the stack, and __reg_save_area
10515 always points to the start of the register area in the
10516 call frame of the current function. The function prologue
10517 saves all registers used for argument passing into this
10518 area if the function uses variable arguments. */
10519
10520 static tree
10521 s390_build_builtin_va_list (void)
10522 {
10523 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
10524
10525 record = lang_hooks.types.make_type (RECORD_TYPE);
10526
10527 type_decl =
10528 build_decl (BUILTINS_LOCATION,
10529 TYPE_DECL, get_identifier ("__va_list_tag"), record);
10530
10531 f_gpr = build_decl (BUILTINS_LOCATION,
10532 FIELD_DECL, get_identifier ("__gpr"),
10533 long_integer_type_node);
10534 f_fpr = build_decl (BUILTINS_LOCATION,
10535 FIELD_DECL, get_identifier ("__fpr"),
10536 long_integer_type_node);
10537 f_ovf = build_decl (BUILTINS_LOCATION,
10538 FIELD_DECL, get_identifier ("__overflow_arg_area"),
10539 ptr_type_node);
10540 f_sav = build_decl (BUILTINS_LOCATION,
10541 FIELD_DECL, get_identifier ("__reg_save_area"),
10542 ptr_type_node);
10543
10544 va_list_gpr_counter_field = f_gpr;
10545 va_list_fpr_counter_field = f_fpr;
10546
10547 DECL_FIELD_CONTEXT (f_gpr) = record;
10548 DECL_FIELD_CONTEXT (f_fpr) = record;
10549 DECL_FIELD_CONTEXT (f_ovf) = record;
10550 DECL_FIELD_CONTEXT (f_sav) = record;
10551
10552 TYPE_STUB_DECL (record) = type_decl;
10553 TYPE_NAME (record) = type_decl;
10554 TYPE_FIELDS (record) = f_gpr;
10555 DECL_CHAIN (f_gpr) = f_fpr;
10556 DECL_CHAIN (f_fpr) = f_ovf;
10557 DECL_CHAIN (f_ovf) = f_sav;
10558
10559 layout_type (record);
10560
10561 /* The correct type is an array type of one element. */
10562 return build_array_type (record, build_index_type (size_zero_node));
10563 }
10564
10565 /* Implement va_start by filling the va_list structure VALIST.
10566    NEXTARG points to the first anonymous stack argument and is
10567    ignored here.
10568
10569 The following global variables are used to initialize
10570 the va_list structure:
10571
10572 crtl->args.info:
10573 holds number of gprs and fprs used for named arguments.
10574 crtl->args.arg_offset_rtx:
10575 holds the offset of the first anonymous stack argument
10576 (relative to the virtual arg pointer). */
10577
10578 static void
10579 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
10580 {
10581 HOST_WIDE_INT n_gpr, n_fpr;
10582 int off;
10583 tree f_gpr, f_fpr, f_ovf, f_sav;
10584 tree gpr, fpr, ovf, sav, t;
10585
10586 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
10587 f_fpr = DECL_CHAIN (f_gpr);
10588 f_ovf = DECL_CHAIN (f_fpr);
10589 f_sav = DECL_CHAIN (f_ovf);
10590
10591 valist = build_simple_mem_ref (valist);
10592 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
10593 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
10594 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
10595 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
10596
10597 /* Count number of gp and fp argument registers used. */
10598
10599 n_gpr = crtl->args.info.gprs;
10600 n_fpr = crtl->args.info.fprs;
10601
10602 if (cfun->va_list_gpr_size)
10603 {
10604 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
10605 build_int_cst (NULL_TREE, n_gpr));
10606 TREE_SIDE_EFFECTS (t) = 1;
10607 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10608 }
10609
10610 if (cfun->va_list_fpr_size)
10611 {
10612 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
10613 build_int_cst (NULL_TREE, n_fpr));
10614 TREE_SIDE_EFFECTS (t) = 1;
10615 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10616 }
10617
10618 /* Find the overflow area.
10619 FIXME: This currently is too pessimistic when the vector ABI is
10620 enabled. In that case we *always* set up the overflow area
10621 pointer. */
10622 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
10623 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
10624 || TARGET_VX_ABI)
10625 {
10626 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
10627
10628 off = INTVAL (crtl->args.arg_offset_rtx);
10629 off = off < 0 ? 0 : off;
10630 if (TARGET_DEBUG_ARG)
10631 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
10632 (int)n_gpr, (int)n_fpr, off);
10633
10634 t = fold_build_pointer_plus_hwi (t, off);
10635
10636 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
10637 TREE_SIDE_EFFECTS (t) = 1;
10638 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10639 }
10640
10641 /* Find the register save area. */
10642 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
10643 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
10644 {
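      /* Derive __reg_save_area from the return address pointer, which
	 sits RETURN_REGNUM word-sized slots above the start of the
	 register save area.  */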
10645 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
10646 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
10647
10648 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
10649 TREE_SIDE_EFFECTS (t) = 1;
10650 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10651 }
10652 }
10653
10654 /* Implement va_arg by updating the va_list structure
10655 VALIST as required to retrieve an argument of type
10656 TYPE, and returning that argument.
10657
10658 Generates code equivalent to:
10659
10660 if (integral value) {
10661 if (size <= 4 && args.gpr < 5 ||
10662 size > 4 && args.gpr < 4 )
10663 ret = args.reg_save_area[args.gpr+8]
10664 else
10665 ret = *args.overflow_arg_area++;
10666 } else if (vector value) {
10667 ret = *args.overflow_arg_area;
10668 args.overflow_arg_area += size / 8;
10669 } else if (float value) {
10670 if (args.fgpr < 2)
10671 ret = args.reg_save_area[args.fpr+64]
10672 else
10673 ret = *args.overflow_arg_area++;
10674 } else if (aggregate value) {
10675 if (args.gpr < 5)
10676 ret = *args.reg_save_area[args.gpr]
10677 else
10678 ret = **args.overflow_arg_area++;
10679 } */
10680
10681 static tree
10682 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
10683 gimple_seq *post_p ATTRIBUTE_UNUSED)
10684 {
10685 tree f_gpr, f_fpr, f_ovf, f_sav;
10686 tree gpr, fpr, ovf, sav, reg, t, u;
10687 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
10688 tree lab_false, lab_over;
10689 tree addr = create_tmp_var (ptr_type_node, "addr");
10690 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
10691 a stack slot. */
10692
10693 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
10694 f_fpr = DECL_CHAIN (f_gpr);
10695 f_ovf = DECL_CHAIN (f_fpr);
10696 f_sav = DECL_CHAIN (f_ovf);
10697
10698 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
10699 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
10700 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
10701
10702 /* The tree for args* cannot be shared between gpr/fpr and ovf since
10703 both appear on a lhs. */
10704 valist = unshare_expr (valist);
10705 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
10706
10707 size = int_size_in_bytes (type);
10708
10709 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
10710 {
10711 if (TARGET_DEBUG_ARG)
10712 {
10713 fprintf (stderr, "va_arg: aggregate type");
10714 debug_tree (type);
10715 }
10716
10717 /* Aggregates are passed by reference. */
10718 indirect_p = 1;
10719 reg = gpr;
10720 n_reg = 1;
10721
10722       /* Kernel stack layout on 31 bit: it is assumed here that no padding
10723 	 will be added by s390_frame_info because for va_args an even
10724 	 number of gprs always has to be saved (r15-r2 = 14 regs).  */
10725 sav_ofs = 2 * UNITS_PER_LONG;
10726 sav_scale = UNITS_PER_LONG;
10727 size = UNITS_PER_LONG;
10728 max_reg = GP_ARG_NUM_REG - n_reg;
10729 left_align_p = false;
10730 }
10731 else if (s390_function_arg_vector (TYPE_MODE (type), type))
10732 {
10733 if (TARGET_DEBUG_ARG)
10734 {
10735 fprintf (stderr, "va_arg: vector type");
10736 debug_tree (type);
10737 }
10738
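      /* Vector values are never fetched from the register save area here:
	 with reg == NULL_TREE and max_reg == 0 the register path below is
	 skipped and the argument is always taken from the overflow area.  */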
10739 indirect_p = 0;
10740 reg = NULL_TREE;
10741 n_reg = 0;
10742 sav_ofs = 0;
10743 sav_scale = 8;
10744 max_reg = 0;
10745 left_align_p = true;
10746 }
10747 else if (s390_function_arg_float (TYPE_MODE (type), type))
10748 {
10749 if (TARGET_DEBUG_ARG)
10750 {
10751 fprintf (stderr, "va_arg: float type");
10752 debug_tree (type);
10753 }
10754
10755 /* FP args go in FP registers, if present. */
10756 indirect_p = 0;
10757 reg = fpr;
10758 n_reg = 1;
10759 sav_ofs = 16 * UNITS_PER_LONG;
10760 sav_scale = 8;
10761 max_reg = FP_ARG_NUM_REG - n_reg;
10762 left_align_p = false;
10763 }
10764 else
10765 {
10766 if (TARGET_DEBUG_ARG)
10767 {
10768 fprintf (stderr, "va_arg: other type");
10769 debug_tree (type);
10770 }
10771
10772 /* Otherwise into GP registers. */
10773 indirect_p = 0;
10774 reg = gpr;
10775 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
10776
10777       /* Kernel stack layout on 31 bit: it is assumed here that no padding
10778 	 will be added by s390_frame_info because for va_args an even
10779 	 number of gprs always has to be saved (r15-r2 = 14 regs).  */
10780 sav_ofs = 2 * UNITS_PER_LONG;
10781
10782 if (size < UNITS_PER_LONG)
10783 sav_ofs += UNITS_PER_LONG - size;
10784
10785 sav_scale = UNITS_PER_LONG;
10786 max_reg = GP_ARG_NUM_REG - n_reg;
10787 left_align_p = false;
10788 }
10789
10790 /* Pull the value out of the saved registers ... */
10791
10792 if (reg != NULL_TREE)
10793 {
10794 /*
10795 if (reg > ((typeof (reg))max_reg))
10796 goto lab_false;
10797
10798 	 addr = sav + sav_ofs + reg * sav_scale;
10799
10800 goto lab_over;
10801
10802 lab_false:
10803 */
10804
10805 lab_false = create_artificial_label (UNKNOWN_LOCATION);
10806 lab_over = create_artificial_label (UNKNOWN_LOCATION);
10807
10808 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
10809 t = build2 (GT_EXPR, boolean_type_node, reg, t);
10810 u = build1 (GOTO_EXPR, void_type_node, lab_false);
10811 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
10812 gimplify_and_add (t, pre_p);
10813
10814 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
10815 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
10816 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
10817 t = fold_build_pointer_plus (t, u);
10818
10819 gimplify_assign (addr, t, pre_p);
10820
10821 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10822
10823 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
10824 }
10825
10826 /* ... Otherwise out of the overflow area. */
10827
10828 t = ovf;
10829 if (size < UNITS_PER_LONG && !left_align_p)
10830 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
10831
10832 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10833
10834 gimplify_assign (addr, t, pre_p);
10835
10836 if (size < UNITS_PER_LONG && left_align_p)
10837 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
10838 else
10839 t = fold_build_pointer_plus_hwi (t, size);
10840
10841 gimplify_assign (ovf, t, pre_p);
10842
10843 if (reg != NULL_TREE)
10844 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
10845
10846
10847 /* Increment register save count. */
10848
10849 if (n_reg > 0)
10850 {
10851 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
10852 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
10853 gimplify_and_add (u, pre_p);
10854 }
10855
10856 if (indirect_p)
10857 {
10858 t = build_pointer_type_for_mode (build_pointer_type (type),
10859 ptr_mode, true);
10860 addr = fold_convert (t, addr);
10861 addr = build_va_arg_indirect_ref (addr);
10862 }
10863 else
10864 {
10865 t = build_pointer_type_for_mode (type, ptr_mode, true);
10866 addr = fold_convert (t, addr);
10867 }
10868
10869 return build_va_arg_indirect_ref (addr);
10870 }
10871
10872 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
10873 expanders.
10874 DEST - Register location where CC will be stored.
10875    TDB   - Pointer to a 256 byte area where to store the transaction
10876            diagnostic block.  NULL if TDB is not needed.
10877    RETRY - Retry count value.  If non-NULL a retry loop for CC2
10878            is emitted.
10879    CLOBBER_FPRS_P - If true, clobbers for all FPRs are emitted as part
10880                     of the tbegin instruction pattern.  */
10881
10882 void
10883 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
10884 {
10885 rtx retry_plus_two = gen_reg_rtx (SImode);
10886 rtx retry_reg = gen_reg_rtx (SImode);
10887 rtx_code_label *retry_label = NULL;
10888
10889 if (retry != NULL_RTX)
10890 {
10891 emit_move_insn (retry_reg, retry);
10892 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
10893 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
10894 retry_label = gen_label_rtx ();
10895 emit_label (retry_label);
10896 }
10897
10898 if (clobber_fprs_p)
10899 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), tdb));
10900 else
10901 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
10902 tdb));
10903
10904 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
10905 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
10906 CC_REGNUM)),
10907 UNSPEC_CC_TO_INT));
10908 if (retry != NULL_RTX)
10909 {
10910 const int CC0 = 1 << 3;
10911 const int CC1 = 1 << 2;
10912 const int CC3 = 1 << 0;
10913 rtx jump;
10914 rtx count = gen_reg_rtx (SImode);
10915 rtx_code_label *leave_label = gen_label_rtx ();
10916
10917 /* Exit for success and permanent failures. */
10918 jump = s390_emit_jump (leave_label,
10919 gen_rtx_EQ (VOIDmode,
10920 gen_rtx_REG (CCRAWmode, CC_REGNUM),
10921 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
10922 LABEL_NUSES (leave_label) = 1;
10923
10924 /* CC2 - transient failure. Perform retry with ppa. */
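      /* The count handed to the tx_assist (PPA) pattern is
	 (retry + 2) - retry_reg; since retry_reg starts at retry + 1 and is
	 decremented by the doloop below, this yields 1 on the first retry
	 and increases by one on each further iteration.  */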
10925 emit_move_insn (count, retry_plus_two);
10926 emit_insn (gen_subsi3 (count, count, retry_reg));
10927 emit_insn (gen_tx_assist (count));
10928 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
10929 retry_reg,
10930 retry_reg));
10931 JUMP_LABEL (jump) = retry_label;
10932 LABEL_NUSES (retry_label) = 1;
10933 emit_label (leave_label);
10934 }
10935 }
10936
10937 /* Builtins. */
10938
10939 enum s390_builtin
10940 {
10941 S390_BUILTIN_TBEGIN,
10942 S390_BUILTIN_TBEGIN_NOFLOAT,
10943 S390_BUILTIN_TBEGIN_RETRY,
10944 S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
10945 S390_BUILTIN_TBEGINC,
10946 S390_BUILTIN_TEND,
10947 S390_BUILTIN_TABORT,
10948 S390_BUILTIN_NON_TX_STORE,
10949 S390_BUILTIN_TX_NESTING_DEPTH,
10950 S390_BUILTIN_TX_ASSIST,
10951
10952 S390_BUILTIN_S390_SFPC,
10953 S390_BUILTIN_S390_EFPC,
10954
10955 S390_BUILTIN_MAX
10956 };
10957
10958 tree s390_builtin_decls[S390_BUILTIN_MAX];
10959
10960 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX] = {
10961 CODE_FOR_tbegin,
10962 CODE_FOR_tbegin_nofloat,
10963 CODE_FOR_tbegin_retry,
10964 CODE_FOR_tbegin_retry_nofloat,
10965 CODE_FOR_tbeginc,
10966 CODE_FOR_tend,
10967 CODE_FOR_tabort,
10968 CODE_FOR_ntstg,
10969 CODE_FOR_etnd,
10970 CODE_FOR_tx_assist,
10971
10972 CODE_FOR_s390_sfpc,
10973 CODE_FOR_s390_efpc
10974 };
10975
10976 static void
10977 s390_init_builtins (void)
10978 {
10979 tree ftype, uint64_type;
10980 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
10981 NULL, NULL);
10982 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
10983
10984 /* void foo (void) */
10985 ftype = build_function_type_list (void_type_node, NULL_TREE);
10986 s390_builtin_decls[S390_BUILTIN_TBEGINC] =
10987 add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC,
10988 BUILT_IN_MD, NULL, NULL_TREE);
10989
10990 /* void foo (int) */
10991 ftype = build_function_type_list (void_type_node, integer_type_node,
10992 NULL_TREE);
10993 s390_builtin_decls[S390_BUILTIN_TABORT] =
10994 add_builtin_function ("__builtin_tabort", ftype,
10995 S390_BUILTIN_TABORT, BUILT_IN_MD, NULL,
10996 noreturn_attr);
10997 s390_builtin_decls[S390_BUILTIN_TX_ASSIST] =
10998 add_builtin_function ("__builtin_tx_assist", ftype,
10999 S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE);
11000
11001 /* void foo (unsigned) */
11002 ftype = build_function_type_list (void_type_node, unsigned_type_node,
11003 NULL_TREE);
11004 s390_builtin_decls[S390_BUILTIN_S390_SFPC] =
11005 add_builtin_function ("__builtin_s390_sfpc", ftype,
11006 S390_BUILTIN_S390_SFPC, BUILT_IN_MD, NULL, NULL_TREE);
11007
11008 /* int foo (void *) */
11009 ftype = build_function_type_list (integer_type_node, ptr_type_node,
11010 NULL_TREE);
11011 s390_builtin_decls[S390_BUILTIN_TBEGIN] =
11012 add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN,
11013 BUILT_IN_MD, NULL, returns_twice_attr);
11014 s390_builtin_decls[S390_BUILTIN_TBEGIN_NOFLOAT] =
11015 add_builtin_function ("__builtin_tbegin_nofloat", ftype,
11016 S390_BUILTIN_TBEGIN_NOFLOAT,
11017 BUILT_IN_MD, NULL, returns_twice_attr);
11018
11019 /* int foo (void *, int) */
11020 ftype = build_function_type_list (integer_type_node, ptr_type_node,
11021 integer_type_node, NULL_TREE);
11022 s390_builtin_decls[S390_BUILTIN_TBEGIN_RETRY] =
11023 add_builtin_function ("__builtin_tbegin_retry", ftype,
11024 S390_BUILTIN_TBEGIN_RETRY,
11025 BUILT_IN_MD,
11026 NULL, returns_twice_attr);
11027 s390_builtin_decls[S390_BUILTIN_TBEGIN_RETRY_NOFLOAT] =
11028 add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype,
11029 S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
11030 BUILT_IN_MD,
11031 NULL, returns_twice_attr);
11032
11033 /* int foo (void) */
11034 ftype = build_function_type_list (integer_type_node, NULL_TREE);
11035 s390_builtin_decls[S390_BUILTIN_TX_NESTING_DEPTH] =
11036 add_builtin_function ("__builtin_tx_nesting_depth", ftype,
11037 S390_BUILTIN_TX_NESTING_DEPTH,
11038 BUILT_IN_MD, NULL, NULL_TREE);
11039 s390_builtin_decls[S390_BUILTIN_TEND] =
11040 add_builtin_function ("__builtin_tend", ftype,
11041 S390_BUILTIN_TEND, BUILT_IN_MD, NULL, NULL_TREE);
11042
11043 /* unsigned foo (void) */
11044 ftype = build_function_type_list (unsigned_type_node, NULL_TREE);
11045 s390_builtin_decls[S390_BUILTIN_S390_EFPC] =
11046 add_builtin_function ("__builtin_s390_efpc", ftype,
11047 S390_BUILTIN_S390_EFPC, BUILT_IN_MD, NULL, NULL_TREE);
11048
11049 /* void foo (uint64_t *, uint64_t) */
11050 if (TARGET_64BIT)
11051 uint64_type = long_unsigned_type_node;
11052 else
11053 uint64_type = long_long_unsigned_type_node;
11054
11055 ftype = build_function_type_list (void_type_node,
11056 build_pointer_type (uint64_type),
11057 uint64_type, NULL_TREE);
11058 s390_builtin_decls[S390_BUILTIN_NON_TX_STORE] =
11059 add_builtin_function ("__builtin_non_tx_store", ftype,
11060 S390_BUILTIN_NON_TX_STORE,
11061 BUILT_IN_MD, NULL, NULL_TREE);
11062 }
11063
11064 /* Expand an expression EXP that calls a built-in function,
11065 with result going to TARGET if that's convenient
11066 (and in mode MODE if that's convenient).
11067 SUBTARGET may be used as the target for computing one of EXP's operands.
11068 IGNORE is nonzero if the value is to be ignored. */
11069
11070 static rtx
11071 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11072 machine_mode mode ATTRIBUTE_UNUSED,
11073 int ignore ATTRIBUTE_UNUSED)
11074 {
11075 #define MAX_ARGS 2
11076
11077 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11078 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11079 enum insn_code icode;
11080 rtx op[MAX_ARGS], pat;
11081 int arity;
11082 bool nonvoid;
11083 tree arg;
11084 call_expr_arg_iterator iter;
11085
11086 if (fcode >= S390_BUILTIN_MAX)
11087 internal_error ("bad builtin fcode");
11088 icode = code_for_builtin[fcode];
11089 if (icode == 0)
11090 internal_error ("bad builtin fcode");
11091
11092 if (!TARGET_HTM && fcode <= S390_BUILTIN_TX_ASSIST)
11093 error ("Transactional execution builtins not enabled (-mhtm)\n");
11094
11095 /* Set a flag in the machine specific cfun part in order to support
11096 saving/restoring of FPRs. */
11097 if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY)
11098 cfun->machine->tbegin_p = true;
11099
11100 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11101
11102 arity = 0;
11103 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11104 {
11105 const struct insn_operand_data *insn_op;
11106
11107 if (arg == error_mark_node)
11108 return NULL_RTX;
11109 if (arity >= MAX_ARGS)
11110 return NULL_RTX;
11111
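      /* For builtins returning a value, operand 0 of the pattern is the
	 result; argument operands therefore start at index 1 (hence the
	 + nonvoid).  */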
11112 insn_op = &insn_data[icode].operand[arity + nonvoid];
11113
11114 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
11115
11116 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
11117 {
11118 if (insn_op->predicate == memory_operand)
11119 {
11120 /* Don't move a NULL pointer into a register. Otherwise
11121 we have to rely on combine being able to move it back
11122 in order to get an immediate 0 in the instruction. */
11123 if (op[arity] != const0_rtx)
11124 op[arity] = copy_to_mode_reg (Pmode, op[arity]);
11125 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
11126 }
11127 else
11128 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
11129 }
11130
11131 arity++;
11132 }
11133
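  /* For value-returning builtins make sure we have a target register of
     the mode required by the pattern's operand 0 predicate.  */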
11134 if (nonvoid)
11135 {
11136 machine_mode tmode = insn_data[icode].operand[0].mode;
11137 if (!target
11138 || GET_MODE (target) != tmode
11139 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
11140 target = gen_reg_rtx (tmode);
11141 }
11142
11143 switch (arity)
11144 {
11145 case 0:
11146 pat = GEN_FCN (icode) (target);
11147 break;
11148 case 1:
11149 if (nonvoid)
11150 pat = GEN_FCN (icode) (target, op[0]);
11151 else
11152 pat = GEN_FCN (icode) (op[0]);
11153 break;
11154 case 2:
11155 if (nonvoid)
11156 pat = GEN_FCN (icode) (target, op[0], op[1]);
11157 else
11158 pat = GEN_FCN (icode) (op[0], op[1]);
11159 break;
11160 default:
11161 gcc_unreachable ();
11162 }
11163 if (!pat)
11164 return NULL_RTX;
11165 emit_insn (pat);
11166
11167 if (nonvoid)
11168 return target;
11169 else
11170 return const0_rtx;
11171 }
11172
11173 /* Return the decl for the target specific builtin with the function
11174 code FCODE. */
11175
11176 static tree
11177 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
11178 {
11179 if (fcode >= S390_BUILTIN_MAX)
11180 return error_mark_node;
11181
11182 return s390_builtin_decls[fcode];
11183 }
11184
11185 /* We call mcount before the function prologue. So a profiled leaf
11186 function should stay a leaf function. */
11187
11188 static bool
11189 s390_keep_leaf_when_profiled ()
11190 {
11191 return true;
11192 }
11193
11194 /* Output assembly code for the trampoline template to
11195 stdio stream FILE.
11196
11197 On S/390, we use gpr 1 internally in the trampoline code;
11198 gpr 0 is used to hold the static chain. */
11199
11200 static void
11201 s390_asm_trampoline_template (FILE *file)
11202 {
11203 rtx op[2];
11204 op[0] = gen_rtx_REG (Pmode, 0);
11205 op[1] = gen_rtx_REG (Pmode, 1);
11206
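  /* The basr loads the address of the following instruction into gpr 1.
     The lmg/lm then fetches the static chain (into gpr 0) and the target
     address (into gpr 1) from the two pointer-sized slots which
     s390_trampoline_init stores at offsets 2 and 3 * UNITS_PER_LONG, and
     the br branches to the target.  */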
11207 if (TARGET_64BIT)
11208 {
11209 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
11210 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
11211 output_asm_insn ("br\t%1", op); /* 2 byte */
11212 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
11213 }
11214 else
11215 {
11216 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
11217 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
11218 output_asm_insn ("br\t%1", op); /* 2 byte */
11219 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
11220 }
11221 }
11222
11223 /* Emit RTL insns to initialize the variable parts of a trampoline.
11224 FNADDR is an RTX for the address of the function's pure code.
11225 CXT is an RTX for the static chain value for the function. */
11226
11227 static void
11228 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
11229 {
11230 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11231 rtx mem;
11232
11233 emit_block_move (m_tramp, assemble_trampoline_template (),
11234 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
11235
11236 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
11237 emit_move_insn (mem, cxt);
11238 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
11239 emit_move_insn (mem, fnaddr);
11240 }
11241
11242 /* Output assembler code to FILE to increment profiler label # LABELNO
11243 for profiling a function entry. */
11244
11245 void
11246 s390_function_profiler (FILE *file, int labelno)
11247 {
11248 rtx op[7];
11249
11250 char label[128];
11251 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
11252
11253 fprintf (file, "# function profiler \n");
11254
11255 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
11256 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
11257 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
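  /* op[1] is a stack slot one word above the stack pointer; the sequences
     emitted below save the return register there around the call to
     _mcount and reload it afterwards.  */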
11258
11259 op[2] = gen_rtx_REG (Pmode, 1);
11260 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
11261 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
11262
11263 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
11264 if (flag_pic)
11265 {
11266 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
11267 op[4] = gen_rtx_CONST (Pmode, op[4]);
11268 }
11269
11270 if (TARGET_64BIT)
11271 {
11272 output_asm_insn ("stg\t%0,%1", op);
11273 output_asm_insn ("larl\t%2,%3", op);
11274 output_asm_insn ("brasl\t%0,%4", op);
11275 output_asm_insn ("lg\t%0,%1", op);
11276 }
11277 else if (!flag_pic)
11278 {
11279 op[6] = gen_label_rtx ();
11280
11281 output_asm_insn ("st\t%0,%1", op);
11282 output_asm_insn ("bras\t%2,%l6", op);
11283 output_asm_insn (".long\t%4", op);
11284 output_asm_insn (".long\t%3", op);
11285 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
11286 output_asm_insn ("l\t%0,0(%2)", op);
11287 output_asm_insn ("l\t%2,4(%2)", op);
11288 output_asm_insn ("basr\t%0,%0", op);
11289 output_asm_insn ("l\t%0,%1", op);
11290 }
11291 else
11292 {
11293 op[5] = gen_label_rtx ();
11294 op[6] = gen_label_rtx ();
11295
11296 output_asm_insn ("st\t%0,%1", op);
11297 output_asm_insn ("bras\t%2,%l6", op);
11298 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
11299 output_asm_insn (".long\t%4-%l5", op);
11300 output_asm_insn (".long\t%3-%l5", op);
11301 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
11302 output_asm_insn ("lr\t%0,%2", op);
11303 output_asm_insn ("a\t%0,0(%2)", op);
11304 output_asm_insn ("a\t%2,4(%2)", op);
11305 output_asm_insn ("basr\t%0,%0", op);
11306 output_asm_insn ("l\t%0,%1", op);
11307 }
11308 }
11309
11310 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
11311 into its SYMBOL_REF_FLAGS. */
11312
11313 static void
11314 s390_encode_section_info (tree decl, rtx rtl, int first)
11315 {
11316 default_encode_section_info (decl, rtl, first);
11317
11318 if (TREE_CODE (decl) == VAR_DECL)
11319 {
11320       /* If a variable has a forced alignment to < 2 bytes, mark it
11321 	 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as a
11322 	 LARL operand.  */
11323 if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
11324 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
11325 if (!DECL_SIZE (decl)
11326 || !DECL_ALIGN (decl)
11327 || !tree_fits_shwi_p (DECL_SIZE (decl))
11328 || (DECL_ALIGN (decl) <= 64
11329 && DECL_ALIGN (decl) != tree_to_shwi (DECL_SIZE (decl))))
11330 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
11331 }
11332
11333 /* Literal pool references don't have a decl so they are handled
11334 differently here. We rely on the information in the MEM_ALIGN
11335 entry to decide upon natural alignment. */
11336 if (MEM_P (rtl)
11337 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
11338 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
11339 && (MEM_ALIGN (rtl) == 0
11340 || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
11341 || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
11342 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
11343 }
11344
11345 /* Output thunk to FILE that implements a C++ virtual function call (with
11346 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
11347 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
11348 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
11349 relative to the resulting this pointer. */
11350
11351 static void
11352 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
11353 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11354 tree function)
11355 {
11356 rtx op[10];
11357 int nonlocal = 0;
11358
11359 /* Make sure unwind info is emitted for the thunk if needed. */
11360 final_start_function (emit_barrier (), file, 1);
11361
11362 /* Operand 0 is the target function. */
11363 op[0] = XEXP (DECL_RTL (function), 0);
11364 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
11365 {
11366 nonlocal = 1;
11367 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
11368 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
11369 op[0] = gen_rtx_CONST (Pmode, op[0]);
11370 }
11371
11372 /* Operand 1 is the 'this' pointer. */
11373 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11374 op[1] = gen_rtx_REG (Pmode, 3);
11375 else
11376 op[1] = gen_rtx_REG (Pmode, 2);
11377
11378 /* Operand 2 is the delta. */
11379 op[2] = GEN_INT (delta);
11380
11381 /* Operand 3 is the vcall_offset. */
11382 op[3] = GEN_INT (vcall_offset);
11383
11384 /* Operand 4 is the temporary register. */
11385 op[4] = gen_rtx_REG (Pmode, 1);
11386
11387 /* Operands 5 to 8 can be used as labels. */
11388 op[5] = NULL_RTX;
11389 op[6] = NULL_RTX;
11390 op[7] = NULL_RTX;
11391 op[8] = NULL_RTX;
11392
11393   /* Operand 9 can be used as a temporary register.  */
11394 op[9] = NULL_RTX;
11395
11396 /* Generate code. */
11397 if (TARGET_64BIT)
11398 {
11399       /* Set up the literal pool pointer if required.  */
11400 if ((!DISP_IN_RANGE (delta)
11401 && !CONST_OK_FOR_K (delta)
11402 && !CONST_OK_FOR_Os (delta))
11403 || (!DISP_IN_RANGE (vcall_offset)
11404 && !CONST_OK_FOR_K (vcall_offset)
11405 && !CONST_OK_FOR_Os (vcall_offset)))
11406 {
11407 op[5] = gen_label_rtx ();
11408 output_asm_insn ("larl\t%4,%5", op);
11409 }
11410
11411 /* Add DELTA to this pointer. */
11412 if (delta)
11413 {
11414 if (CONST_OK_FOR_J (delta))
11415 output_asm_insn ("la\t%1,%2(%1)", op);
11416 else if (DISP_IN_RANGE (delta))
11417 output_asm_insn ("lay\t%1,%2(%1)", op);
11418 else if (CONST_OK_FOR_K (delta))
11419 output_asm_insn ("aghi\t%1,%2", op);
11420 else if (CONST_OK_FOR_Os (delta))
11421 output_asm_insn ("agfi\t%1,%2", op);
11422 else
11423 {
11424 op[6] = gen_label_rtx ();
11425 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
11426 }
11427 }
11428
11429 /* Perform vcall adjustment. */
11430 if (vcall_offset)
11431 {
11432 if (DISP_IN_RANGE (vcall_offset))
11433 {
11434 output_asm_insn ("lg\t%4,0(%1)", op);
11435 output_asm_insn ("ag\t%1,%3(%4)", op);
11436 }
11437 else if (CONST_OK_FOR_K (vcall_offset))
11438 {
11439 output_asm_insn ("lghi\t%4,%3", op);
11440 output_asm_insn ("ag\t%4,0(%1)", op);
11441 output_asm_insn ("ag\t%1,0(%4)", op);
11442 }
11443 else if (CONST_OK_FOR_Os (vcall_offset))
11444 {
11445 output_asm_insn ("lgfi\t%4,%3", op);
11446 output_asm_insn ("ag\t%4,0(%1)", op);
11447 output_asm_insn ("ag\t%1,0(%4)", op);
11448 }
11449 else
11450 {
11451 op[7] = gen_label_rtx ();
11452 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
11453 output_asm_insn ("ag\t%4,0(%1)", op);
11454 output_asm_insn ("ag\t%1,0(%4)", op);
11455 }
11456 }
11457
11458 /* Jump to target. */
11459 output_asm_insn ("jg\t%0", op);
11460
11461 /* Output literal pool if required. */
11462 if (op[5])
11463 {
11464 output_asm_insn (".align\t4", op);
11465 targetm.asm_out.internal_label (file, "L",
11466 CODE_LABEL_NUMBER (op[5]));
11467 }
11468 if (op[6])
11469 {
11470 targetm.asm_out.internal_label (file, "L",
11471 CODE_LABEL_NUMBER (op[6]));
11472 output_asm_insn (".long\t%2", op);
11473 }
11474 if (op[7])
11475 {
11476 targetm.asm_out.internal_label (file, "L",
11477 CODE_LABEL_NUMBER (op[7]));
11478 output_asm_insn (".long\t%3", op);
11479 }
11480 }
11481 else
11482 {
11483 /* Setup base pointer if required. */
11484 if (!vcall_offset
11485 || (!DISP_IN_RANGE (delta)
11486 && !CONST_OK_FOR_K (delta)
11487 && !CONST_OK_FOR_Os (delta))
11488 || (!DISP_IN_RANGE (delta)
11489 && !CONST_OK_FOR_K (vcall_offset)
11490 && !CONST_OK_FOR_Os (vcall_offset)))
11491 {
11492 op[5] = gen_label_rtx ();
11493 output_asm_insn ("basr\t%4,0", op);
11494 targetm.asm_out.internal_label (file, "L",
11495 CODE_LABEL_NUMBER (op[5]));
11496 }
11497
11498 /* Add DELTA to this pointer. */
11499 if (delta)
11500 {
11501 if (CONST_OK_FOR_J (delta))
11502 output_asm_insn ("la\t%1,%2(%1)", op);
11503 else if (DISP_IN_RANGE (delta))
11504 output_asm_insn ("lay\t%1,%2(%1)", op);
11505 else if (CONST_OK_FOR_K (delta))
11506 output_asm_insn ("ahi\t%1,%2", op);
11507 else if (CONST_OK_FOR_Os (delta))
11508 output_asm_insn ("afi\t%1,%2", op);
11509 else
11510 {
11511 op[6] = gen_label_rtx ();
11512 output_asm_insn ("a\t%1,%6-%5(%4)", op);
11513 }
11514 }
11515
11516 /* Perform vcall adjustment. */
11517 if (vcall_offset)
11518 {
11519 if (CONST_OK_FOR_J (vcall_offset))
11520 {
11521 output_asm_insn ("l\t%4,0(%1)", op);
11522 output_asm_insn ("a\t%1,%3(%4)", op);
11523 }
11524 else if (DISP_IN_RANGE (vcall_offset))
11525 {
11526 output_asm_insn ("l\t%4,0(%1)", op);
11527 output_asm_insn ("ay\t%1,%3(%4)", op);
11528 }
11529 else if (CONST_OK_FOR_K (vcall_offset))
11530 {
11531 output_asm_insn ("lhi\t%4,%3", op);
11532 output_asm_insn ("a\t%4,0(%1)", op);
11533 output_asm_insn ("a\t%1,0(%4)", op);
11534 }
11535 else if (CONST_OK_FOR_Os (vcall_offset))
11536 {
11537 output_asm_insn ("iilf\t%4,%3", op);
11538 output_asm_insn ("a\t%4,0(%1)", op);
11539 output_asm_insn ("a\t%1,0(%4)", op);
11540 }
11541 else
11542 {
11543 op[7] = gen_label_rtx ();
11544 output_asm_insn ("l\t%4,%7-%5(%4)", op);
11545 output_asm_insn ("a\t%4,0(%1)", op);
11546 output_asm_insn ("a\t%1,0(%4)", op);
11547 }
11548
11549 /* We had to clobber the base pointer register.
11550 Re-setup the base pointer (with a different base). */
11551 op[5] = gen_label_rtx ();
11552 output_asm_insn ("basr\t%4,0", op);
11553 targetm.asm_out.internal_label (file, "L",
11554 CODE_LABEL_NUMBER (op[5]));
11555 }
11556
11557 /* Jump to target. */
11558 op[8] = gen_label_rtx ();
11559
11560 if (!flag_pic)
11561 output_asm_insn ("l\t%4,%8-%5(%4)", op);
11562 else if (!nonlocal)
11563 output_asm_insn ("a\t%4,%8-%5(%4)", op);
11564 /* We cannot call through .plt, since .plt requires %r12 loaded. */
11565 else if (flag_pic == 1)
11566 {
11567 output_asm_insn ("a\t%4,%8-%5(%4)", op);
11568 output_asm_insn ("l\t%4,%0(%4)", op);
11569 }
11570 else if (flag_pic == 2)
11571 {
11572 op[9] = gen_rtx_REG (Pmode, 0);
11573 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
11574 output_asm_insn ("a\t%4,%8-%5(%4)", op);
11575 output_asm_insn ("ar\t%4,%9", op);
11576 output_asm_insn ("l\t%4,0(%4)", op);
11577 }
11578
11579 output_asm_insn ("br\t%4", op);
11580
11581 /* Output literal pool. */
11582 output_asm_insn (".align\t4", op);
11583
11584 if (nonlocal && flag_pic == 2)
11585 output_asm_insn (".long\t%0", op);
11586 if (nonlocal)
11587 {
11588 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
11589 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
11590 }
11591
11592 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
11593 if (!flag_pic)
11594 output_asm_insn (".long\t%0", op);
11595 else
11596 output_asm_insn (".long\t%0-%5", op);
11597
11598 if (op[6])
11599 {
11600 targetm.asm_out.internal_label (file, "L",
11601 CODE_LABEL_NUMBER (op[6]));
11602 output_asm_insn (".long\t%2", op);
11603 }
11604 if (op[7])
11605 {
11606 targetm.asm_out.internal_label (file, "L",
11607 CODE_LABEL_NUMBER (op[7]));
11608 output_asm_insn (".long\t%3", op);
11609 }
11610 }
11611 final_end_function ();
11612 }
11613
11614 static bool
11615 s390_valid_pointer_mode (machine_mode mode)
11616 {
11617 return (mode == SImode || (TARGET_64BIT && mode == DImode));
11618 }
11619
11620 /* Checks whether the given CALL_EXPR would use a caller
11621 saved register. This is used to decide whether sibling call
11622 optimization could be performed on the respective function
11623 call. */
11624
11625 static bool
11626 s390_call_saved_register_used (tree call_expr)
11627 {
11628 CUMULATIVE_ARGS cum_v;
11629 cumulative_args_t cum;
11630 tree parameter;
11631 machine_mode mode;
11632 tree type;
11633 rtx parm_rtx;
11634 int reg, i;
11635
11636 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
11637 cum = pack_cumulative_args (&cum_v);
11638
11639 for (i = 0; i < call_expr_nargs (call_expr); i++)
11640 {
11641 parameter = CALL_EXPR_ARG (call_expr, i);
11642 gcc_assert (parameter);
11643
11644 /* For an undeclared variable passed as parameter we will get
11645 an ERROR_MARK node here. */
11646 if (TREE_CODE (parameter) == ERROR_MARK)
11647 return true;
11648
11649 type = TREE_TYPE (parameter);
11650 gcc_assert (type);
11651
11652 mode = TYPE_MODE (type);
11653 gcc_assert (mode);
11654
11655 /* We assume that in the target function all parameters are
11656 named. This only has an impact on vector argument register
11657 usage, none of which is call-saved. */
11658 if (pass_by_reference (&cum_v, mode, type, true))
11659 {
11660 mode = Pmode;
11661 type = build_pointer_type (type);
11662 }
11663
11664 parm_rtx = s390_function_arg (cum, mode, type, true);
11665
11666 s390_function_arg_advance (cum, mode, type, true);
11667
11668 if (!parm_rtx)
11669 continue;
11670
11671 if (REG_P (parm_rtx))
11672 {
11673 for (reg = 0;
11674 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
11675 reg++)
11676 if (!call_used_regs[reg + REGNO (parm_rtx)])
11677 return true;
11678 }
11679
11680 if (GET_CODE (parm_rtx) == PARALLEL)
11681 {
11682 int i;
11683
11684 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
11685 {
11686 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
11687
11688 gcc_assert (REG_P (r));
11689
11690 for (reg = 0;
11691 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
11692 reg++)
11693 if (!call_used_regs[reg + REGNO (r)])
11694 return true;
11695 }
11696 }
11697
11698 }
11699 return false;
11700 }
11701
11702 /* Return true if the given call expression can be
11703 turned into a sibling call.
11704 DECL holds the declaration of the function to be called whereas
11705 EXP is the call expression itself. */
11706
11707 static bool
11708 s390_function_ok_for_sibcall (tree decl, tree exp)
11709 {
11710 /* The TPF epilogue uses register 1. */
11711 if (TARGET_TPF_PROFILING)
11712 return false;
11713
11714 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
11715 which would have to be restored before the sibcall. */
11716 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
11717 return false;
11718
11719 /* Register 6 on s390 is available as an argument register but unfortunately
11720 "caller saved". This makes functions needing this register for arguments
11721 not suitable for sibcalls. */
11722 return !s390_call_saved_register_used (exp);
11723 }
11724
11725 /* Return the fixed registers used for condition codes. */
11726
11727 static bool
11728 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11729 {
11730 *p1 = CC_REGNUM;
11731 *p2 = INVALID_REGNUM;
11732
11733 return true;
11734 }
11735
11736 /* This function is used by the call expanders of the machine description.
11737 It emits the call insn itself together with the necessary operations
11738 to adjust the target address and returns the emitted insn.
11739 ADDR_LOCATION is the target address rtx
11740 TLS_CALL the location of the thread-local symbol
11741 RESULT_REG the register where the result of the call should be stored
11742 RETADDR_REG the register where the return address should be stored
11743 If this parameter is NULL_RTX the call is considered
11744 to be a sibling call. */
11745
11746 rtx_insn *
11747 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
11748 rtx retaddr_reg)
11749 {
11750 bool plt_call = false;
11751 rtx_insn *insn;
11752 rtx call;
11753 rtx clobber;
11754 rtvec vec;
11755
11756 /* Direct function calls need special treatment. */
11757 if (GET_CODE (addr_location) == SYMBOL_REF)
11758 {
11759 /* When calling a global routine in PIC mode, we must
11760 replace the symbol itself with the PLT stub. */
11761 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
11762 {
11763 if (retaddr_reg != NULL_RTX)
11764 {
11765 addr_location = gen_rtx_UNSPEC (Pmode,
11766 gen_rtvec (1, addr_location),
11767 UNSPEC_PLT);
11768 addr_location = gen_rtx_CONST (Pmode, addr_location);
11769 plt_call = true;
11770 }
11771 else
11772 /* For -fpic code the PLT entries might use r12 which is
11773 call-saved. Therefore we cannot do a sibcall when
11774 calling directly using a symbol ref. When reaching
11775 this point we decided (in s390_function_ok_for_sibcall)
11776 to do a sibcall for a function pointer but one of the
11777 optimizers was able to get rid of the function pointer
11778 by propagating the symbol ref into the call. This
11779 optimization is illegal for S/390, so we turn the direct
11780 call into an indirect call again. */
11781 addr_location = force_reg (Pmode, addr_location);
11782 }
11783
11784 /* Unless we can use the bras(l) insn, force the
11785 routine address into a register. */
11786 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
11787 {
11788 if (flag_pic)
11789 addr_location = legitimize_pic_address (addr_location, 0);
11790 else
11791 addr_location = force_reg (Pmode, addr_location);
11792 }
11793 }
11794
11795 /* If it is already an indirect call or the code above moved the
11796 SYMBOL_REF to somewhere else, make sure the address can be found in
11797 register 1. */
11798 if (retaddr_reg == NULL_RTX
11799 && GET_CODE (addr_location) != SYMBOL_REF
11800 && !plt_call)
11801 {
11802 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
11803 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
11804 }
11805
11806 addr_location = gen_rtx_MEM (QImode, addr_location);
11807 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
11808
11809 if (result_reg != NULL_RTX)
11810 call = gen_rtx_SET (result_reg, call);
11811
11812 if (retaddr_reg != NULL_RTX)
11813 {
11814 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
11815
11816 if (tls_call != NULL_RTX)
11817 vec = gen_rtvec (3, call, clobber,
11818 gen_rtx_USE (VOIDmode, tls_call));
11819 else
11820 vec = gen_rtvec (2, call, clobber);
11821
11822 call = gen_rtx_PARALLEL (VOIDmode, vec);
11823 }
11824
11825 insn = emit_call_insn (call);
11826
11827 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
11828 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
11829 {
11830 /* s390_function_ok_for_sibcall should
11831 have denied sibcalls in this case. */
11832 gcc_assert (retaddr_reg != NULL_RTX);
11833 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
11834 }
11835 return insn;
11836 }
11837
11838 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
11839
11840 static void
11841 s390_conditional_register_usage (void)
11842 {
11843 int i;
11844
11845 if (flag_pic)
11846 {
11847 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11848 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11849 }
11850 if (TARGET_CPU_ZARCH)
11851 {
11852 fixed_regs[BASE_REGNUM] = 0;
11853 call_used_regs[BASE_REGNUM] = 0;
11854 fixed_regs[RETURN_REGNUM] = 0;
11855 call_used_regs[RETURN_REGNUM] = 0;
11856 }
11857 if (TARGET_64BIT)
11858 {
11859 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11860 call_used_regs[i] = call_really_used_regs[i] = 0;
11861 }
11862 else
11863 {
11864 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
11865 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
11866 }
11867
11868 if (TARGET_SOFT_FLOAT)
11869 {
11870 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
11871 call_used_regs[i] = fixed_regs[i] = 1;
11872 }
11873
11874 /* Disable v16 - v31 for non-vector target. */
11875 if (!TARGET_VX)
11876 {
11877 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
11878 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
11879 }
11880 }
11881
11882 /* Corresponding function to eh_return expander. */
11883
11884 static GTY(()) rtx s390_tpf_eh_return_symbol;
11885 void
11886 s390_emit_tpf_eh_return (rtx target)
11887 {
11888 rtx_insn *insn;
11889 rtx reg, orig_ra;
11890
11891 if (!s390_tpf_eh_return_symbol)
11892 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
11893
11894 reg = gen_rtx_REG (Pmode, 2);
11895 orig_ra = gen_rtx_REG (Pmode, 3);
11896
11897 emit_move_insn (reg, target);
11898 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
11899 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
11900 gen_rtx_REG (Pmode, RETURN_REGNUM));
11901 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
11902 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
11903
11904 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
11905 }
11906
11907 /* Rework the prologue/epilogue to avoid saving/restoring
11908 registers unnecessarily. */
11909
11910 static void
11911 s390_optimize_prologue (void)
11912 {
11913 rtx_insn *insn, *new_insn, *next_insn;
11914
11915 /* Do a final recompute of the frame-related data. */
11916 s390_optimize_register_info ();
11917
11918 /* If all special registers are in fact used, there's nothing we
11919 can do, so no point in walking the insn list. */
11920
11921 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
11922 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
11923 && (TARGET_CPU_ZARCH
11924 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
11925 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
11926 return;
11927
11928 /* Search for prologue/epilogue insns and replace them. */
11929
11930 for (insn = get_insns (); insn; insn = next_insn)
11931 {
11932 int first, last, off;
11933 rtx set, base, offset;
11934 rtx pat;
11935
11936 next_insn = NEXT_INSN (insn);
11937
11938 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
11939 continue;
11940
11941 pat = PATTERN (insn);
11942
11943 /* Remove ldgr/lgdr instructions used for saving and restoring
11944 GPRs if possible. */
11945 if (TARGET_Z10
11946 && GET_CODE (pat) == SET
11947 && GET_MODE (SET_SRC (pat)) == DImode
11948 && REG_P (SET_SRC (pat))
11949 && REG_P (SET_DEST (pat)))
11950 {
11951 int src_regno = REGNO (SET_SRC (pat));
11952 int dest_regno = REGNO (SET_DEST (pat));
11953 int gpr_regno;
11954 int fpr_regno;
11955
11956 if (!((GENERAL_REGNO_P (src_regno) && FP_REGNO_P (dest_regno))
11957 || (FP_REGNO_P (src_regno) && GENERAL_REGNO_P (dest_regno))))
11958 continue;
11959
11960 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
11961 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
11962
11963 /* GPR must be call-saved, FPR must be call-clobbered. */
11964 if (!call_really_used_regs[fpr_regno]
11965 || call_really_used_regs[gpr_regno])
11966 continue;
11967
11968 /* It must not happen that what we once saved in an FPR now
11969 needs a stack slot. */
11970 gcc_assert (cfun_gpr_save_slot (gpr_regno) != -1);
11971
11972 if (cfun_gpr_save_slot (gpr_regno) == 0)
11973 {
11974 remove_insn (insn);
11975 continue;
11976 }
11977 }
11978
11979 if (GET_CODE (pat) == PARALLEL
11980 && store_multiple_operation (pat, VOIDmode))
11981 {
11982 set = XVECEXP (pat, 0, 0);
11983 first = REGNO (SET_SRC (set));
11984 last = first + XVECLEN (pat, 0) - 1;
11985 offset = const0_rtx;
11986 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
11987 off = INTVAL (offset);
11988
11989 if (GET_CODE (base) != REG || off < 0)
11990 continue;
11991 if (cfun_frame_layout.first_save_gpr != -1
11992 && (cfun_frame_layout.first_save_gpr < first
11993 || cfun_frame_layout.last_save_gpr > last))
11994 continue;
11995 if (REGNO (base) != STACK_POINTER_REGNUM
11996 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
11997 continue;
11998 if (first > BASE_REGNUM || last < BASE_REGNUM)
11999 continue;
12000
12001 if (cfun_frame_layout.first_save_gpr != -1)
12002 {
12003 rtx s_pat = save_gprs (base,
12004 off + (cfun_frame_layout.first_save_gpr
12005 - first) * UNITS_PER_LONG,
12006 cfun_frame_layout.first_save_gpr,
12007 cfun_frame_layout.last_save_gpr);
12008 new_insn = emit_insn_before (s_pat, insn);
12009 INSN_ADDRESSES_NEW (new_insn, -1);
12010 }
12011
12012 remove_insn (insn);
12013 continue;
12014 }
12015
12016 if (cfun_frame_layout.first_save_gpr == -1
12017 && GET_CODE (pat) == SET
12018 && GENERAL_REG_P (SET_SRC (pat))
12019 && GET_CODE (SET_DEST (pat)) == MEM)
12020 {
12021 set = pat;
12022 first = REGNO (SET_SRC (set));
12023 offset = const0_rtx;
12024 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
12025 off = INTVAL (offset);
12026
12027 if (GET_CODE (base) != REG || off < 0)
12028 continue;
12029 if (REGNO (base) != STACK_POINTER_REGNUM
12030 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12031 continue;
12032
12033 remove_insn (insn);
12034 continue;
12035 }
12036
12037 if (GET_CODE (pat) == PARALLEL
12038 && load_multiple_operation (pat, VOIDmode))
12039 {
12040 set = XVECEXP (pat, 0, 0);
12041 first = REGNO (SET_DEST (set));
12042 last = first + XVECLEN (pat, 0) - 1;
12043 offset = const0_rtx;
12044 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
12045 off = INTVAL (offset);
12046
12047 if (GET_CODE (base) != REG || off < 0)
12048 continue;
12049
12050 if (cfun_frame_layout.first_restore_gpr != -1
12051 && (cfun_frame_layout.first_restore_gpr < first
12052 || cfun_frame_layout.last_restore_gpr > last))
12053 continue;
12054 if (REGNO (base) != STACK_POINTER_REGNUM
12055 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12056 continue;
12057 if (first > BASE_REGNUM || last < BASE_REGNUM)
12058 continue;
12059
12060 if (cfun_frame_layout.first_restore_gpr != -1)
12061 {
12062 rtx rpat = restore_gprs (base,
12063 off + (cfun_frame_layout.first_restore_gpr
12064 - first) * UNITS_PER_LONG,
12065 cfun_frame_layout.first_restore_gpr,
12066 cfun_frame_layout.last_restore_gpr);
12067
12068 /* Remove REG_CFA_RESTOREs for registers that we no
12069 longer need to save. */
12070 REG_NOTES (rpat) = REG_NOTES (insn);
12071 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
12072 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
12073 && ((int) REGNO (XEXP (*ptr, 0))
12074 < cfun_frame_layout.first_restore_gpr))
12075 *ptr = XEXP (*ptr, 1);
12076 else
12077 ptr = &XEXP (*ptr, 1);
12078 new_insn = emit_insn_before (rpat, insn);
12079 RTX_FRAME_RELATED_P (new_insn) = 1;
12080 INSN_ADDRESSES_NEW (new_insn, -1);
12081 }
12082
12083 remove_insn (insn);
12084 continue;
12085 }
12086
12087 if (cfun_frame_layout.first_restore_gpr == -1
12088 && GET_CODE (pat) == SET
12089 && GENERAL_REG_P (SET_DEST (pat))
12090 && GET_CODE (SET_SRC (pat)) == MEM)
12091 {
12092 set = pat;
12093 first = REGNO (SET_DEST (set));
12094 offset = const0_rtx;
12095 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
12096 off = INTVAL (offset);
12097
12098 if (GET_CODE (base) != REG || off < 0)
12099 continue;
12100
12101 if (REGNO (base) != STACK_POINTER_REGNUM
12102 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12103 continue;
12104
12105 remove_insn (insn);
12106 continue;
12107 }
12108 }
12109 }
12110
12111 /* On z10 and later the dynamic branch prediction must see the
12112 backward jump within a certain window. If not, it falls back to
12113 the static prediction. This function rearranges the loop backward
12114 branch in a way which makes the static prediction always correct.
12115 The function returns true if it added an instruction. */
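12115a
12115b /* A sketch of the rewrite (pseudo assembly, not the exact emitted
12115c mnemonics): a far backward conditional branch
12115d
12115e Lhead: ...
12115f brc cond, Lhead
12115g
12115h is turned into
12115i
12115j brc !cond, Lnew
12115k j Lhead
12115l Lnew:
12115m
12115n so the backward branch becomes unconditional (and hence is always
12115o predicted correctly) and only a short forward conditional branch
12115p remains. */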
12116 static bool
12117 s390_fix_long_loop_prediction (rtx_insn *insn)
12118 {
12119 rtx set = single_set (insn);
12120 rtx code_label, label_ref, new_label;
12121 rtx_insn *uncond_jump;
12122 rtx_insn *cur_insn;
12123 rtx tmp;
12124 int distance;
12125
12126 /* This will exclude branch on count and branch on index patterns
12127 since these are correctly statically predicted. */
12128 if (!set
12129 || SET_DEST (set) != pc_rtx
12130 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
12131 return false;
12132
12133 /* Skip conditional returns. */
12134 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
12135 && XEXP (SET_SRC (set), 2) == pc_rtx)
12136 return false;
12137
12138 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
12139 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
12140
12141 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
12142
12143 code_label = XEXP (label_ref, 0);
12144
12145 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
12146 || INSN_ADDRESSES (INSN_UID (insn)) == -1
12147 || (INSN_ADDRESSES (INSN_UID (insn))
12148 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
12149 return false;
12150
12151 for (distance = 0, cur_insn = PREV_INSN (insn);
12152 distance < PREDICT_DISTANCE - 6;
12153 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
12154 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
12155 return false;
12156
12157 new_label = gen_label_rtx ();
12158 uncond_jump = emit_jump_insn_after (
12159 gen_rtx_SET (pc_rtx,
12160 gen_rtx_LABEL_REF (VOIDmode, code_label)),
12161 insn);
12162 emit_label_after (new_label, uncond_jump);
12163
12164 tmp = XEXP (SET_SRC (set), 1);
12165 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
12166 XEXP (SET_SRC (set), 2) = tmp;
12167 INSN_CODE (insn) = -1;
12168
12169 XEXP (label_ref, 0) = new_label;
12170 JUMP_LABEL (insn) = new_label;
12171 JUMP_LABEL (uncond_jump) = code_label;
12172
12173 return true;
12174 }
12175
12176 /* Returns 1 if INSN reads the value of REG for purposes not related
12177 to addressing of memory, and 0 otherwise. */
12178 static int
12179 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
12180 {
12181 return reg_referenced_p (reg, PATTERN (insn))
12182 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
12183 }
12184
12185 /* Starting from INSN find_cond_jump looks downwards in the insn
12186 stream for a single jump insn which is the last user of the
12187 condition code set in INSN. */
12188 static rtx_insn *
12189 find_cond_jump (rtx_insn *insn)
12190 {
12191 for (; insn; insn = NEXT_INSN (insn))
12192 {
12193 rtx ite, cc;
12194
12195 if (LABEL_P (insn))
12196 break;
12197
12198 if (!JUMP_P (insn))
12199 {
12200 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
12201 break;
12202 continue;
12203 }
12204
12205 /* This will be triggered by a return. */
12206 if (GET_CODE (PATTERN (insn)) != SET)
12207 break;
12208
12209 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
12210 ite = SET_SRC (PATTERN (insn));
12211
12212 if (GET_CODE (ite) != IF_THEN_ELSE)
12213 break;
12214
12215 cc = XEXP (XEXP (ite, 0), 0);
12216 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
12217 break;
12218
12219 if (find_reg_note (insn, REG_DEAD, cc))
12220 return insn;
12221 break;
12222 }
12223
12224 return NULL;
12225 }
12226
12227 /* Swap the condition in COND and the operands in OP0 and OP1 so that
12228 the semantics does not change. If NULL_RTX is passed as COND the
12229 function tries to find the conditional jump starting with INSN. */
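12229a
12229b /* For example (an illustration): the comparison (gt %r1 %r2) becomes
12229c (lt %r2 %r1), which is equivalent. */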
12230 static void
12231 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
12232 {
12233 rtx tmp = *op0;
12234
12235 if (cond == NULL_RTX)
12236 {
12237 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
12238 rtx set = jump ? single_set (jump) : NULL_RTX;
12239
12240 if (set == NULL_RTX)
12241 return;
12242
12243 cond = XEXP (SET_SRC (set), 0);
12244 }
12245
12246 *op0 = *op1;
12247 *op1 = tmp;
12248 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
12249 }
12250
12251 /* On z10, instructions of the compare-and-branch family have the
12252 property of accessing the register occurring as the second operand
12253 with its bits complemented. If such a compare is grouped with a second
12254 instruction that accesses the same register non-complemented, and
12255 if that register's value is delivered via a bypass, then the
12256 pipeline recycles, thereby causing significant performance decline.
12257 This function locates such situations and exchanges the two
12258 operands of the compare. The function returns true whenever it
12259 added an insn. */
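12259a
12259b /* Roughly (an illustration, not the exact emitted mnemonics): if the
12259c insn right before "crj %r1,%r3,...,Ltarget" also uses %r3, the compare
12259d is rewritten as "crj %r3,%r1,..." with the condition reversed; if the
12259e conflict cannot be resolved by swapping, a NOP is emitted after the
12259f compare instead. */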
12260 static bool
12261 s390_z10_optimize_cmp (rtx_insn *insn)
12262 {
12263 rtx_insn *prev_insn, *next_insn;
12264 bool insn_added_p = false;
12265 rtx cond, *op0, *op1;
12266
12267 if (GET_CODE (PATTERN (insn)) == PARALLEL)
12268 {
12269 /* Handle compare and branch and branch on count
12270 instructions. */
12271 rtx pattern = single_set (insn);
12272
12273 if (!pattern
12274 || SET_DEST (pattern) != pc_rtx
12275 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
12276 return false;
12277
12278 cond = XEXP (SET_SRC (pattern), 0);
12279 op0 = &XEXP (cond, 0);
12280 op1 = &XEXP (cond, 1);
12281 }
12282 else if (GET_CODE (PATTERN (insn)) == SET)
12283 {
12284 rtx src, dest;
12285
12286 /* Handle normal compare instructions. */
12287 src = SET_SRC (PATTERN (insn));
12288 dest = SET_DEST (PATTERN (insn));
12289
12290 if (!REG_P (dest)
12291 || !CC_REGNO_P (REGNO (dest))
12292 || GET_CODE (src) != COMPARE)
12293 return false;
12294
12295 /* s390_swap_cmp will try to find the conditional
12296 jump when passing NULL_RTX as condition. */
12297 cond = NULL_RTX;
12298 op0 = &XEXP (src, 0);
12299 op1 = &XEXP (src, 1);
12300 }
12301 else
12302 return false;
12303
12304 if (!REG_P (*op0) || !REG_P (*op1))
12305 return false;
12306
12307 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
12308 return false;
12309
12310 /* Swap the COMPARE arguments and its mask if there is a
12311 conflicting access in the previous insn. */
12312 prev_insn = prev_active_insn (insn);
12313 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
12314 && reg_referenced_p (*op1, PATTERN (prev_insn)))
12315 s390_swap_cmp (cond, op0, op1, insn);
12316
12317 /* Check if there is a conflict with the next insn. If there
12318 was no conflict with the previous insn, then swap the
12319 COMPARE arguments and its mask. If we already swapped
12320 the operands, or if swapping them would cause a conflict
12321 with the previous insn, issue a NOP after the COMPARE in
12322 order to separate the two instructions. */
12323 next_insn = next_active_insn (insn);
12324 if (next_insn != NULL_RTX && INSN_P (next_insn)
12325 && s390_non_addr_reg_read_p (*op1, next_insn))
12326 {
12327 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
12328 && s390_non_addr_reg_read_p (*op0, prev_insn))
12329 {
12330 if (REGNO (*op1) == 0)
12331 emit_insn_after (gen_nop1 (), insn);
12332 else
12333 emit_insn_after (gen_nop (), insn);
12334 insn_added_p = true;
12335 }
12336 else
12337 s390_swap_cmp (cond, op0, op1, insn);
12338 }
12339 return insn_added_p;
12340 }
12341
12342 /* Perform machine-dependent processing. */
12343
12344 static void
12345 s390_reorg (void)
12346 {
12347 bool pool_overflow = false;
12348 int hw_before, hw_after;
12349
12350 /* Make sure all splits have been performed; splits after
12351 machine_dependent_reorg might confuse insn length counts. */
12352 split_all_insns_noflow ();
12353
12354 /* Install the main literal pool and the associated base
12355 register load insns.
12356
12357 In addition, there are two problematic situations we need
12358 to correct:
12359
12360 - the literal pool might be > 4096 bytes in size, so that
12361 some of its elements cannot be directly accessed
12362
12363 - a branch target might be > 64K away from the branch, so that
12364 it is not possible to use a PC-relative instruction.
12365
12366 To fix those, we split the single literal pool into multiple
12367 pool chunks, reloading the pool base register at various
12368 points throughout the function to ensure it always points to
12369 the pool chunk the following code expects, and / or replace
12370 PC-relative branches by absolute branches.
12371
12372 However, the two problems are interdependent: splitting the
12373 literal pool can move a branch further away from its target,
12374 causing the 64K limit to overflow, and on the other hand,
12375 replacing a PC-relative branch by an absolute branch means
12376 we need to put the branch target address into the literal
12377 pool, possibly causing it to overflow.
12378
12379 So, we loop trying to fix up both problems until we manage
12380 to satisfy both conditions at the same time. Note that the
12381 loop is guaranteed to terminate as every pass of the loop
12382 strictly decreases the total number of PC-relative branches
12383 in the function. (This is not completely true as there
12384 might be branch-over-pool insns introduced by chunkify_start.
12385 Those never need to be split however.) */
12386
12387 for (;;)
12388 {
12389 struct constant_pool *pool = NULL;
12390
12391 /* Collect the literal pool. */
12392 if (!pool_overflow)
12393 {
12394 pool = s390_mainpool_start ();
12395 if (!pool)
12396 pool_overflow = true;
12397 }
12398
12399 /* If literal pool overflowed, start to chunkify it. */
12400 if (pool_overflow)
12401 pool = s390_chunkify_start ();
12402
12403 /* Split out-of-range branches. If this has created new
12404 literal pool entries, cancel current chunk list and
12405 recompute it. zSeries machines have large branch
12406 instructions, so we never need to split a branch. */
12407 if (!TARGET_CPU_ZARCH && s390_split_branches ())
12408 {
12409 if (pool_overflow)
12410 s390_chunkify_cancel (pool);
12411 else
12412 s390_mainpool_cancel (pool);
12413
12414 continue;
12415 }
12416
12417 /* If we made it up to here, both conditions are satisfied.
12418 Finish up literal pool related changes. */
12419 if (pool_overflow)
12420 s390_chunkify_finish (pool);
12421 else
12422 s390_mainpool_finish (pool);
12423
12424 /* We're done splitting branches. */
12425 cfun->machine->split_branches_pending_p = false;
12426 break;
12427 }
12428
12429 /* Generate out-of-pool execute target insns. */
12430 if (TARGET_CPU_ZARCH)
12431 {
12432 rtx_insn *insn, *target;
12433 rtx label;
12434
12435 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12436 {
12437 label = s390_execute_label (insn);
12438 if (!label)
12439 continue;
12440
12441 gcc_assert (label != const0_rtx);
12442
12443 target = emit_label (XEXP (label, 0));
12444 INSN_ADDRESSES_NEW (target, -1);
12445
12446 target = emit_insn (s390_execute_target (insn));
12447 INSN_ADDRESSES_NEW (target, -1);
12448 }
12449 }
12450
12451 /* Try to optimize prologue and epilogue further. */
12452 s390_optimize_prologue ();
12453
12454 /* Walk over the insns and do some >=z10 specific changes. */
12455 if (s390_tune == PROCESSOR_2097_Z10
12456 || s390_tune == PROCESSOR_2817_Z196
12457 || s390_tune == PROCESSOR_2827_ZEC12
12458 || s390_tune == PROCESSOR_2964_Z13)
12459 {
12460 rtx_insn *insn;
12461 bool insn_added_p = false;
12462
12463 /* The insn lengths and addresses have to be up to date for the
12464 following manipulations. */
12465 shorten_branches (get_insns ());
12466
12467 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12468 {
12469 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
12470 continue;
12471
12472 if (JUMP_P (insn))
12473 insn_added_p |= s390_fix_long_loop_prediction (insn);
12474
12475 if ((GET_CODE (PATTERN (insn)) == PARALLEL
12476 || GET_CODE (PATTERN (insn)) == SET)
12477 && s390_tune == PROCESSOR_2097_Z10)
12478 insn_added_p |= s390_z10_optimize_cmp (insn);
12479 }
12480
12481 /* Adjust branches if we added new instructions. */
12482 if (insn_added_p)
12483 shorten_branches (get_insns ());
12484 }
12485
12486 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
12487 if (hw_after > 0)
12488 {
12489 rtx_insn *insn;
12490
12491 /* Insert NOPs for hotpatching. */
12492 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12493 {
12494 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_FUNCTION_BEG)
12495 break;
12496 }
12497 gcc_assert (insn);
12498 /* Output a series of NOPs after the NOTE_INSN_FUNCTION_BEG. */
12499 while (hw_after > 0)
12500 {
12501 if (hw_after >= 3 && TARGET_CPU_ZARCH)
12502 {
12503 insn = emit_insn_after (gen_nop_6_byte (), insn);
12504 hw_after -= 3;
12505 }
12506 else if (hw_after >= 2)
12507 {
12508 insn = emit_insn_after (gen_nop_4_byte (), insn);
12509 hw_after -= 2;
12510 }
12511 else
12512 {
12513 insn = emit_insn_after (gen_nop_2_byte (), insn);
12514 hw_after -= 1;
12515 }
12516 }
12517 gcc_assert (hw_after == 0);
12518 }
12519 }
12520
12521 /* Return true if INSN is a fp load insn writing register REGNO. */
12522 static inline bool
12523 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
12524 {
12525 rtx set;
12526 enum attr_type flag = s390_safe_attr_type (insn);
12527
12528 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
12529 return false;
12530
12531 set = single_set (insn);
12532
12533 if (set == NULL_RTX)
12534 return false;
12535
12536 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
12537 return false;
12538
12539 if (REGNO (SET_DEST (set)) != regno)
12540 return false;
12541
12542 return true;
12543 }
12544
12545 /* This value describes the distance to be avoided between an
12546 arithmetic fp instruction and an fp load writing the same register.
12547 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
12548 fine, but the exact value has to be avoided. Otherwise the FP
12549 pipeline will throw an exception causing a major penalty. */
12550 #define Z10_EARLYLOAD_DISTANCE 7
12551
12552 /* Rearrange the ready list in order to avoid the situation described
12553 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
12554 moved to the very end of the ready list. */
12555 static void
12556 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
12557 {
12558 unsigned int regno;
12559 int nready = *nready_p;
12560 rtx_insn *tmp;
12561 int i;
12562 rtx_insn *insn;
12563 rtx set;
12564 enum attr_type flag;
12565 int distance;
12566
12567 /* Skip DISTANCE - 1 active insns. */
12568 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
12569 distance > 0 && insn != NULL_RTX;
12570 distance--, insn = prev_active_insn (insn))
12571 if (CALL_P (insn) || JUMP_P (insn))
12572 return;
12573
12574 if (insn == NULL_RTX)
12575 return;
12576
12577 set = single_set (insn);
12578
12579 if (set == NULL_RTX || !REG_P (SET_DEST (set))
12580 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
12581 return;
12582
12583 flag = s390_safe_attr_type (insn);
12584
12585 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
12586 return;
12587
12588 regno = REGNO (SET_DEST (set));
12589 i = nready - 1;
12590
12591 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
12592 i--;
12593
12594 if (!i)
12595 return;
12596
12597 tmp = ready[i];
12598 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
12599 ready[0] = tmp;
12600 }
12601
12602
12603 /* The s390_sched_state variable tracks the state of the current or
12604 the last instruction group.
12605
12606 0,1,2 number of instructions scheduled in the current group
12607 3 the last group is complete - normal insns
12608 4 the last group was a cracked/expanded insn */
12609
12610 static int s390_sched_state;
12611
12612 #define S390_OOO_SCHED_STATE_NORMAL 3
12613 #define S390_OOO_SCHED_STATE_CRACKED 4
12614
12615 #define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1
12616 #define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2
12617 #define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4
12618 #define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
12619
12620 static unsigned int
12621 s390_get_sched_attrmask (rtx_insn *insn)
12622 {
12623 unsigned int mask = 0;
12624
12625 if (get_attr_ooo_cracked (insn))
12626 mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
12627 if (get_attr_ooo_expanded (insn))
12628 mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
12629 if (get_attr_ooo_endgroup (insn))
12630 mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
12631 if (get_attr_ooo_groupalone (insn))
12632 mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
12633 return mask;
12634 }
12635
12636 /* Return the scheduling score for INSN. The higher the score the
12637 better. The score is calculated from the OOO scheduling attributes
12638 of INSN and the scheduling state s390_sched_state. */
12639 static int
12640 s390_sched_score (rtx_insn *insn)
12641 {
12642 unsigned int mask = s390_get_sched_attrmask (insn);
12643 int score = 0;
12644
12645 switch (s390_sched_state)
12646 {
12647 case 0:
12648 /* Try to put insns into the first slot which would otherwise
12649 break a group. */
12650 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
12651 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
12652 score += 5;
12653 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
12654 score += 10;
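          /* Fall through: an insn considered for the first slot is also
             subject to the slot 1 preferences below.  */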
12655 case 1:
12656 /* Prefer not cracked insns while trying to put together a
12657 group. */
12658 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
12659 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
12660 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
12661 score += 10;
12662 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
12663 score += 5;
12664 break;
12665 case 2:
12666 /* Prefer not cracked insns while trying to put together a
12667 group. */
12668 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
12669 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
12670 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
12671 score += 10;
12672 /* Prefer endgroup insns in the last slot. */
12673 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
12674 score += 10;
12675 break;
12676 case S390_OOO_SCHED_STATE_NORMAL:
12677 /* Prefer not cracked insns if the last was not cracked. */
12678 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
12679 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
12680 score += 5;
12681 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
12682 score += 10;
12683 break;
12684 case S390_OOO_SCHED_STATE_CRACKED:
12685 /* Try to keep cracked insns together to prevent them from
12686 interrupting groups. */
12687 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
12688 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
12689 score += 5;
12690 break;
12691 }
12692 return score;
12693 }
12694
12695 /* This function is called via hook TARGET_SCHED_REORDER before
12696 issuing one insn from list READY which contains *NREADYP entries.
12697 For target z10 it reorders load instructions to avoid early load
12698 conflicts in the floating point pipeline. */
12699 static int
12700 s390_sched_reorder (FILE *file, int verbose,
12701 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
12702 {
12703 if (s390_tune == PROCESSOR_2097_Z10)
12704 if (reload_completed && *nreadyp > 1)
12705 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
12706
12707 if ((s390_tune == PROCESSOR_2827_ZEC12
12708 || s390_tune == PROCESSOR_2964_Z13)
12709 && reload_completed
12710 && *nreadyp > 1)
12711 {
12712 int i;
12713 int last_index = *nreadyp - 1;
12714 int max_index = -1;
12715 int max_score = -1;
12716 rtx_insn *tmp;
12717
12718 /* Just move the insn with the highest score to the top (the
12719 end) of the list. A full sort is not needed since a conflict
12720 in the hazard recognition cannot happen. So the top insn in
12721 the ready list will always be taken. */
12722 for (i = last_index; i >= 0; i--)
12723 {
12724 int score;
12725
12726 if (recog_memoized (ready[i]) < 0)
12727 continue;
12728
12729 score = s390_sched_score (ready[i]);
12730 if (score > max_score)
12731 {
12732 max_score = score;
12733 max_index = i;
12734 }
12735 }
12736
12737 if (max_index != -1)
12738 {
12739 if (max_index != last_index)
12740 {
12741 tmp = ready[max_index];
12742 ready[max_index] = ready[last_index];
12743 ready[last_index] = tmp;
12744
12745 if (verbose > 5)
12746 fprintf (file,
12747 "move insn %d to the top of list\n",
12748 INSN_UID (ready[last_index]));
12749 }
12750 else if (verbose > 5)
12751 fprintf (file,
12752 "best insn %d already on top\n",
12753 INSN_UID (ready[last_index]));
12754 }
12755
12756 if (verbose > 5)
12757 {
12758 fprintf (file, "ready list ooo attributes - sched state: %d\n",
12759 s390_sched_state);
12760
12761 for (i = last_index; i >= 0; i--)
12762 {
12763 if (recog_memoized (ready[i]) < 0)
12764 continue;
12765 fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
12766 s390_sched_score (ready[i]));
12767 #define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
12768 PRINT_OOO_ATTR (ooo_cracked);
12769 PRINT_OOO_ATTR (ooo_expanded);
12770 PRINT_OOO_ATTR (ooo_endgroup);
12771 PRINT_OOO_ATTR (ooo_groupalone);
12772 #undef PRINT_OOO_ATTR
12773 fprintf (file, "\n");
12774 }
12775 }
12776 }
12777
12778 return s390_issue_rate ();
12779 }
12780
12781
12782 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
12783 the scheduler has issued INSN. It stores the last issued insn into
12784 last_scheduled_insn in order to make it available for
12785 s390_sched_reorder. */
12786 static int
12787 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
12788 {
12789 last_scheduled_insn = insn;
12790
12791 if ((s390_tune == PROCESSOR_2827_ZEC12
12792 || s390_tune == PROCESSOR_2964_Z13)
12793 && reload_completed
12794 && recog_memoized (insn) >= 0)
12795 {
12796 unsigned int mask = s390_get_sched_attrmask (insn);
12797
12798 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
12799 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
12800 s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
12801 else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
12802 || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
12803 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
12804 else
12805 {
12806 /* Only normal insns are left (mask == 0). */
12807 switch (s390_sched_state)
12808 {
12809 case 0:
12810 case 1:
12811 case 2:
12812 case S390_OOO_SCHED_STATE_NORMAL:
12813 if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
12814 s390_sched_state = 1;
12815 else
12816 s390_sched_state++;
12817
12818 break;
12819 case S390_OOO_SCHED_STATE_CRACKED:
12820 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
12821 break;
12822 }
12823 }
12824 if (verbose > 5)
12825 {
12826 fprintf (file, "insn %d: ", INSN_UID (insn));
12827 #define PRINT_OOO_ATTR(ATTR) \
12828 fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
12829 PRINT_OOO_ATTR (ooo_cracked);
12830 PRINT_OOO_ATTR (ooo_expanded);
12831 PRINT_OOO_ATTR (ooo_endgroup);
12832 PRINT_OOO_ATTR (ooo_groupalone);
12833 #undef PRINT_OOO_ATTR
12834 fprintf (file, "\n");
12835 fprintf (file, "sched state: %d\n", s390_sched_state);
12836 }
12837 }
12838
12839 if (GET_CODE (PATTERN (insn)) != USE
12840 && GET_CODE (PATTERN (insn)) != CLOBBER)
12841 return more - 1;
12842 else
12843 return more;
12844 }
12845
12846 static void
12847 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
12848 int verbose ATTRIBUTE_UNUSED,
12849 int max_ready ATTRIBUTE_UNUSED)
12850 {
12851 last_scheduled_insn = NULL;
12852 s390_sched_state = 0;
12853 }
12854
12855 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
12856 the number of times struct loop *loop should be unrolled when tuning for
12857 cpus with a built-in stride prefetcher.
12858 The loop body is scanned for memory accesses by counting its MEM
12859 references. Depending on the loop_depth and the number of
12860 memory accesses, a new number <= nunroll is returned to improve the
12861 behaviour of the hardware prefetch unit. */
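12861a
12861b /* For example (a sketch of the effect): an innermost loop (depth 1)
12861c containing 7 memory references is limited to MIN (nunroll, 28 / 7),
12861d i.e. at most 4 unrolled copies. */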
12862 static unsigned
12863 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
12864 {
12865 basic_block *bbs;
12866 rtx_insn *insn;
12867 unsigned i;
12868 unsigned mem_count = 0;
12869
12870 if (s390_tune != PROCESSOR_2097_Z10
12871 && s390_tune != PROCESSOR_2817_Z196
12872 && s390_tune != PROCESSOR_2827_ZEC12
12873 && s390_tune != PROCESSOR_2964_Z13)
12874 return nunroll;
12875
12876 /* Count the number of memory references within the loop body. */
12877 bbs = get_loop_body (loop);
12878 subrtx_iterator::array_type array;
12879 for (i = 0; i < loop->num_nodes; i++)
12880 FOR_BB_INSNS (bbs[i], insn)
12881 if (INSN_P (insn) && INSN_CODE (insn) != -1)
12882 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
12883 if (MEM_P (*iter))
12884 mem_count += 1;
12885 free (bbs);
12886
12887 /* Prevent division by zero; nunroll does not need to be adjusted in this case anyway. */
12888 if (mem_count == 0)
12889 return nunroll;
12890
12891 switch (loop_depth(loop))
12892 {
12893 case 1:
12894 return MIN (nunroll, 28 / mem_count);
12895 case 2:
12896 return MIN (nunroll, 22 / mem_count);
12897 default:
12898 return MIN (nunroll, 16 / mem_count);
12899 }
12900 }
12901
12902 static void
12903 s390_option_override (void)
12904 {
12905 unsigned int i;
12906 cl_deferred_option *opt;
12907 vec<cl_deferred_option> *v =
12908 (vec<cl_deferred_option> *) s390_deferred_options;
12909
12910 if (v)
12911 FOR_EACH_VEC_ELT (*v, i, opt)
12912 {
12913 switch (opt->opt_index)
12914 {
12915 case OPT_mhotpatch_:
12916 {
12917 int val1;
12918 int val2;
12919 char s[256];
12920 char *t;
12921
12922 strncpy (s, opt->arg, 256);
12923 s[255] = 0;
12924 t = strchr (s, ',');
12925 if (t != NULL)
12926 {
12927 *t = 0;
12928 t++;
12929 val1 = integral_argument (s);
12930 val2 = integral_argument (t);
12931 }
12932 else
12933 {
12934 val1 = -1;
12935 val2 = -1;
12936 }
12937 if (val1 == -1 || val2 == -1)
12938 {
12939 /* argument is not a plain number */
12940 error ("arguments to %qs should be non-negative integers",
12941 "-mhotpatch=n,m");
12942 break;
12943 }
12944 else if (val1 > s390_hotpatch_hw_max
12945 || val2 > s390_hotpatch_hw_max)
12946 {
12947 error ("argument to %qs is too large (max. %d)",
12948 "-mhotpatch=n,m", s390_hotpatch_hw_max);
12949 break;
12950 }
12951 s390_hotpatch_hw_before_label = val1;
12952 s390_hotpatch_hw_after_label = val2;
12953 break;
12954 }
12955 default:
12956 gcc_unreachable ();
12957 }
12958 }
12959
12960 /* Set up function hooks. */
12961 init_machine_status = s390_init_machine_status;
12962
12963 /* Architecture mode defaults according to ABI. */
12964 if (!(target_flags_explicit & MASK_ZARCH))
12965 {
12966 if (TARGET_64BIT)
12967 target_flags |= MASK_ZARCH;
12968 else
12969 target_flags &= ~MASK_ZARCH;
12970 }
12971
12972 /* Set the march default in case it hasn't been specified on
12973 cmdline. */
12974 if (s390_arch == PROCESSOR_max)
12975 {
12976 s390_arch_string = TARGET_ZARCH? "z900" : "g5";
12977 s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
12978 s390_arch_flags = processor_flags_table[(int)s390_arch];
12979 }
12980
12981 /* Determine processor to tune for. */
12982 if (s390_tune == PROCESSOR_max)
12983 {
12984 s390_tune = s390_arch;
12985 s390_tune_flags = s390_arch_flags;
12986 }
12987
12988 /* Sanity checks. */
12989 if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
12990 error ("z/Architecture mode not supported on %s", s390_arch_string);
12991 if (TARGET_64BIT && !TARGET_ZARCH)
12992 error ("64-bit ABI not supported in ESA/390 mode");
12993
12994 /* Use hardware DFP if available and not explicitly disabled by
12995 user. E.g. with -m31 -march=z10 -mzarch */
12996 if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
12997 target_flags |= MASK_HARD_DFP;
12998
12999 /* Enable hardware transactions if available and not explicitly
13000 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
13001 if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
13002 target_flags |= MASK_OPT_HTM;
13003
13004 if (target_flags_explicit & MASK_OPT_VX)
13005 {
13006 if (TARGET_OPT_VX)
13007 {
13008 if (!TARGET_CPU_VX)
13009 error ("hardware vector support not available on %s",
13010 s390_arch_string);
13011 if (TARGET_SOFT_FLOAT)
13012 error ("hardware vector support not available with -msoft-float");
13013 }
13014 }
13015 else if (TARGET_CPU_VX)
13016 /* Enable vector support if available and not explicitly disabled
13017 by user. E.g. with -m31 -march=z13 -mzarch */
13018 target_flags |= MASK_OPT_VX;
13019
13020 if (TARGET_HARD_DFP && !TARGET_DFP)
13021 {
13022 if (target_flags_explicit & MASK_HARD_DFP)
13023 {
13024 if (!TARGET_CPU_DFP)
13025 error ("hardware decimal floating point instructions"
13026 " not available on %s", s390_arch_string);
13027 if (!TARGET_ZARCH)
13028 error ("hardware decimal floating point instructions"
13029 " not available in ESA/390 mode");
13030 }
13031 else
13032 target_flags &= ~MASK_HARD_DFP;
13033 }
13034
13035 if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
13036 {
13037 if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
13038 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
13039
13040 target_flags &= ~MASK_HARD_DFP;
13041 }
13042
13043 /* Set processor cost function. */
13044 switch (s390_tune)
13045 {
13046 case PROCESSOR_2084_Z990:
13047 s390_cost = &z990_cost;
13048 break;
13049 case PROCESSOR_2094_Z9_109:
13050 s390_cost = &z9_109_cost;
13051 break;
13052 case PROCESSOR_2097_Z10:
13053 s390_cost = &z10_cost;
13054 break;
13055 case PROCESSOR_2817_Z196:
13056 s390_cost = &z196_cost;
13057 break;
13058 case PROCESSOR_2827_ZEC12:
13059 case PROCESSOR_2964_Z13:
13060 s390_cost = &zEC12_cost;
13061 break;
13062 default:
13063 s390_cost = &z900_cost;
13064 }
13065
13066 if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
13067 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
13068 "in combination");
13069
13070 if (s390_stack_size)
13071 {
13072 if (s390_stack_guard >= s390_stack_size)
13073 error ("stack size must be greater than the stack guard value");
13074 else if (s390_stack_size > 1 << 16)
13075 error ("stack size must not be greater than 64k");
13076 }
13077 else if (s390_stack_guard)
13078 error ("-mstack-guard implies use of -mstack-size");
13079
13080 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
13081 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
13082 target_flags |= MASK_LONG_DOUBLE_128;
13083 #endif
13084
13085 if (s390_tune == PROCESSOR_2097_Z10
13086 || s390_tune == PROCESSOR_2817_Z196
13087 || s390_tune == PROCESSOR_2827_ZEC12
13088 || s390_tune == PROCESSOR_2964_Z13)
13089 {
13090 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
13091 global_options.x_param_values,
13092 global_options_set.x_param_values);
13093 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
13094 global_options.x_param_values,
13095 global_options_set.x_param_values);
13096 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
13097 global_options.x_param_values,
13098 global_options_set.x_param_values);
13099 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
13100 global_options.x_param_values,
13101 global_options_set.x_param_values);
13102 }
13103
13104 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
13105 global_options.x_param_values,
13106 global_options_set.x_param_values);
13107 /* Values for loop prefetching. */
13108 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
13109 global_options.x_param_values,
13110 global_options_set.x_param_values);
13111 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
13112 global_options.x_param_values,
13113 global_options_set.x_param_values);
13114 /* s390 has more than 2 cache levels and the sizes are much larger. Since
13115 we are always running virtualized, assume that we only get a small
13116 part of the caches above L1. */
13117 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
13118 global_options.x_param_values,
13119 global_options_set.x_param_values);
13120 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
13121 global_options.x_param_values,
13122 global_options_set.x_param_values);
13123 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
13124 global_options.x_param_values,
13125 global_options_set.x_param_values);
13126
13127 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
13128 requires the arch flags to be evaluated already. Since prefetching
13129 is beneficial on s390, we enable it if available. */
13130 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
13131 flag_prefetch_loop_arrays = 1;
13132
13133 /* Use the alternative scheduling-pressure algorithm by default. */
13134 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
13135 global_options.x_param_values,
13136 global_options_set.x_param_values);
13137
13138 if (TARGET_TPF)
13139 {
13140 /* Don't emit DWARF3/4 unless specifically selected. The TPF
13141 debuggers do not yet support DWARF 3/4. */
13142 if (!global_options_set.x_dwarf_strict)
13143 dwarf_strict = 1;
13144 if (!global_options_set.x_dwarf_version)
13145 dwarf_version = 2;
13146 }
13147
13148 /* Register a target-specific optimization-and-lowering pass
13149 to run immediately before prologue and epilogue generation.
13150
13151 Registering the pass must be done at start up. It's
13152 convenient to do it here. */
13153 opt_pass *new_pass = new pass_s390_early_mach (g);
13154 struct register_pass_info insert_pass_s390_early_mach =
13155 {
13156 new_pass, /* pass */
13157 "pro_and_epilogue", /* reference_pass_name */
13158 1, /* ref_pass_instance_number */
13159 PASS_POS_INSERT_BEFORE /* po_op */
13160 };
13161 register_pass (&insert_pass_s390_early_mach);
13162 }
13163
13164 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
13165
13166 static bool
13167 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
13168 unsigned int align ATTRIBUTE_UNUSED,
13169 enum by_pieces_operation op ATTRIBUTE_UNUSED,
13170 bool speed_p ATTRIBUTE_UNUSED)
13171 {
13172 return (size == 1 || size == 2
13173 || size == 4 || (TARGET_ZARCH && size == 8));
13174 }
13175
13176 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13177
13178 static void
13179 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13180 {
13181 tree sfpc = s390_builtin_decls[S390_BUILTIN_S390_SFPC];
13182 tree efpc = s390_builtin_decls[S390_BUILTIN_S390_EFPC];
13183 tree call_efpc = build_call_expr (efpc, 0);
13184 tree fenv_var = create_tmp_var (unsigned_type_node);
13185
13186 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
13187 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
13188 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
13189 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
13190 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
13191 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
13192
13193 /* Generates the equivalent of feholdexcept (&fenv_var)
13194
13195 fenv_var = __builtin_s390_efpc ();
13196 __builtin_s390_sfpc (fenv_var & mask) */
13197 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
13198 tree new_fpc =
13199 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13200 build_int_cst (unsigned_type_node,
13201 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
13202 FPC_EXCEPTION_MASK)));
13203 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
13204 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
13205
13206 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
13207
13208 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
13209 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
13210 build_int_cst (unsigned_type_node,
13211 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
13212 *clear = build_call_expr (sfpc, 1, new_fpc);
13213
13214 /* Generates the equivalent of feupdateenv (fenv_var)
13215
13216 old_fpc = __builtin_s390_efpc ();
13217 __builtin_s390_sfpc (fenv_var);
13218 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
13219
13220 old_fpc = create_tmp_var (unsigned_type_node);
13221 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
13222 old_fpc, call_efpc);
13223
13224 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
13225
13226 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
13227 build_int_cst (unsigned_type_node,
13228 FPC_FLAGS_MASK));
13229 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
13230 build_int_cst (unsigned_type_node,
13231 FPC_FLAGS_SHIFT));
13232 tree atomic_feraiseexcept
13233 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13234 raise_old_except = build_call_expr (atomic_feraiseexcept,
13235 1, raise_old_except);
13236
13237 *update = build2 (COMPOUND_EXPR, void_type_node,
13238 build2 (COMPOUND_EXPR, void_type_node,
13239 store_old_fpc, set_new_fpc),
13240 raise_old_except);
13241
13242 #undef FPC_EXCEPTION_MASK
13243 #undef FPC_FLAGS_MASK
13244 #undef FPC_DXC_MASK
13245 #undef FPC_EXCEPTION_MASK_SHIFT
13246 #undef FPC_FLAGS_SHIFT
13247 #undef FPC_DXC_SHIFT
13248 }
13249
13250 /* Return the vector mode to be used for inner mode MODE when doing
13251 vectorization. */
13252 static machine_mode
13253 s390_preferred_simd_mode (machine_mode mode)
13254 {
13255 if (TARGET_VX)
13256 switch (mode)
13257 {
13258 case DFmode:
13259 return V2DFmode;
13260 case DImode:
13261 return V2DImode;
13262 case SImode:
13263 return V4SImode;
13264 case HImode:
13265 return V8HImode;
13266 case QImode:
13267 return V16QImode;
13268 default:;
13269 }
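/* No vector facility, or no vector mode defined for this scalar mode:
   returning word_mode tells the vectorizer that no SIMD mode is
   preferred.  */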
13270 return word_mode;
13271 }
13272
13273 /* Our hardware does not require vectors to be strictly aligned. */
13274 static bool
13275 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
13276 const_tree type ATTRIBUTE_UNUSED,
13277 int misalignment ATTRIBUTE_UNUSED,
13278 bool is_packed ATTRIBUTE_UNUSED)
13279 {
13280 return true;
13281 }
13282
13283 /* The vector ABI requires vector types to be aligned on an 8 byte
13284 boundary (our stack alignment). However, we allow this to be
13285 overridden by the user, even though doing so breaks the ABI. */
13286 static HOST_WIDE_INT
13287 s390_vector_alignment (const_tree type)
13288 {
13289 if (!TARGET_VX_ABI)
13290 return default_vector_alignment (type);
13291
13292 if (TYPE_USER_ALIGN (type))
13293 return TYPE_ALIGN (type);
13294
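/* TYPE_SIZE is given in bits, so vector types are aligned to their
   size, capped at 64 bits (the 8 byte stack alignment required by the
   vector ABI).  */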
13295 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
13296 }
13297
13298
13299 /* Initialize GCC target structure. */
13300
13301 #undef TARGET_ASM_ALIGNED_HI_OP
13302 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
13303 #undef TARGET_ASM_ALIGNED_DI_OP
13304 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
13305 #undef TARGET_ASM_INTEGER
13306 #define TARGET_ASM_INTEGER s390_assemble_integer
13307
13308 #undef TARGET_ASM_OPEN_PAREN
13309 #define TARGET_ASM_OPEN_PAREN ""
13310
13311 #undef TARGET_ASM_CLOSE_PAREN
13312 #define TARGET_ASM_CLOSE_PAREN ""
13313
13314 #undef TARGET_OPTION_OVERRIDE
13315 #define TARGET_OPTION_OVERRIDE s390_option_override
13316
13317 #undef TARGET_ENCODE_SECTION_INFO
13318 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
13319
13320 #undef TARGET_SCALAR_MODE_SUPPORTED_P
13321 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
13322
13323 #ifdef HAVE_AS_TLS
13324 #undef TARGET_HAVE_TLS
13325 #define TARGET_HAVE_TLS true
13326 #endif
13327 #undef TARGET_CANNOT_FORCE_CONST_MEM
13328 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
13329
13330 #undef TARGET_DELEGITIMIZE_ADDRESS
13331 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
13332
13333 #undef TARGET_LEGITIMIZE_ADDRESS
13334 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
13335
13336 #undef TARGET_RETURN_IN_MEMORY
13337 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
13338
13339 #undef TARGET_INIT_BUILTINS
13340 #define TARGET_INIT_BUILTINS s390_init_builtins
13341 #undef TARGET_EXPAND_BUILTIN
13342 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
13343 #undef TARGET_BUILTIN_DECL
13344 #define TARGET_BUILTIN_DECL s390_builtin_decl
13345
13346 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
13347 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
13348
13349 #undef TARGET_ASM_OUTPUT_MI_THUNK
13350 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
13351 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
13352 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
13353
13354 #undef TARGET_SCHED_ADJUST_PRIORITY
13355 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
13356 #undef TARGET_SCHED_ISSUE_RATE
13357 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
13358 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
13359 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
13360
13361 #undef TARGET_SCHED_VARIABLE_ISSUE
13362 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
13363 #undef TARGET_SCHED_REORDER
13364 #define TARGET_SCHED_REORDER s390_sched_reorder
13365 #undef TARGET_SCHED_INIT
13366 #define TARGET_SCHED_INIT s390_sched_init
13367
13368 #undef TARGET_CANNOT_COPY_INSN_P
13369 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
13370 #undef TARGET_RTX_COSTS
13371 #define TARGET_RTX_COSTS s390_rtx_costs
13372 #undef TARGET_ADDRESS_COST
13373 #define TARGET_ADDRESS_COST s390_address_cost
13374 #undef TARGET_REGISTER_MOVE_COST
13375 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
13376 #undef TARGET_MEMORY_MOVE_COST
13377 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
13378
13379 #undef TARGET_MACHINE_DEPENDENT_REORG
13380 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
13381
13382 #undef TARGET_VALID_POINTER_MODE
13383 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
13384
13385 #undef TARGET_BUILD_BUILTIN_VA_LIST
13386 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
13387 #undef TARGET_EXPAND_BUILTIN_VA_START
13388 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
13389 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
13390 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
13391
13392 #undef TARGET_PROMOTE_FUNCTION_MODE
13393 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
13394 #undef TARGET_PASS_BY_REFERENCE
13395 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
13396
13397 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
13398 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
13399 #undef TARGET_FUNCTION_ARG
13400 #define TARGET_FUNCTION_ARG s390_function_arg
13401 #undef TARGET_FUNCTION_ARG_ADVANCE
13402 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
13403 #undef TARGET_FUNCTION_VALUE
13404 #define TARGET_FUNCTION_VALUE s390_function_value
13405 #undef TARGET_LIBCALL_VALUE
13406 #define TARGET_LIBCALL_VALUE s390_libcall_value
13407 #undef TARGET_STRICT_ARGUMENT_NAMING
13408 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
13409
13410 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
13411 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
13412
13413 #undef TARGET_FIXED_CONDITION_CODE_REGS
13414 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
13415
13416 #undef TARGET_CC_MODES_COMPATIBLE
13417 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
13418
13419 #undef TARGET_INVALID_WITHIN_DOLOOP
13420 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
13421
13422 #ifdef HAVE_AS_TLS
13423 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
13424 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
13425 #endif
13426
13427 #undef TARGET_DWARF_FRAME_REG_MODE
13428 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
13429
13430 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
13431 #undef TARGET_MANGLE_TYPE
13432 #define TARGET_MANGLE_TYPE s390_mangle_type
13433 #endif
13434
13438 #undef TARGET_VECTOR_MODE_SUPPORTED_P
13439 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
13440
13441 #undef TARGET_PREFERRED_RELOAD_CLASS
13442 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
13443
13444 #undef TARGET_SECONDARY_RELOAD
13445 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
13446
13447 #undef TARGET_LIBGCC_CMP_RETURN_MODE
13448 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
13449
13450 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
13451 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
13452
13453 #undef TARGET_LEGITIMATE_ADDRESS_P
13454 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
13455
13456 #undef TARGET_LEGITIMATE_CONSTANT_P
13457 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
13458
13459 #undef TARGET_LRA_P
13460 #define TARGET_LRA_P s390_lra_p
13461
13462 #undef TARGET_CAN_ELIMINATE
13463 #define TARGET_CAN_ELIMINATE s390_can_eliminate
13464
13465 #undef TARGET_CONDITIONAL_REGISTER_USAGE
13466 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
13467
13468 #undef TARGET_LOOP_UNROLL_ADJUST
13469 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
13470
13471 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
13472 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
13473 #undef TARGET_TRAMPOLINE_INIT
13474 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
13475
13476 #undef TARGET_UNWIND_WORD_MODE
13477 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
13478
13479 #undef TARGET_CANONICALIZE_COMPARISON
13480 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
13481
13482 #undef TARGET_HARD_REGNO_SCRATCH_OK
13483 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
13484
13485 #undef TARGET_ATTRIBUTE_TABLE
13486 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
13487
13488 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
13489 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
13490
13491 #undef TARGET_SET_UP_BY_PROLOGUE
13492 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
13493
13494 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
13495 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
13496 s390_use_by_pieces_infrastructure_p
13497
13498 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
13499 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
13500
13501 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
13502 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
13503
13504 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
13505 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
13506
13507 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
13508 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
13509
13510 #undef TARGET_VECTOR_ALIGNMENT
13511 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
13512
13513 struct gcc_target targetm = TARGET_INITIALIZER;
13514
13515 #include "gt-s390.h"