s390: Constraints, predicates, and op letters for contiguous bitmasks
[gcc.git] / gcc / config / s390 / s390.c
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
3 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
5 Ulrich Weigand (uweigand@de.ibm.com) and
6 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "tm_p.h"
31 #include "regs.h"
32 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "reload.h"
43 #include "diagnostic-core.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "debug.h"
49 #include "langhooks.h"
50 #include "optabs.h"
51 #include "gimple.h"
52 #include "df.h"
53 #include "params.h"
54 #include "cfgloop.h"
55 #include "opts.h"
56
57 /* Define the specific costs for a given cpu. */
58
59 struct processor_costs
60 {
61 /* multiplication */
62 const int m; /* cost of an M instruction. */
63 const int mghi; /* cost of an MGHI instruction. */
64 const int mh; /* cost of an MH instruction. */
65 const int mhi; /* cost of an MHI instruction. */
66 const int ml; /* cost of an ML instruction. */
67 const int mr; /* cost of an MR instruction. */
68 const int ms; /* cost of an MS instruction. */
69 const int msg; /* cost of an MSG instruction. */
70 const int msgf; /* cost of an MSGF instruction. */
71 const int msgfr; /* cost of an MSGFR instruction. */
72 const int msgr; /* cost of an MSGR instruction. */
73 const int msr; /* cost of an MSR instruction. */
74 const int mult_df; /* cost of multiplication in DFmode. */
75 const int mxbr; /* cost of multiplication in TFmode. */
76 /* square root */
77 const int sqxbr; /* cost of square root in TFmode. */
78 const int sqdbr; /* cost of square root in DFmode. */
79 const int sqebr; /* cost of square root in SFmode. */
80 /* multiply and add */
81 const int madbr; /* cost of multiply and add in DFmode. */
82 const int maebr; /* cost of multiply and add in SFmode. */
83 /* division */
84 const int dxbr; /* cost of division in TFmode. */
85 const int ddbr; /* cost of division in DFmode. */
86 const int debr; /* cost of division in SFmode. */
87 const int dlgr; /* cost of a DLGR instruction. */
88 const int dlr; /* cost of a DLR instruction. */
89 const int dr; /* cost of a DR instruction. */
90 const int dsgfr; /* cost of a DSGFR instruction. */
91 const int dsgr; /* cost of a DSGR instruction. */
92 };
93
94 const struct processor_costs *s390_cost;
95
96 static const
97 struct processor_costs z900_cost =
98 {
99 COSTS_N_INSNS (5), /* M */
100 COSTS_N_INSNS (10), /* MGHI */
101 COSTS_N_INSNS (5), /* MH */
102 COSTS_N_INSNS (4), /* MHI */
103 COSTS_N_INSNS (5), /* ML */
104 COSTS_N_INSNS (5), /* MR */
105 COSTS_N_INSNS (4), /* MS */
106 COSTS_N_INSNS (15), /* MSG */
107 COSTS_N_INSNS (7), /* MSGF */
108 COSTS_N_INSNS (7), /* MSGFR */
109 COSTS_N_INSNS (10), /* MSGR */
110 COSTS_N_INSNS (4), /* MSR */
111 COSTS_N_INSNS (7), /* multiplication in DFmode */
112 COSTS_N_INSNS (13), /* MXBR */
113 COSTS_N_INSNS (136), /* SQXBR */
114 COSTS_N_INSNS (44), /* SQDBR */
115 COSTS_N_INSNS (35), /* SQEBR */
116 COSTS_N_INSNS (18), /* MADBR */
117 COSTS_N_INSNS (13), /* MAEBR */
118 COSTS_N_INSNS (134), /* DXBR */
119 COSTS_N_INSNS (30), /* DDBR */
120 COSTS_N_INSNS (27), /* DEBR */
121 COSTS_N_INSNS (220), /* DLGR */
122 COSTS_N_INSNS (34), /* DLR */
123 COSTS_N_INSNS (34), /* DR */
124 COSTS_N_INSNS (32), /* DSGFR */
125 COSTS_N_INSNS (32), /* DSGR */
126 };
127
128 static const
129 struct processor_costs z990_cost =
130 {
131 COSTS_N_INSNS (4), /* M */
132 COSTS_N_INSNS (2), /* MGHI */
133 COSTS_N_INSNS (2), /* MH */
134 COSTS_N_INSNS (2), /* MHI */
135 COSTS_N_INSNS (4), /* ML */
136 COSTS_N_INSNS (4), /* MR */
137 COSTS_N_INSNS (5), /* MS */
138 COSTS_N_INSNS (6), /* MSG */
139 COSTS_N_INSNS (4), /* MSGF */
140 COSTS_N_INSNS (4), /* MSGFR */
141 COSTS_N_INSNS (4), /* MSGR */
142 COSTS_N_INSNS (4), /* MSR */
143 COSTS_N_INSNS (1), /* multiplication in DFmode */
144 COSTS_N_INSNS (28), /* MXBR */
145 COSTS_N_INSNS (130), /* SQXBR */
146 COSTS_N_INSNS (66), /* SQDBR */
147 COSTS_N_INSNS (38), /* SQEBR */
148 COSTS_N_INSNS (1), /* MADBR */
149 COSTS_N_INSNS (1), /* MAEBR */
150 COSTS_N_INSNS (60), /* DXBR */
151 COSTS_N_INSNS (40), /* DDBR */
152 COSTS_N_INSNS (26), /* DEBR */
153 COSTS_N_INSNS (176), /* DLGR */
154 COSTS_N_INSNS (31), /* DLR */
155 COSTS_N_INSNS (31), /* DR */
156 COSTS_N_INSNS (31), /* DSGFR */
157 COSTS_N_INSNS (31), /* DSGR */
158 };
159
160 static const
161 struct processor_costs z9_109_cost =
162 {
163 COSTS_N_INSNS (4), /* M */
164 COSTS_N_INSNS (2), /* MGHI */
165 COSTS_N_INSNS (2), /* MH */
166 COSTS_N_INSNS (2), /* MHI */
167 COSTS_N_INSNS (4), /* ML */
168 COSTS_N_INSNS (4), /* MR */
169 COSTS_N_INSNS (5), /* MS */
170 COSTS_N_INSNS (6), /* MSG */
171 COSTS_N_INSNS (4), /* MSGF */
172 COSTS_N_INSNS (4), /* MSGFR */
173 COSTS_N_INSNS (4), /* MSGR */
174 COSTS_N_INSNS (4), /* MSR */
175 COSTS_N_INSNS (1), /* multiplication in DFmode */
176 COSTS_N_INSNS (28), /* MXBR */
177 COSTS_N_INSNS (130), /* SQXBR */
178 COSTS_N_INSNS (66), /* SQDBR */
179 COSTS_N_INSNS (38), /* SQEBR */
180 COSTS_N_INSNS (1), /* MADBR */
181 COSTS_N_INSNS (1), /* MAEBR */
182 COSTS_N_INSNS (60), /* DXBR */
183 COSTS_N_INSNS (40), /* DDBR */
184 COSTS_N_INSNS (26), /* DEBR */
185 COSTS_N_INSNS (30), /* DLGR */
186 COSTS_N_INSNS (23), /* DLR */
187 COSTS_N_INSNS (23), /* DR */
188 COSTS_N_INSNS (24), /* DSGFR */
189 COSTS_N_INSNS (24), /* DSGR */
190 };
191
192 static const
193 struct processor_costs z10_cost =
194 {
195 COSTS_N_INSNS (10), /* M */
196 COSTS_N_INSNS (10), /* MGHI */
197 COSTS_N_INSNS (10), /* MH */
198 COSTS_N_INSNS (10), /* MHI */
199 COSTS_N_INSNS (10), /* ML */
200 COSTS_N_INSNS (10), /* MR */
201 COSTS_N_INSNS (10), /* MS */
202 COSTS_N_INSNS (10), /* MSG */
203 COSTS_N_INSNS (10), /* MSGF */
204 COSTS_N_INSNS (10), /* MSGFR */
205 COSTS_N_INSNS (10), /* MSGR */
206 COSTS_N_INSNS (10), /* MSR */
207 COSTS_N_INSNS (1), /* multiplication in DFmode */
208 COSTS_N_INSNS (50), /* MXBR */
209 COSTS_N_INSNS (120), /* SQXBR */
210 COSTS_N_INSNS (52), /* SQDBR */
211 COSTS_N_INSNS (38), /* SQEBR */
212 COSTS_N_INSNS (1), /* MADBR */
213 COSTS_N_INSNS (1), /* MAEBR */
214 COSTS_N_INSNS (111), /* DXBR */
215 COSTS_N_INSNS (39), /* DDBR */
216 COSTS_N_INSNS (32), /* DEBR */
217 COSTS_N_INSNS (160), /* DLGR */
218 COSTS_N_INSNS (71), /* DLR */
219 COSTS_N_INSNS (71), /* DR */
220 COSTS_N_INSNS (71), /* DSGFR */
221 COSTS_N_INSNS (71), /* DSGR */
222 };
223
224 static const
225 struct processor_costs z196_cost =
226 {
227 COSTS_N_INSNS (7), /* M */
228 COSTS_N_INSNS (5), /* MGHI */
229 COSTS_N_INSNS (5), /* MH */
230 COSTS_N_INSNS (5), /* MHI */
231 COSTS_N_INSNS (7), /* ML */
232 COSTS_N_INSNS (7), /* MR */
233 COSTS_N_INSNS (6), /* MS */
234 COSTS_N_INSNS (8), /* MSG */
235 COSTS_N_INSNS (6), /* MSGF */
236 COSTS_N_INSNS (6), /* MSGFR */
237 COSTS_N_INSNS (8), /* MSGR */
238 COSTS_N_INSNS (6), /* MSR */
239 COSTS_N_INSNS (1), /* multiplication in DFmode */
240 COSTS_N_INSNS (40), /* MXBR B+40 */
241 COSTS_N_INSNS (100), /* SQXBR B+100 */
242 COSTS_N_INSNS (42), /* SQDBR B+42 */
243 COSTS_N_INSNS (28), /* SQEBR B+28 */
244 COSTS_N_INSNS (1), /* MADBR B */
245 COSTS_N_INSNS (1), /* MAEBR B */
246 COSTS_N_INSNS (101), /* DXBR B+101 */
247 COSTS_N_INSNS (29), /* DDBR */
248 COSTS_N_INSNS (22), /* DEBR */
249 COSTS_N_INSNS (160), /* DLGR cracked */
250 COSTS_N_INSNS (160), /* DLR cracked */
251 COSTS_N_INSNS (160), /* DR expanded */
252 COSTS_N_INSNS (160), /* DSGFR cracked */
253 COSTS_N_INSNS (160), /* DSGR cracked */
254 };
255
256 static const
257 struct processor_costs zEC12_cost =
258 {
259 COSTS_N_INSNS (7), /* M */
260 COSTS_N_INSNS (5), /* MGHI */
261 COSTS_N_INSNS (5), /* MH */
262 COSTS_N_INSNS (5), /* MHI */
263 COSTS_N_INSNS (7), /* ML */
264 COSTS_N_INSNS (7), /* MR */
265 COSTS_N_INSNS (6), /* MS */
266 COSTS_N_INSNS (8), /* MSG */
267 COSTS_N_INSNS (6), /* MSGF */
268 COSTS_N_INSNS (6), /* MSGFR */
269 COSTS_N_INSNS (8), /* MSGR */
270 COSTS_N_INSNS (6), /* MSR */
271 COSTS_N_INSNS (1), /* multiplication in DFmode */
272 COSTS_N_INSNS (40), /* MXBR B+40 */
273 COSTS_N_INSNS (100), /* SQXBR B+100 */
274 COSTS_N_INSNS (42), /* SQDBR B+42 */
275 COSTS_N_INSNS (28), /* SQEBR B+28 */
276 COSTS_N_INSNS (1), /* MADBR B */
277 COSTS_N_INSNS (1), /* MAEBR B */
278 COSTS_N_INSNS (131), /* DXBR B+131 */
279 COSTS_N_INSNS (29), /* DDBR */
280 COSTS_N_INSNS (22), /* DEBR */
281 COSTS_N_INSNS (160), /* DLGR cracked */
282 COSTS_N_INSNS (160), /* DLR cracked */
283 COSTS_N_INSNS (160), /* DR expanded */
284 COSTS_N_INSNS (160), /* DSGFR cracked */
285 COSTS_N_INSNS (160), /* DSGR cracked */
286 };
287
288 extern int reload_completed;
289
290 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
291 static rtx last_scheduled_insn;
292
293 /* Structure used to hold the components of a S/390 memory
294 address. A legitimate address on S/390 is of the general
295 form
296 base + index + displacement
297 where any of the components is optional.
298
299 base and index are registers of the class ADDR_REGS,
300 displacement is an unsigned 12-bit immediate constant. */
301
302 struct s390_address
303 {
304 rtx base;
305 rtx indx;
306 rtx disp;
307 bool pointer;
308 bool literal_pool;
309 };
310
311 /* The following structure is embedded in the machine
312 specific part of struct function. */
313
314 struct GTY (()) s390_frame_layout
315 {
316 /* Offset within stack frame. */
317 HOST_WIDE_INT gprs_offset;
318 HOST_WIDE_INT f0_offset;
319 HOST_WIDE_INT f4_offset;
320 HOST_WIDE_INT f8_offset;
321 HOST_WIDE_INT backchain_offset;
322
323 /* Numbers of the first and last gpr for which slots in the register
324 save area are reserved. */
325 int first_save_gpr_slot;
326 int last_save_gpr_slot;
327
328 /* Number of first and last gpr to be saved, restored. */
329 int first_save_gpr;
330 int first_restore_gpr;
331 int last_save_gpr;
332 int last_restore_gpr;
333
334 /* Bits standing for floating point registers. Set, if the
335 respective register has to be saved. Starting with reg 16 (f0)
336 at the rightmost bit.
337 Bit 15 - 8 7 6 5 4 3 2 1 0
338 fpr 15 - 8 7 5 3 1 6 4 2 0
339 reg 31 - 24 23 22 21 20 19 18 17 16 */
340 unsigned int fpr_bitmap;
341
342 /* Number of floating point registers f8-f15 which must be saved. */
343 int high_fprs;
344
345 /* Set if return address needs to be saved.
346 This flag is set by s390_return_addr_rtx if it could not use
347 the initial value of r14 and therefore depends on r14 saved
348 to the stack. */
349 bool save_return_addr_p;
350
351 /* Size of stack frame. */
352 HOST_WIDE_INT frame_size;
353 };
354
355 /* Define the structure for the machine field in struct function. */
356
357 struct GTY(()) machine_function
358 {
359 struct s390_frame_layout frame_layout;
360
361 /* Literal pool base register. */
362 rtx base_reg;
363
364 /* True if we may need to perform branch splitting. */
365 bool split_branches_pending_p;
366
367 /* Some local-dynamic TLS symbol name. */
368 const char *some_ld_name;
369
370 bool has_landing_pad_p;
371 };
372
373 /* A few accessor macros for struct cfun->machine->s390_frame_layout. */
374
375 #define cfun_frame_layout (cfun->machine->frame_layout)
376 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
377 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
378 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
379 #define cfun_set_fpr_bit(BITNUM) (cfun->machine->frame_layout.fpr_bitmap |= \
380 (1 << (BITNUM)))
381 #define cfun_fpr_bit_p(BITNUM) (!!(cfun->machine->frame_layout.fpr_bitmap & \
382 (1 << (BITNUM))))
383
384 /* Number of GPRs and FPRs used for argument passing. */
385 #define GP_ARG_NUM_REG 5
386 #define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
387
388 /* A couple of shortcuts. */
389 #define CONST_OK_FOR_J(x) \
390 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
391 #define CONST_OK_FOR_K(x) \
392 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
393 #define CONST_OK_FOR_Os(x) \
394 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
395 #define CONST_OK_FOR_Op(x) \
396 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
397 #define CONST_OK_FOR_On(x) \
398 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
399
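/* True if a value of mode MODE placed in hard register REGNO either fits
   into a single register or starts at an even register number, as required
   for register pairs.  */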
400 #define REGNO_PAIR_OK(REGNO, MODE) \
401 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
402
403 /* The read-ahead distance, in bytes, of the dynamic branch prediction
404 unit on a z10 (or higher) CPU. */
405 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
406
407 /* Return the alignment for LABEL. We default to the -falign-labels
408 value except for the literal pool base label. */
409 int
410 s390_label_align (rtx label)
411 {
412 rtx prev_insn = prev_active_insn (label);
413
414 if (prev_insn == NULL_RTX)
415 goto old;
416
417 prev_insn = single_set (prev_insn);
418
419 if (prev_insn == NULL_RTX)
420 goto old;
421
422 prev_insn = SET_SRC (prev_insn);
423
424 /* Don't align literal pool base labels. */
425 if (GET_CODE (prev_insn) == UNSPEC
426 && XINT (prev_insn, 1) == UNSPEC_MAIN_BASE)
427 return 0;
428
429 old:
430 return align_labels_log;
431 }
432
433 static enum machine_mode
434 s390_libgcc_cmp_return_mode (void)
435 {
436 return TARGET_64BIT ? DImode : SImode;
437 }
438
439 static enum machine_mode
440 s390_libgcc_shift_count_mode (void)
441 {
442 return TARGET_64BIT ? DImode : SImode;
443 }
444
445 static enum machine_mode
446 s390_unwind_word_mode (void)
447 {
448 return TARGET_64BIT ? DImode : SImode;
449 }
450
451 /* Return true if the back end supports mode MODE. */
452 static bool
453 s390_scalar_mode_supported_p (enum machine_mode mode)
454 {
455 /* In contrast to the default implementation, reject TImode constants on 31-bit
456 TARGET_ZARCH for ABI compliance. */
457 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
458 return false;
459
460 if (DECIMAL_FLOAT_MODE_P (mode))
461 return default_decimal_float_supported_p ();
462
463 return default_scalar_mode_supported_p (mode);
464 }
465
466 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
467
468 void
469 s390_set_has_landing_pad_p (bool value)
470 {
471 cfun->machine->has_landing_pad_p = value;
472 }
473
474 /* If two condition code modes are compatible, return a condition code
475 mode which is compatible with both. Otherwise, return
476 VOIDmode. */
477
478 static enum machine_mode
479 s390_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
480 {
481 if (m1 == m2)
482 return m1;
483
484 switch (m1)
485 {
486 case CCZmode:
487 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
488 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
489 return m2;
490 return VOIDmode;
491
492 case CCSmode:
493 case CCUmode:
494 case CCTmode:
495 case CCSRmode:
496 case CCURmode:
497 case CCZ1mode:
498 if (m2 == CCZmode)
499 return m1;
500
501 return VOIDmode;
502
503 default:
504 return VOIDmode;
505 }
506 return VOIDmode;
507 }
508
509 /* Return true if SET either doesn't set the CC register, or else
510 the source and destination have matching CC modes and that
511 CC mode is at least as constrained as REQ_MODE. */
512
513 static bool
514 s390_match_ccmode_set (rtx set, enum machine_mode req_mode)
515 {
516 enum machine_mode set_mode;
517
518 gcc_assert (GET_CODE (set) == SET);
519
520 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
521 return 1;
522
523 set_mode = GET_MODE (SET_DEST (set));
524 switch (set_mode)
525 {
526 case CCSmode:
527 case CCSRmode:
528 case CCUmode:
529 case CCURmode:
530 case CCLmode:
531 case CCL1mode:
532 case CCL2mode:
533 case CCL3mode:
534 case CCT1mode:
535 case CCT2mode:
536 case CCT3mode:
537 if (req_mode != set_mode)
538 return 0;
539 break;
540
541 case CCZmode:
542 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
543 && req_mode != CCSRmode && req_mode != CCURmode)
544 return 0;
545 break;
546
547 case CCAPmode:
548 case CCANmode:
549 if (req_mode != CCAmode)
550 return 0;
551 break;
552
553 default:
554 gcc_unreachable ();
555 }
556
557 return (GET_MODE (SET_SRC (set)) == set_mode);
558 }
559
560 /* Return true if every SET in INSN that sets the CC register
561 has source and destination with matching CC modes and that
562 CC mode is at least as constrained as REQ_MODE.
563 If REQ_MODE is VOIDmode, always return false. */
564
565 bool
566 s390_match_ccmode (rtx insn, enum machine_mode req_mode)
567 {
568 int i;
569
570 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
571 if (req_mode == VOIDmode)
572 return false;
573
574 if (GET_CODE (PATTERN (insn)) == SET)
575 return s390_match_ccmode_set (PATTERN (insn), req_mode);
576
577 if (GET_CODE (PATTERN (insn)) == PARALLEL)
578 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
579 {
580 rtx set = XVECEXP (PATTERN (insn), 0, i);
581 if (GET_CODE (set) == SET)
582 if (!s390_match_ccmode_set (set, req_mode))
583 return false;
584 }
585
586 return true;
587 }
588
589 /* If a test-under-mask instruction can be used to implement
590 (compare (and ... OP1) OP2), return the CC mode required
591 to do that. Otherwise, return VOIDmode.
592 MIXED is true if the instruction can distinguish between
593 CC1 and CC2 for mixed selected bits (TMxx), it is false
594 if the instruction cannot (TM). */
595
596 enum machine_mode
597 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
598 {
599 int bit0, bit1;
600
601 /* ??? Fixme: should work on CONST_DOUBLE as well. */
602 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
603 return VOIDmode;
604
605 /* Selected bits all zero: CC0.
606 e.g.: int a; if ((a & (16 + 128)) == 0) */
607 if (INTVAL (op2) == 0)
608 return CCTmode;
609
610 /* Selected bits all one: CC3.
611 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
612 if (INTVAL (op2) == INTVAL (op1))
613 return CCT3mode;
614
615 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
616 int a;
617 if ((a & (16 + 128)) == 16) -> CCT1
618 if ((a & (16 + 128)) == 128) -> CCT2 */
619 if (mixed)
620 {
621 bit1 = exact_log2 (INTVAL (op2));
622 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
623 if (bit0 != -1 && bit1 != -1)
624 return bit0 > bit1 ? CCT1mode : CCT2mode;
625 }
626
627 return VOIDmode;
628 }
629
630 /* Given a comparison code OP (EQ, NE, etc.) and the operands
631 OP0 and OP1 of a COMPARE, return the mode to be used for the
632 comparison. */
633
634 enum machine_mode
635 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
636 {
637 switch (code)
638 {
639 case EQ:
640 case NE:
641 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
642 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
643 return CCAPmode;
644 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
645 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
646 return CCAPmode;
647 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
648 || GET_CODE (op1) == NEG)
649 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
650 return CCLmode;
651
652 if (GET_CODE (op0) == AND)
653 {
654 /* Check whether we can potentially do it via TM. */
655 enum machine_mode ccmode;
656 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
657 if (ccmode != VOIDmode)
658 {
659 /* Relax CCTmode to CCZmode to allow fall-back to AND
660 if that turns out to be beneficial. */
661 return ccmode == CCTmode ? CCZmode : ccmode;
662 }
663 }
664
665 if (register_operand (op0, HImode)
666 && GET_CODE (op1) == CONST_INT
667 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
668 return CCT3mode;
669 if (register_operand (op0, QImode)
670 && GET_CODE (op1) == CONST_INT
671 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
672 return CCT3mode;
673
674 return CCZmode;
675
676 case LE:
677 case LT:
678 case GE:
679 case GT:
680 /* The only overflow condition of NEG and ABS happens when the
681 parameter is INT_MIN (-INT_MAX - 1), whose result stays negative.
682 So we have an overflow from a value that should be positive to a
683 negative one. Using CCAP mode the resulting cc can be used for comparisons. */
684 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
685 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
686 return CCAPmode;
687
688 /* If a constant is involved in an add instruction, the resulting cc
689 can be used for comparisons with zero; knowing the sign of the
690 constant makes the overflow behavior predictable. e.g.:
691 int a, b; if ((b = a + c) > 0)
692 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
693 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
694 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
695 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
696 /* Avoid INT32_MIN on 32 bit. */
697 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
698 {
699 if (INTVAL (XEXP((op0), 1)) < 0)
700 return CCANmode;
701 else
702 return CCAPmode;
703 }
704 /* Fall through. */
705 case UNORDERED:
706 case ORDERED:
707 case UNEQ:
708 case UNLE:
709 case UNLT:
710 case UNGE:
711 case UNGT:
712 case LTGT:
713 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
714 && GET_CODE (op1) != CONST_INT)
715 return CCSRmode;
716 return CCSmode;
717
718 case LTU:
719 case GEU:
720 if (GET_CODE (op0) == PLUS
721 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
722 return CCL1mode;
723
724 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
725 && GET_CODE (op1) != CONST_INT)
726 return CCURmode;
727 return CCUmode;
728
729 case LEU:
730 case GTU:
731 if (GET_CODE (op0) == MINUS
732 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
733 return CCL2mode;
734
735 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
736 && GET_CODE (op1) != CONST_INT)
737 return CCURmode;
738 return CCUmode;
739
740 default:
741 gcc_unreachable ();
742 }
743 }
744
745 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
746 that we can implement more efficiently. */
747
748 static void
749 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
750 bool op0_preserve_value)
751 {
752 if (op0_preserve_value)
753 return;
754
755 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
756 if ((*code == EQ || *code == NE)
757 && *op1 == const0_rtx
758 && GET_CODE (*op0) == ZERO_EXTRACT
759 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
760 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
761 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
762 {
763 rtx inner = XEXP (*op0, 0);
764 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
765 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
766 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
767
768 if (len > 0 && len < modesize
769 && pos >= 0 && pos + len <= modesize
770 && modesize <= HOST_BITS_PER_WIDE_INT)
771 {
772 unsigned HOST_WIDE_INT block;
773 block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
774 block <<= modesize - pos - len;
775
776 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
777 gen_int_mode (block, GET_MODE (inner)));
778 }
779 }
780
781 /* Narrow AND of memory against immediate to enable TM. */
782 if ((*code == EQ || *code == NE)
783 && *op1 == const0_rtx
784 && GET_CODE (*op0) == AND
785 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
786 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
787 {
788 rtx inner = XEXP (*op0, 0);
789 rtx mask = XEXP (*op0, 1);
790
791 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
792 if (GET_CODE (inner) == SUBREG
793 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
794 && (GET_MODE_SIZE (GET_MODE (inner))
795 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
796 && ((INTVAL (mask)
797 & GET_MODE_MASK (GET_MODE (inner))
798 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
799 == 0))
800 inner = SUBREG_REG (inner);
801
802 /* Do not change volatile MEMs. */
803 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
804 {
805 int part = s390_single_part (XEXP (*op0, 1),
806 GET_MODE (inner), QImode, 0);
807 if (part >= 0)
808 {
809 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
810 inner = adjust_address_nv (inner, QImode, part);
811 *op0 = gen_rtx_AND (QImode, inner, mask);
812 }
813 }
814 }
815
816 /* Narrow comparisons against 0xffff to HImode if possible. */
817 if ((*code == EQ || *code == NE)
818 && GET_CODE (*op1) == CONST_INT
819 && INTVAL (*op1) == 0xffff
820 && SCALAR_INT_MODE_P (GET_MODE (*op0))
821 && (nonzero_bits (*op0, GET_MODE (*op0))
822 & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
823 {
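/* In HImode the constant 0xffff is -1, hence the constm1_rtx used below.  */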
824 *op0 = gen_lowpart (HImode, *op0);
825 *op1 = constm1_rtx;
826 }
827
828 /* Remove redundant UNSPEC_CCU_TO_INT conversions if possible. */
829 if (GET_CODE (*op0) == UNSPEC
830 && XINT (*op0, 1) == UNSPEC_CCU_TO_INT
831 && XVECLEN (*op0, 0) == 1
832 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
833 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
834 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
835 && *op1 == const0_rtx)
836 {
837 enum rtx_code new_code = UNKNOWN;
838 switch (*code)
839 {
840 case EQ: new_code = EQ; break;
841 case NE: new_code = NE; break;
842 case LT: new_code = GTU; break;
843 case GT: new_code = LTU; break;
844 case LE: new_code = GEU; break;
845 case GE: new_code = LEU; break;
846 default: break;
847 }
848
849 if (new_code != UNKNOWN)
850 {
851 *op0 = XVECEXP (*op0, 0, 0);
852 *code = new_code;
853 }
854 }
855
856 /* Remove redundant UNSPEC_CCZ_TO_INT conversions if possible. */
857 if (GET_CODE (*op0) == UNSPEC
858 && XINT (*op0, 1) == UNSPEC_CCZ_TO_INT
859 && XVECLEN (*op0, 0) == 1
860 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCZmode
861 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
862 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
863 && *op1 == const0_rtx)
864 {
865 enum rtx_code new_code = UNKNOWN;
866 switch (*code)
867 {
868 case EQ: new_code = EQ; break;
869 case NE: new_code = NE; break;
870 default: break;
871 }
872
873 if (new_code != UNKNOWN)
874 {
875 *op0 = XVECEXP (*op0, 0, 0);
876 *code = new_code;
877 }
878 }
879
880 /* Simplify cascaded EQ, NE with const0_rtx. */
881 if ((*code == NE || *code == EQ)
882 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
883 && GET_MODE (*op0) == SImode
884 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
885 && REG_P (XEXP (*op0, 0))
886 && XEXP (*op0, 1) == const0_rtx
887 && *op1 == const0_rtx)
888 {
889 if ((*code == EQ && GET_CODE (*op0) == NE)
890 || (*code == NE && GET_CODE (*op0) == EQ))
891 *code = EQ;
892 else
893 *code = NE;
894 *op0 = XEXP (*op0, 0);
895 }
896
897 /* Prefer register over memory as first operand. */
898 if (MEM_P (*op0) && REG_P (*op1))
899 {
900 rtx tem = *op0; *op0 = *op1; *op1 = tem;
901 *code = (int)swap_condition ((enum rtx_code)*code);
902 }
903 }
904
905 /* Emit a compare instruction suitable to implement the comparison
906 OP0 CODE OP1. Return the correct condition RTL to be placed in
907 the IF_THEN_ELSE of the conditional branch testing the result. */
908
909 rtx
910 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
911 {
912 enum machine_mode mode = s390_select_ccmode (code, op0, op1);
913 rtx cc;
914
915 /* Do not output a redundant compare instruction if a compare_and_swap
916 pattern already computed the result and the machine modes are compatible. */
917 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
918 {
919 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
920 == GET_MODE (op0));
921 cc = op0;
922 }
923 else
924 {
925 cc = gen_rtx_REG (mode, CC_REGNUM);
926 emit_insn (gen_rtx_SET (VOIDmode, cc, gen_rtx_COMPARE (mode, op0, op1)));
927 }
928
929 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
930 }
931
932 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
933 matches CMP.
934 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
935 conditional branch testing the result. */
936
937 static rtx
938 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
939 rtx cmp, rtx new_rtx)
940 {
941 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
942 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
943 const0_rtx);
944 }
945
946 /* Emit a jump instruction to TARGET. If COND is NULL_RTX, emit an
947 unconditional jump, else a conditional jump under condition COND. */
948
949 void
950 s390_emit_jump (rtx target, rtx cond)
951 {
952 rtx insn;
953
954 target = gen_rtx_LABEL_REF (VOIDmode, target);
955 if (cond)
956 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
957
958 insn = gen_rtx_SET (VOIDmode, pc_rtx, target);
959 emit_jump_insn (insn);
960 }
961
962 /* Return branch condition mask to implement a branch
963 specified by CODE. Return -1 for invalid comparisons. */
964
965 int
966 s390_branch_condition_mask (rtx code)
967 {
968 const int CC0 = 1 << 3;
969 const int CC1 = 1 << 2;
970 const int CC2 = 1 << 1;
971 const int CC3 = 1 << 0;
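/* These encodings match the 4-bit condition mask used in the mask field of
   the branch-on-condition instructions, with CC0 as the most significant of
   the four bits.  */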
972
973 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
974 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
975 gcc_assert (XEXP (code, 1) == const0_rtx);
976
977 switch (GET_MODE (XEXP (code, 0)))
978 {
979 case CCZmode:
980 case CCZ1mode:
981 switch (GET_CODE (code))
982 {
983 case EQ: return CC0;
984 case NE: return CC1 | CC2 | CC3;
985 default: return -1;
986 }
987 break;
988
989 case CCT1mode:
990 switch (GET_CODE (code))
991 {
992 case EQ: return CC1;
993 case NE: return CC0 | CC2 | CC3;
994 default: return -1;
995 }
996 break;
997
998 case CCT2mode:
999 switch (GET_CODE (code))
1000 {
1001 case EQ: return CC2;
1002 case NE: return CC0 | CC1 | CC3;
1003 default: return -1;
1004 }
1005 break;
1006
1007 case CCT3mode:
1008 switch (GET_CODE (code))
1009 {
1010 case EQ: return CC3;
1011 case NE: return CC0 | CC1 | CC2;
1012 default: return -1;
1013 }
1014 break;
1015
1016 case CCLmode:
1017 switch (GET_CODE (code))
1018 {
1019 case EQ: return CC0 | CC2;
1020 case NE: return CC1 | CC3;
1021 default: return -1;
1022 }
1023 break;
1024
1025 case CCL1mode:
1026 switch (GET_CODE (code))
1027 {
1028 case LTU: return CC2 | CC3; /* carry */
1029 case GEU: return CC0 | CC1; /* no carry */
1030 default: return -1;
1031 }
1032 break;
1033
1034 case CCL2mode:
1035 switch (GET_CODE (code))
1036 {
1037 case GTU: return CC0 | CC1; /* borrow */
1038 case LEU: return CC2 | CC3; /* no borrow */
1039 default: return -1;
1040 }
1041 break;
1042
1043 case CCL3mode:
1044 switch (GET_CODE (code))
1045 {
1046 case EQ: return CC0 | CC2;
1047 case NE: return CC1 | CC3;
1048 case LTU: return CC1;
1049 case GTU: return CC3;
1050 case LEU: return CC1 | CC2;
1051 case GEU: return CC2 | CC3;
1052 default: return -1;
1053 }
1054
1055 case CCUmode:
1056 switch (GET_CODE (code))
1057 {
1058 case EQ: return CC0;
1059 case NE: return CC1 | CC2 | CC3;
1060 case LTU: return CC1;
1061 case GTU: return CC2;
1062 case LEU: return CC0 | CC1;
1063 case GEU: return CC0 | CC2;
1064 default: return -1;
1065 }
1066 break;
1067
1068 case CCURmode:
1069 switch (GET_CODE (code))
1070 {
1071 case EQ: return CC0;
1072 case NE: return CC2 | CC1 | CC3;
1073 case LTU: return CC2;
1074 case GTU: return CC1;
1075 case LEU: return CC0 | CC2;
1076 case GEU: return CC0 | CC1;
1077 default: return -1;
1078 }
1079 break;
1080
1081 case CCAPmode:
1082 switch (GET_CODE (code))
1083 {
1084 case EQ: return CC0;
1085 case NE: return CC1 | CC2 | CC3;
1086 case LT: return CC1 | CC3;
1087 case GT: return CC2;
1088 case LE: return CC0 | CC1 | CC3;
1089 case GE: return CC0 | CC2;
1090 default: return -1;
1091 }
1092 break;
1093
1094 case CCANmode:
1095 switch (GET_CODE (code))
1096 {
1097 case EQ: return CC0;
1098 case NE: return CC1 | CC2 | CC3;
1099 case LT: return CC1;
1100 case GT: return CC2 | CC3;
1101 case LE: return CC0 | CC1;
1102 case GE: return CC0 | CC2 | CC3;
1103 default: return -1;
1104 }
1105 break;
1106
1107 case CCSmode:
1108 switch (GET_CODE (code))
1109 {
1110 case EQ: return CC0;
1111 case NE: return CC1 | CC2 | CC3;
1112 case LT: return CC1;
1113 case GT: return CC2;
1114 case LE: return CC0 | CC1;
1115 case GE: return CC0 | CC2;
1116 case UNORDERED: return CC3;
1117 case ORDERED: return CC0 | CC1 | CC2;
1118 case UNEQ: return CC0 | CC3;
1119 case UNLT: return CC1 | CC3;
1120 case UNGT: return CC2 | CC3;
1121 case UNLE: return CC0 | CC1 | CC3;
1122 case UNGE: return CC0 | CC2 | CC3;
1123 case LTGT: return CC1 | CC2;
1124 default: return -1;
1125 }
1126 break;
1127
1128 case CCSRmode:
1129 switch (GET_CODE (code))
1130 {
1131 case EQ: return CC0;
1132 case NE: return CC2 | CC1 | CC3;
1133 case LT: return CC2;
1134 case GT: return CC1;
1135 case LE: return CC0 | CC2;
1136 case GE: return CC0 | CC1;
1137 case UNORDERED: return CC3;
1138 case ORDERED: return CC0 | CC2 | CC1;
1139 case UNEQ: return CC0 | CC3;
1140 case UNLT: return CC2 | CC3;
1141 case UNGT: return CC1 | CC3;
1142 case UNLE: return CC0 | CC2 | CC3;
1143 case UNGE: return CC0 | CC1 | CC3;
1144 case LTGT: return CC2 | CC1;
1145 default: return -1;
1146 }
1147 break;
1148
1149 default:
1150 return -1;
1151 }
1152 }
1153
1154
1155 /* Return branch condition mask to implement a compare and branch
1156 specified by CODE. Return -1 for invalid comparisons. */
1157
1158 int
1159 s390_compare_and_branch_condition_mask (rtx code)
1160 {
1161 const int CC0 = 1 << 3;
1162 const int CC1 = 1 << 2;
1163 const int CC2 = 1 << 1;
1164
1165 switch (GET_CODE (code))
1166 {
1167 case EQ:
1168 return CC0;
1169 case NE:
1170 return CC1 | CC2;
1171 case LT:
1172 case LTU:
1173 return CC1;
1174 case GT:
1175 case GTU:
1176 return CC2;
1177 case LE:
1178 case LEU:
1179 return CC0 | CC1;
1180 case GE:
1181 case GEU:
1182 return CC0 | CC2;
1183 default:
1184 gcc_unreachable ();
1185 }
1186 return -1;
1187 }
1188
1189 /* If INV is false, return assembler mnemonic string to implement
1190 a branch specified by CODE. If INV is true, return mnemonic
1191 for the corresponding inverted branch. */
1192
1193 static const char *
1194 s390_branch_condition_mnemonic (rtx code, int inv)
1195 {
1196 int mask;
1197
1198 static const char *const mnemonic[16] =
1199 {
1200 NULL, "o", "h", "nle",
1201 "l", "nhe", "lh", "ne",
1202 "e", "nlh", "he", "nl",
1203 "le", "nh", "no", NULL
1204 };
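/* The table is indexed by the 4-bit condition mask; entries 0 (branch
   never) and 15 (branch always) are unused here, as the assertion below
   guarantees.  */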
1205
1206 if (GET_CODE (XEXP (code, 0)) == REG
1207 && REGNO (XEXP (code, 0)) == CC_REGNUM
1208 && XEXP (code, 1) == const0_rtx)
1209 mask = s390_branch_condition_mask (code);
1210 else
1211 mask = s390_compare_and_branch_condition_mask (code);
1212
1213 gcc_assert (mask >= 0);
1214
1215 if (inv)
1216 mask ^= 15;
1217
1218 gcc_assert (mask >= 1 && mask <= 14);
1219
1220 return mnemonic[mask];
1221 }
1222
1223 /* Return the part of op which has a value different from def.
1224 The size of the part is determined by mode.
1225 Use this function only if you already know that op really
1226 contains such a part. */
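/* Illustrative example: for OP = (const_int 0x12340000), MODE = HImode and
   DEF = 0, the only HImode part that differs from DEF is 0x1234, which is
   returned.  */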
1227
1228 unsigned HOST_WIDE_INT
1229 s390_extract_part (rtx op, enum machine_mode mode, int def)
1230 {
1231 unsigned HOST_WIDE_INT value = 0;
1232 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
1233 int part_bits = GET_MODE_BITSIZE (mode);
1234 unsigned HOST_WIDE_INT part_mask
1235 = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
1236 int i;
1237
1238 for (i = 0; i < max_parts; i++)
1239 {
1240 if (i == 0)
1241 value = (unsigned HOST_WIDE_INT) INTVAL (op);
1242 else
1243 value >>= part_bits;
1244
1245 if ((value & part_mask) != (def & part_mask))
1246 return value & part_mask;
1247 }
1248
1249 gcc_unreachable ();
1250 }
1251
1252 /* If OP is an integer constant of mode MODE with exactly one
1253 part of mode PART_MODE unequal to DEF, return the number of that
1254 part. Otherwise, return -1. */
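/* Illustrative example: for OP = (const_int 0x00ff0000), MODE = SImode,
   PART_MODE = QImode and DEF = 0, exactly one byte differs from DEF; its
   number, counted from the most significant part, is 1.  */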
1255
1256 int
1257 s390_single_part (rtx op,
1258 enum machine_mode mode,
1259 enum machine_mode part_mode,
1260 int def)
1261 {
1262 unsigned HOST_WIDE_INT value = 0;
1263 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
1264 unsigned HOST_WIDE_INT part_mask
1265 = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
1266 int i, part = -1;
1267
1268 if (GET_CODE (op) != CONST_INT)
1269 return -1;
1270
1271 for (i = 0; i < n_parts; i++)
1272 {
1273 if (i == 0)
1274 value = (unsigned HOST_WIDE_INT) INTVAL (op);
1275 else
1276 value >>= GET_MODE_BITSIZE (part_mode);
1277
1278 if ((value & part_mask) != (def & part_mask))
1279 {
1280 if (part != -1)
1281 return -1;
1282 else
1283 part = i;
1284 }
1285 }
1286 return part == -1 ? -1 : n_parts - 1 - part;
1287 }
1288
1289 /* Return true if IN contains a contiguous bitfield in the lower SIZE
1290 bits and no other bits are set in IN. POS and LENGTH can be used
1291 to obtain the start position and the length of the bitfield.
1292
1293 POS gives the position of the first bit of the bitfield counting
1294 from the lowest order bit starting with zero. In order to use this
1295 value for S/390 instructions this has to be converted to "bits big
1296 endian" style. */
1297
1298 bool
1299 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
1300 int *pos, int *length)
1301 {
1302 int tmp_pos = 0;
1303 int tmp_length = 0;
1304 int i;
1305 unsigned HOST_WIDE_INT mask = 1ULL;
1306 bool contiguous = false;
1307
1308 for (i = 0; i < size; mask <<= 1, i++)
1309 {
1310 if (contiguous)
1311 {
1312 if (mask & in)
1313 tmp_length++;
1314 else
1315 break;
1316 }
1317 else
1318 {
1319 if (mask & in)
1320 {
1321 contiguous = true;
1322 tmp_length++;
1323 }
1324 else
1325 tmp_pos++;
1326 }
1327 }
1328
1329 if (!tmp_length)
1330 return false;
1331
1332 /* Calculate a mask for all bits beyond the contiguous bits. */
1333 mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
1334
1335 if (mask & in)
1336 return false;
1337
1338 if (tmp_length + tmp_pos - 1 > size)
1339 return false;
1340
1341 if (length)
1342 *length = tmp_length;
1343
1344 if (pos)
1345 *pos = tmp_pos;
1346
1347 return true;
1348 }
1349
1350 /* Check whether we can (and want to) split a double-word
1351 move in mode MODE from SRC to DST into two single-word
1352 moves, moving the subword FIRST_SUBWORD first. */
1353
1354 bool
1355 s390_split_ok_p (rtx dst, rtx src, enum machine_mode mode, int first_subword)
1356 {
1357 /* Floating point registers cannot be split. */
1358 if (FP_REG_P (src) || FP_REG_P (dst))
1359 return false;
1360
1361 /* We don't need to split if operands are directly accessible. */
1362 if (s_operand (src, mode) || s_operand (dst, mode))
1363 return false;
1364
1365 /* Non-offsettable memory references cannot be split. */
1366 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
1367 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
1368 return false;
1369
1370 /* Moving the first subword must not clobber a register
1371 needed to move the second subword. */
1372 if (register_operand (dst, mode))
1373 {
1374 rtx subreg = operand_subword (dst, first_subword, 0, mode);
1375 if (reg_overlap_mentioned_p (subreg, src))
1376 return false;
1377 }
1378
1379 return true;
1380 }
1381
1382 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
1383 and [MEM2, MEM2 + SIZE] do overlap and false
1384 otherwise. */
1385
1386 bool
1387 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
1388 {
1389 rtx addr1, addr2, addr_delta;
1390 HOST_WIDE_INT delta;
1391
1392 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
1393 return true;
1394
1395 if (size == 0)
1396 return false;
1397
1398 addr1 = XEXP (mem1, 0);
1399 addr2 = XEXP (mem2, 0);
1400
1401 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
1402
1403 /* This overlapping check is used by peepholes merging memory block operations.
1404 Overlapping operations would otherwise be recognized by the S/390 hardware
1405 and would fall back to a slower implementation. Allowing overlapping
1406 operations would lead to slow code but not to wrong code. Therefore we are
1407 somewhat optimistic if we cannot prove that the memory blocks are
1408 overlapping.
1409 That's why we return false here although this may accept operations on
1410 overlapping memory areas. */
1411 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
1412 return false;
1413
1414 delta = INTVAL (addr_delta);
1415
1416 if (delta == 0
1417 || (delta > 0 && delta < size)
1418 || (delta < 0 && -delta < size))
1419 return true;
1420
1421 return false;
1422 }
1423
1424 /* Check whether the address of memory reference MEM2 equals exactly
1425 the address of memory reference MEM1 plus DELTA. Return true if
1426 we can prove this to be the case, false otherwise. */
1427
1428 bool
1429 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
1430 {
1431 rtx addr1, addr2, addr_delta;
1432
1433 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
1434 return false;
1435
1436 addr1 = XEXP (mem1, 0);
1437 addr2 = XEXP (mem2, 0);
1438
1439 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
1440 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
1441 return false;
1442
1443 return true;
1444 }
1445
1446 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
1447
1448 void
1449 s390_expand_logical_operator (enum rtx_code code, enum machine_mode mode,
1450 rtx *operands)
1451 {
1452 enum machine_mode wmode = mode;
1453 rtx dst = operands[0];
1454 rtx src1 = operands[1];
1455 rtx src2 = operands[2];
1456 rtx op, clob, tem;
1457
1458 /* If we cannot handle the operation directly, use a temp register. */
1459 if (!s390_logical_operator_ok_p (operands))
1460 dst = gen_reg_rtx (mode);
1461
1462 /* QImode and HImode patterns make sense only if we have a destination
1463 in memory. Otherwise perform the operation in SImode. */
1464 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
1465 wmode = SImode;
1466
1467 /* Widen operands if required. */
1468 if (mode != wmode)
1469 {
1470 if (GET_CODE (dst) == SUBREG
1471 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
1472 dst = tem;
1473 else if (REG_P (dst))
1474 dst = gen_rtx_SUBREG (wmode, dst, 0);
1475 else
1476 dst = gen_reg_rtx (wmode);
1477
1478 if (GET_CODE (src1) == SUBREG
1479 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
1480 src1 = tem;
1481 else if (GET_MODE (src1) != VOIDmode)
1482 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
1483
1484 if (GET_CODE (src2) == SUBREG
1485 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
1486 src2 = tem;
1487 else if (GET_MODE (src2) != VOIDmode)
1488 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
1489 }
1490
1491 /* Emit the instruction. */
1492 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
1493 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
1494 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
1495
1496 /* Fix up the destination if needed. */
1497 if (dst != operands[0])
1498 emit_move_insn (operands[0], gen_lowpart (mode, dst));
1499 }
1500
1501 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
1502
1503 bool
1504 s390_logical_operator_ok_p (rtx *operands)
1505 {
1506 /* If the destination operand is in memory, it needs to coincide
1507 with one of the source operands. After reload, it has to be
1508 the first source operand. */
1509 if (GET_CODE (operands[0]) == MEM)
1510 return rtx_equal_p (operands[0], operands[1])
1511 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
1512
1513 return true;
1514 }
1515
1516 /* Narrow logical operation CODE of memory operand MEMOP with immediate
1517 operand IMMOP to switch from SS to SI type instructions. */
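/* Illustrative example: an SImode AND of a memory operand with the
   constant 0xffffffef only affects the byte at offset 3, so it can be
   rewritten as a QImode AND of that byte with 0xef (an NI instruction).  */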
1518
1519 void
1520 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
1521 {
1522 int def = code == AND ? -1 : 0;
1523 HOST_WIDE_INT mask;
1524 int part;
1525
1526 gcc_assert (GET_CODE (*memop) == MEM);
1527 gcc_assert (!MEM_VOLATILE_P (*memop));
1528
1529 mask = s390_extract_part (*immop, QImode, def);
1530 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
1531 gcc_assert (part >= 0);
1532
1533 *memop = adjust_address (*memop, QImode, part);
1534 *immop = gen_int_mode (mask, QImode);
1535 }
1536
1537
1538 /* How to allocate a 'struct machine_function'. */
1539
1540 static struct machine_function *
1541 s390_init_machine_status (void)
1542 {
1543 return ggc_alloc_cleared_machine_function ();
1544 }
1545
1546 static void
1547 s390_option_override (void)
1548 {
1549 /* Set up function hooks. */
1550 init_machine_status = s390_init_machine_status;
1551
1552 /* Architecture mode defaults according to ABI. */
1553 if (!(target_flags_explicit & MASK_ZARCH))
1554 {
1555 if (TARGET_64BIT)
1556 target_flags |= MASK_ZARCH;
1557 else
1558 target_flags &= ~MASK_ZARCH;
1559 }
1560
1561 /* Set the march default in case it hasn't been specified on
1562 cmdline. */
1563 if (s390_arch == PROCESSOR_max)
1564 {
1565 s390_arch_string = TARGET_ZARCH ? "z900" : "g5";
1566 s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
1567 s390_arch_flags = processor_flags_table[(int)s390_arch];
1568 }
1569
1570 /* Determine processor to tune for. */
1571 if (s390_tune == PROCESSOR_max)
1572 {
1573 s390_tune = s390_arch;
1574 s390_tune_flags = s390_arch_flags;
1575 }
1576
1577 /* Sanity checks. */
1578 if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
1579 error ("z/Architecture mode not supported on %s", s390_arch_string);
1580 if (TARGET_64BIT && !TARGET_ZARCH)
1581 error ("64-bit ABI not supported in ESA/390 mode");
1582
1583 /* Use hardware DFP if available and not explicitly disabled by
1584 user. E.g. with -m31 -march=z10 -mzarch */
1585 if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
1586 target_flags |= MASK_HARD_DFP;
1587
1588 if (TARGET_HARD_DFP && !TARGET_DFP)
1589 {
1590 if (target_flags_explicit & MASK_HARD_DFP)
1591 {
1592 if (!TARGET_CPU_DFP)
1593 error ("hardware decimal floating point instructions"
1594 " not available on %s", s390_arch_string);
1595 if (!TARGET_ZARCH)
1596 error ("hardware decimal floating point instructions"
1597 " not available in ESA/390 mode");
1598 }
1599 else
1600 target_flags &= ~MASK_HARD_DFP;
1601 }
1602
1603 if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
1604 {
1605 if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
1606 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
1607
1608 target_flags &= ~MASK_HARD_DFP;
1609 }
1610
1611 /* Set processor cost function. */
1612 switch (s390_tune)
1613 {
1614 case PROCESSOR_2084_Z990:
1615 s390_cost = &z990_cost;
1616 break;
1617 case PROCESSOR_2094_Z9_109:
1618 s390_cost = &z9_109_cost;
1619 break;
1620 case PROCESSOR_2097_Z10:
1621 s390_cost = &z10_cost;
1622 break;
1623 case PROCESSOR_2817_Z196:
1624 s390_cost = &z196_cost;
1625 break;
1626 case PROCESSOR_2827_ZEC12:
1627 s390_cost = &zEC12_cost;
1628 break;
1629 default:
1630 s390_cost = &z900_cost;
1631 }
1632
1633 if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
1634 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
1635 "in combination");
1636
1637 if (s390_stack_size)
1638 {
1639 if (s390_stack_guard >= s390_stack_size)
1640 error ("stack size must be greater than the stack guard value");
1641 else if (s390_stack_size > 1 << 16)
1642 error ("stack size must not be greater than 64k");
1643 }
1644 else if (s390_stack_guard)
1645 error ("-mstack-guard implies use of -mstack-size");
1646
1647 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1648 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1649 target_flags |= MASK_LONG_DOUBLE_128;
1650 #endif
1651
1652 if (s390_tune == PROCESSOR_2097_Z10
1653 || s390_tune == PROCESSOR_2817_Z196
1654 || s390_tune == PROCESSOR_2827_ZEC12)
1655 {
1656 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
1657 global_options.x_param_values,
1658 global_options_set.x_param_values);
1659 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
1660 global_options.x_param_values,
1661 global_options_set.x_param_values);
1662 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
1663 global_options.x_param_values,
1664 global_options_set.x_param_values);
1665 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
1666 global_options.x_param_values,
1667 global_options_set.x_param_values);
1668 }
1669
1670 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
1671 global_options.x_param_values,
1672 global_options_set.x_param_values);
1673 /* values for loop prefetching */
1674 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
1675 global_options.x_param_values,
1676 global_options_set.x_param_values);
1677 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
1678 global_options.x_param_values,
1679 global_options_set.x_param_values);
1680 /* s390 has more than 2 cache levels and their sizes are much larger.
1681 Since we are always running virtualized, assume that we only get a
1682 small part of the caches above L1. */
1683 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
1684 global_options.x_param_values,
1685 global_options_set.x_param_values);
1686 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
1687 global_options.x_param_values,
1688 global_options_set.x_param_values);
1689 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
1690 global_options.x_param_values,
1691 global_options_set.x_param_values);
1692
1693 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
1694 requires the arch flags to be evaluated already. Since prefetching
1695 is beneficial on s390, we enable it if available. */
1696 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
1697 flag_prefetch_loop_arrays = 1;
1698
1699 /* Use the alternative scheduling-pressure algorithm by default. */
1700 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
1701 global_options.x_param_values,
1702 global_options_set.x_param_values);
1703
1704 if (TARGET_TPF)
1705 {
1706 /* Don't emit DWARF3/4 unless specifically selected. The TPF
1707 debuggers do not yet support DWARF 3/4. */
1708 if (!global_options_set.x_dwarf_strict)
1709 dwarf_strict = 1;
1710 if (!global_options_set.x_dwarf_version)
1711 dwarf_version = 2;
1712 }
1713 }
1714
1715 /* Map for smallest class containing reg regno. */
1716
1717 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
1718 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1719 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1720 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1721 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1722 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1723 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1724 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1725 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1726 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS,
1727 ACCESS_REGS, ACCESS_REGS
1728 };
1729
1730 /* Return attribute type of insn. */
1731
1732 static enum attr_type
1733 s390_safe_attr_type (rtx insn)
1734 {
1735 if (recog_memoized (insn) >= 0)
1736 return get_attr_type (insn);
1737 else
1738 return TYPE_NONE;
1739 }
1740
1741 /* Return true if DISP is a valid short displacement. */
1742
1743 static bool
1744 s390_short_displacement (rtx disp)
1745 {
1746 /* No displacement is OK. */
1747 if (!disp)
1748 return true;
1749
1750 /* Without the long displacement facility we don't need to
1751 distinguish between long and short displacements. */
1752 if (!TARGET_LONG_DISPLACEMENT)
1753 return true;
1754
1755 /* Integer displacement in range. */
1756 if (GET_CODE (disp) == CONST_INT)
1757 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
1758
1759 /* GOT offset is not OK, the GOT can be large. */
1760 if (GET_CODE (disp) == CONST
1761 && GET_CODE (XEXP (disp, 0)) == UNSPEC
1762 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
1763 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
1764 return false;
1765
1766 /* All other symbolic constants are literal pool references,
1767 which are OK as the literal pool must be small. */
1768 if (GET_CODE (disp) == CONST)
1769 return true;
1770
1771 return false;
1772 }
1773
1774 /* Decompose a RTL expression ADDR for a memory address into
1775 its components, returned in OUT.
1776
1777 Returns false if ADDR is not a valid memory address, true
1778 otherwise. If OUT is NULL, don't return the components,
1779 but check for validity only.
1780
1781 Note: Only addresses in canonical form are recognized.
1782 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
1783 canonical form so that they will be recognized. */
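/* Illustrative example: the canonical address
   (plus (plus (reg A) (reg B)) (const_int 40))
   is decomposed into indx = A, base = B and disp = 40 (subject to the
   base/index preference swap further below).  */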
1784
1785 static int
1786 s390_decompose_address (rtx addr, struct s390_address *out)
1787 {
1788 HOST_WIDE_INT offset = 0;
1789 rtx base = NULL_RTX;
1790 rtx indx = NULL_RTX;
1791 rtx disp = NULL_RTX;
1792 rtx orig_disp;
1793 bool pointer = false;
1794 bool base_ptr = false;
1795 bool indx_ptr = false;
1796 bool literal_pool = false;
1797
1798 /* We may need to substitute the literal pool base register into the address
1799 below. However, at this point we do not know which register is going to
1800 be used as base, so we substitute the arg pointer register. This is going
1801 to be treated as holding a pointer below -- it shouldn't be used for any
1802 other purpose. */
1803 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
1804
1805 /* Decompose address into base + index + displacement. */
1806
1807 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
1808 base = addr;
1809
1810 else if (GET_CODE (addr) == PLUS)
1811 {
1812 rtx op0 = XEXP (addr, 0);
1813 rtx op1 = XEXP (addr, 1);
1814 enum rtx_code code0 = GET_CODE (op0);
1815 enum rtx_code code1 = GET_CODE (op1);
1816
1817 if (code0 == REG || code0 == UNSPEC)
1818 {
1819 if (code1 == REG || code1 == UNSPEC)
1820 {
1821 indx = op0; /* index + base */
1822 base = op1;
1823 }
1824
1825 else
1826 {
1827 base = op0; /* base + displacement */
1828 disp = op1;
1829 }
1830 }
1831
1832 else if (code0 == PLUS)
1833 {
1834 indx = XEXP (op0, 0); /* index + base + disp */
1835 base = XEXP (op0, 1);
1836 disp = op1;
1837 }
1838
1839 else
1840 {
1841 return false;
1842 }
1843 }
1844
1845 else
1846 disp = addr; /* displacement */
1847
1848 /* Extract integer part of displacement. */
1849 orig_disp = disp;
1850 if (disp)
1851 {
1852 if (GET_CODE (disp) == CONST_INT)
1853 {
1854 offset = INTVAL (disp);
1855 disp = NULL_RTX;
1856 }
1857 else if (GET_CODE (disp) == CONST
1858 && GET_CODE (XEXP (disp, 0)) == PLUS
1859 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
1860 {
1861 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
1862 disp = XEXP (XEXP (disp, 0), 0);
1863 }
1864 }
1865
1866 /* Strip off CONST here to avoid special case tests later. */
1867 if (disp && GET_CODE (disp) == CONST)
1868 disp = XEXP (disp, 0);
1869
1870 /* We can convert literal pool addresses to
1871 displacements by basing them off the base register. */
1872 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
1873 {
1874 /* Either base or index must be free to hold the base register. */
1875 if (!base)
1876 base = fake_pool_base, literal_pool = true;
1877 else if (!indx)
1878 indx = fake_pool_base, literal_pool = true;
1879 else
1880 return false;
1881
1882 /* Mark up the displacement. */
1883 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
1884 UNSPEC_LTREL_OFFSET);
1885 }
1886
1887 /* Validate base register. */
1888 if (base)
1889 {
1890 if (GET_CODE (base) == UNSPEC)
1891 switch (XINT (base, 1))
1892 {
1893 case UNSPEC_LTREF:
1894 if (!disp)
1895 disp = gen_rtx_UNSPEC (Pmode,
1896 gen_rtvec (1, XVECEXP (base, 0, 0)),
1897 UNSPEC_LTREL_OFFSET);
1898 else
1899 return false;
1900
1901 base = XVECEXP (base, 0, 1);
1902 break;
1903
1904 case UNSPEC_LTREL_BASE:
1905 if (XVECLEN (base, 0) == 1)
1906 base = fake_pool_base, literal_pool = true;
1907 else
1908 base = XVECEXP (base, 0, 1);
1909 break;
1910
1911 default:
1912 return false;
1913 }
1914
1915 if (!REG_P (base)
1916 || (GET_MODE (base) != SImode
1917 && GET_MODE (base) != Pmode))
1918 return false;
1919
1920 if (REGNO (base) == STACK_POINTER_REGNUM
1921 || REGNO (base) == FRAME_POINTER_REGNUM
1922 || ((reload_completed || reload_in_progress)
1923 && frame_pointer_needed
1924 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
1925 || REGNO (base) == ARG_POINTER_REGNUM
1926 || (flag_pic
1927 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
1928 pointer = base_ptr = true;
1929
1930 if ((reload_completed || reload_in_progress)
1931 && base == cfun->machine->base_reg)
1932 pointer = base_ptr = literal_pool = true;
1933 }
1934
1935 /* Validate index register. */
1936 if (indx)
1937 {
1938 if (GET_CODE (indx) == UNSPEC)
1939 switch (XINT (indx, 1))
1940 {
1941 case UNSPEC_LTREF:
1942 if (!disp)
1943 disp = gen_rtx_UNSPEC (Pmode,
1944 gen_rtvec (1, XVECEXP (indx, 0, 0)),
1945 UNSPEC_LTREL_OFFSET);
1946 else
1947 return false;
1948
1949 indx = XVECEXP (indx, 0, 1);
1950 break;
1951
1952 case UNSPEC_LTREL_BASE:
1953 if (XVECLEN (indx, 0) == 1)
1954 indx = fake_pool_base, literal_pool = true;
1955 else
1956 indx = XVECEXP (indx, 0, 1);
1957 break;
1958
1959 default:
1960 return false;
1961 }
1962
1963 if (!REG_P (indx)
1964 || (GET_MODE (indx) != SImode
1965 && GET_MODE (indx) != Pmode))
1966 return false;
1967
1968 if (REGNO (indx) == STACK_POINTER_REGNUM
1969 || REGNO (indx) == FRAME_POINTER_REGNUM
1970 || ((reload_completed || reload_in_progress)
1971 && frame_pointer_needed
1972 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
1973 || REGNO (indx) == ARG_POINTER_REGNUM
1974 || (flag_pic
1975 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
1976 pointer = indx_ptr = true;
1977
1978 if ((reload_completed || reload_in_progress)
1979 && indx == cfun->machine->base_reg)
1980 pointer = indx_ptr = literal_pool = true;
1981 }
1982
1983 /* Prefer to use pointer as base, not index. */
1984 if (base && indx && !base_ptr
1985 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
1986 {
1987 rtx tmp = base;
1988 base = indx;
1989 indx = tmp;
1990 }
1991
1992 /* Validate displacement. */
1993 if (!disp)
1994 {
1995 /* If virtual registers are involved, the displacement will change later
1996 anyway as the virtual registers get eliminated. This could make a
1997 valid displacement invalid, but it is more likely to make an invalid
1998 displacement valid, because we sometimes access the register save area
1999 via negative offsets to one of those registers.
2000 Thus we don't check the displacement for validity here. If after
2001 elimination the displacement turns out to be invalid after all,
2002 this is fixed up by reload in any case. */
2003 if (base != arg_pointer_rtx
2004 && indx != arg_pointer_rtx
2005 && base != return_address_pointer_rtx
2006 && indx != return_address_pointer_rtx
2007 && base != frame_pointer_rtx
2008 && indx != frame_pointer_rtx
2009 && base != virtual_stack_vars_rtx
2010 && indx != virtual_stack_vars_rtx)
2011 if (!DISP_IN_RANGE (offset))
2012 return false;
2013 }
2014 else
2015 {
2016 /* All the special cases are pointers. */
2017 pointer = true;
2018
2019 /* In the small-PIC case, the linker converts @GOT
2020 and @GOTNTPOFF offsets to possible displacements. */
2021 if (GET_CODE (disp) == UNSPEC
2022 && (XINT (disp, 1) == UNSPEC_GOT
2023 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2024 && flag_pic == 1)
2025 {
2026 ;
2027 }
2028
2029 /* Accept pool label offsets. */
2030 else if (GET_CODE (disp) == UNSPEC
2031 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2032 ;
2033
2034 /* Accept literal pool references. */
2035 else if (GET_CODE (disp) == UNSPEC
2036 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2037 {
2038 /* In case CSE pulled a non-literal-pool reference out of
2039 the pool, we have to reject the address. This is
2040 especially important when loading the GOT pointer on
2041 non-zarch CPUs. In this case the literal pool contains an
2042 lt-relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2043 will most likely exceed the displacement range. */
2044 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2045 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2046 return false;
2047
2048 orig_disp = gen_rtx_CONST (Pmode, disp);
2049 if (offset)
2050 {
2051 /* If we have an offset, make sure it does not
2052 exceed the size of the constant pool entry. */
2053 rtx sym = XVECEXP (disp, 0, 0);
2054 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2055 return false;
2056
2057 orig_disp = plus_constant (Pmode, orig_disp, offset);
2058 }
2059 }
2060
2061 else
2062 return false;
2063 }
2064
2065 if (!base && !indx)
2066 pointer = true;
2067
2068 if (out)
2069 {
2070 out->base = base;
2071 out->indx = indx;
2072 out->disp = orig_disp;
2073 out->pointer = pointer;
2074 out->literal_pool = literal_pool;
2075 }
2076
2077 return true;
2078 }
2079
2080 /* Decompose an RTL expression OP for a shift count into its components,
2081 and return the base register in BASE and the offset in OFFSET.
2082
2083 Return true if OP is a valid shift count, false if not. */
2084
2085 bool
2086 s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
2087 {
2088 HOST_WIDE_INT off = 0;
2089
2090 /* We can have an integer constant, an address register,
2091 or a sum of the two. */
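/* E.g. (const_int 3), (reg Y), or (plus (reg Y) (const_int 3)); any SUBREGs
wrapped around the register are stripped below. */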
2092 if (GET_CODE (op) == CONST_INT)
2093 {
2094 off = INTVAL (op);
2095 op = NULL_RTX;
2096 }
2097 if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
2098 {
2099 off = INTVAL (XEXP (op, 1));
2100 op = XEXP (op, 0);
2101 }
2102 while (op && GET_CODE (op) == SUBREG)
2103 op = SUBREG_REG (op);
2104
2105 if (op && GET_CODE (op) != REG)
2106 return false;
2107
2108 if (offset)
2109 *offset = off;
2110 if (base)
2111 *base = op;
2112
2113 return true;
2114 }
2115
2116
2117 /* Return true if OP is a memory operand whose address contains no index register. */
2118
2119 bool
2120 s390_legitimate_address_without_index_p (rtx op)
2121 {
2122 struct s390_address addr;
2123
2124 if (!s390_decompose_address (XEXP (op, 0), &addr))
2125 return false;
2126 if (addr.indx)
2127 return false;
2128
2129 return true;
2130 }
2131
2132
2133 /* Return true if ADDR is of kind symbol_ref or symbol_ref + const_int
2134 and return these parts in SYMREF and ADDEND. You can pass NULL in
2135 SYMREF and/or ADDEND if you are not interested in these values.
2136 Literal pool references are *not* considered symbol references. */
2137
2138 static bool
2139 s390_symref_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
2140 {
2141 HOST_WIDE_INT tmpaddend = 0;
2142
2143 if (GET_CODE (addr) == CONST)
2144 addr = XEXP (addr, 0);
2145
2146 if (GET_CODE (addr) == PLUS)
2147 {
2148 if (GET_CODE (XEXP (addr, 0)) == SYMBOL_REF
2149 && !CONSTANT_POOL_ADDRESS_P (XEXP (addr, 0))
2150 && CONST_INT_P (XEXP (addr, 1)))
2151 {
2152 tmpaddend = INTVAL (XEXP (addr, 1));
2153 addr = XEXP (addr, 0);
2154 }
2155 else
2156 return false;
2157 }
2158 else
2159 if (GET_CODE (addr) != SYMBOL_REF || CONSTANT_POOL_ADDRESS_P (addr))
2160 return false;
2161
2162 if (symref)
2163 *symref = addr;
2164 if (addend)
2165 *addend = tmpaddend;
2166
2167 return true;
2168 }
2169
2170 /* Return TRUE if ADDR is a valid operand for the load/store relative
2171 instructions. Be aware that the alignment of the operand needs to
2172 be checked separately. */
2173 static bool
2174 s390_loadrelative_operand_p (rtx addr)
2175 {
2176 if (GET_CODE (addr) == CONST)
2177 addr = XEXP (addr, 0);
2178
2179 /* Enable load relative for symbol@GOTENT. */
2180 if (GET_CODE (addr) == UNSPEC
2181 && XINT (addr, 1) == UNSPEC_GOTENT)
2182 return true;
2183
2184 return s390_symref_operand_p (addr, NULL, NULL);
2185 }
2186
2187 /* Return true if the address in OP is valid for constraint letter C
2188 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
2189 pool MEMs should be accepted. Only the Q, R, S, T constraint
2190 letters are allowed for C. */
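/* Roughly: Q = no index register with a short displacement, R = index
allowed with a short displacement, S = no index with a long displacement,
T = index allowed with a long displacement. Short here means the unsigned
12-bit displacement form; the long (signed 20-bit) forms require
TARGET_LONG_DISPLACEMENT. */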
2191
2192 static int
2193 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
2194 {
2195 struct s390_address addr;
2196 bool decomposed = false;
2197
2198 /* This check makes sure that no symbolic addresses (except literal
2199 pool references) are accepted by the R or T constraints. */
2200 if (s390_loadrelative_operand_p (op))
2201 return 0;
2202
2203 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
2204 if (!lit_pool_ok)
2205 {
2206 if (!s390_decompose_address (op, &addr))
2207 return 0;
2208 if (addr.literal_pool)
2209 return 0;
2210 decomposed = true;
2211 }
2212
2213 switch (c)
2214 {
2215 case 'Q': /* no index short displacement */
2216 if (!decomposed && !s390_decompose_address (op, &addr))
2217 return 0;
2218 if (addr.indx)
2219 return 0;
2220 if (!s390_short_displacement (addr.disp))
2221 return 0;
2222 break;
2223
2224 case 'R': /* with index short displacement */
2225 if (TARGET_LONG_DISPLACEMENT)
2226 {
2227 if (!decomposed && !s390_decompose_address (op, &addr))
2228 return 0;
2229 if (!s390_short_displacement (addr.disp))
2230 return 0;
2231 }
2232 /* Any invalid address here will be fixed up by reload,
2233 so accept it for the most generic constraint. */
2234 break;
2235
2236 case 'S': /* no index long displacement */
2237 if (!TARGET_LONG_DISPLACEMENT)
2238 return 0;
2239 if (!decomposed && !s390_decompose_address (op, &addr))
2240 return 0;
2241 if (addr.indx)
2242 return 0;
2243 if (s390_short_displacement (addr.disp))
2244 return 0;
2245 break;
2246
2247 case 'T': /* with index long displacement */
2248 if (!TARGET_LONG_DISPLACEMENT)
2249 return 0;
2250 /* Any invalid address here will be fixed up by reload,
2251 so accept it for the most generic constraint. */
2252 if ((decomposed || s390_decompose_address (op, &addr))
2253 && s390_short_displacement (addr.disp))
2254 return 0;
2255 break;
2256 default:
2257 return 0;
2258 }
2259 return 1;
2260 }
2261
2262
2263 /* Evaluates constraint strings described by the regular expression
2264 ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
2265 the constraint given in STR, and 0 otherwise. */
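/* Roughly: 'A' additionally requires OP to be offsettable, 'B' rejects
literal pool addresses, 'Z' applies the Q/R/S/T check to OP itself rather
than to a MEM, 'U' accepts a short-displacement address (with or without
index) and 'W' a long-displacement one, and 'Y' accepts a shift count
operand. */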
2266
2267 int
2268 s390_mem_constraint (const char *str, rtx op)
2269 {
2270 char c = str[0];
2271
2272 switch (c)
2273 {
2274 case 'A':
2275 /* Check for offsettable variants of memory constraints. */
2276 if (!MEM_P (op) || MEM_VOLATILE_P (op))
2277 return 0;
2278 if ((reload_completed || reload_in_progress)
2279 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
2280 return 0;
2281 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
2282 case 'B':
2283 /* Check for non-literal-pool variants of memory constraints. */
2284 if (!MEM_P (op))
2285 return 0;
2286 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
2287 case 'Q':
2288 case 'R':
2289 case 'S':
2290 case 'T':
2291 if (GET_CODE (op) != MEM)
2292 return 0;
2293 return s390_check_qrst_address (c, XEXP (op, 0), true);
2294 case 'U':
2295 return (s390_check_qrst_address ('Q', op, true)
2296 || s390_check_qrst_address ('R', op, true));
2297 case 'W':
2298 return (s390_check_qrst_address ('S', op, true)
2299 || s390_check_qrst_address ('T', op, true));
2300 case 'Y':
2301 /* Simply check for the basic form of a shift count. Reload will
2302 take care of making sure we have a proper base register. */
2303 if (!s390_decompose_shift_count (op, NULL, NULL))
2304 return 0;
2305 break;
2306 case 'Z':
2307 return s390_check_qrst_address (str[1], op, true);
2308 default:
2309 return 0;
2310 }
2311 return 1;
2312 }
2313
2314
2315 /* Evaluates constraint strings starting with letter O. Input
2316 parameter C is the letter following the "O" in the constraint
2317 string. Returns 1 if VALUE meets the respective constraint and 0
2318 otherwise. */
2319
2320 int
2321 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
2322 {
2323 if (!TARGET_EXTIMM)
2324 return 0;
2325
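/* Roughly: 's' accepts values fitting a signed 32-bit immediate, 'p' values
whose upper 32 bits are zero (0 .. 2^32 - 1), and 'n' negative values whose
upper 32 bits are all ones (-2^32 + 1 .. -1); see s390_single_part for the
exact test. */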
2326 switch (c)
2327 {
2328 case 's':
2329 return trunc_int_for_mode (value, SImode) == value;
2330
2331 case 'p':
2332 return value == 0
2333 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
2334
2335 case 'n':
2336 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
2337
2338 default:
2339 gcc_unreachable ();
2340 }
2341 }
2342
2343
2344 /* Evaluates constraint strings starting with letter N. Parameter STR
2345 contains the letters following letter "N" in the constraint string.
2346 Returns true if VALUE matches the constraint. */
2347
2348 int
2349 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
2350 {
2351 enum machine_mode mode, part_mode;
2352 int def;
2353 int part, part_goal;
2354
2355
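/* STR has the form <part><part_mode><mode><other>: e.g. "3HD0" requires a
DImode value whose HImode part number 3 (parts presumably counted from the
most significant end, see s390_single_part) is the only part differing from
zero. 'x' accepts any part number, and 'F' means the remaining parts must be
all ones. */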
2356 if (str[0] == 'x')
2357 part_goal = -1;
2358 else
2359 part_goal = str[0] - '0';
2360
2361 switch (str[1])
2362 {
2363 case 'Q':
2364 part_mode = QImode;
2365 break;
2366 case 'H':
2367 part_mode = HImode;
2368 break;
2369 case 'S':
2370 part_mode = SImode;
2371 break;
2372 default:
2373 return 0;
2374 }
2375
2376 switch (str[2])
2377 {
2378 case 'H':
2379 mode = HImode;
2380 break;
2381 case 'S':
2382 mode = SImode;
2383 break;
2384 case 'D':
2385 mode = DImode;
2386 break;
2387 default:
2388 return 0;
2389 }
2390
2391 switch (str[3])
2392 {
2393 case '0':
2394 def = 0;
2395 break;
2396 case 'F':
2397 def = -1;
2398 break;
2399 default:
2400 return 0;
2401 }
2402
2403 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
2404 return 0;
2405
2406 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
2407 if (part < 0)
2408 return 0;
2409 if (part_goal != -1 && part_goal != part)
2410 return 0;
2411
2412 return 1;
2413 }
2414
2415
2416 /* Returns true if the input parameter VALUE is a float zero. */
2417
2418 int
2419 s390_float_const_zero_p (rtx value)
2420 {
2421 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
2422 && value == CONST0_RTX (GET_MODE (value)));
2423 }
2424
2425 /* Implement TARGET_REGISTER_MOVE_COST. */
2426
2427 static int
2428 s390_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
2429 reg_class_t from, reg_class_t to)
2430 {
2431 /* On s390, copying between FPRs and GPRs is expensive. */
2432 if ((reg_classes_intersect_p (from, GENERAL_REGS)
2433 && reg_classes_intersect_p (to, FP_REGS))
2434 || (reg_classes_intersect_p (from, FP_REGS)
2435 && reg_classes_intersect_p (to, GENERAL_REGS)))
2436 return 10;
2437
2438 return 1;
2439 }
2440
2441 /* Implement TARGET_MEMORY_MOVE_COST. */
2442
2443 static int
2444 s390_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
2445 reg_class_t rclass ATTRIBUTE_UNUSED,
2446 bool in ATTRIBUTE_UNUSED)
2447 {
2448 return 1;
2449 }
2450
2451 /* Compute a (partial) cost for rtx X. Return true if the complete
2452 cost has been computed, and false if subexpressions should be
2453 scanned. In either case, *TOTAL contains the cost result.
2454 CODE contains GET_CODE (x), OUTER_CODE contains the code
2455 of the superexpression of x. */
2456
2457 static bool
2458 s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
2459 int *total, bool speed ATTRIBUTE_UNUSED)
2460 {
2461 switch (code)
2462 {
2463 case CONST:
2464 case CONST_INT:
2465 case LABEL_REF:
2466 case SYMBOL_REF:
2467 case CONST_DOUBLE:
2468 case MEM:
2469 *total = 0;
2470 return true;
2471
2472 case ASHIFT:
2473 case ASHIFTRT:
2474 case LSHIFTRT:
2475 case ROTATE:
2476 case ROTATERT:
2477 case AND:
2478 case IOR:
2479 case XOR:
2480 case NEG:
2481 case NOT:
2482 *total = COSTS_N_INSNS (1);
2483 return false;
2484
2485 case PLUS:
2486 case MINUS:
2487 *total = COSTS_N_INSNS (1);
2488 return false;
2489
2490 case MULT:
2491 switch (GET_MODE (x))
2492 {
2493 case SImode:
2494 {
2495 rtx left = XEXP (x, 0);
2496 rtx right = XEXP (x, 1);
2497 if (GET_CODE (right) == CONST_INT
2498 && CONST_OK_FOR_K (INTVAL (right)))
2499 *total = s390_cost->mhi;
2500 else if (GET_CODE (left) == SIGN_EXTEND)
2501 *total = s390_cost->mh;
2502 else
2503 *total = s390_cost->ms; /* msr, ms, msy */
2504 break;
2505 }
2506 case DImode:
2507 {
2508 rtx left = XEXP (x, 0);
2509 rtx right = XEXP (x, 1);
2510 if (TARGET_ZARCH)
2511 {
2512 if (GET_CODE (right) == CONST_INT
2513 && CONST_OK_FOR_K (INTVAL (right)))
2514 *total = s390_cost->mghi;
2515 else if (GET_CODE (left) == SIGN_EXTEND)
2516 *total = s390_cost->msgf;
2517 else
2518 *total = s390_cost->msg; /* msgr, msg */
2519 }
2520 else /* TARGET_31BIT */
2521 {
2522 if (GET_CODE (left) == SIGN_EXTEND
2523 && GET_CODE (right) == SIGN_EXTEND)
2524 /* mulsidi case: mr, m */
2525 *total = s390_cost->m;
2526 else if (GET_CODE (left) == ZERO_EXTEND
2527 && GET_CODE (right) == ZERO_EXTEND
2528 && TARGET_CPU_ZARCH)
2529 /* umulsidi case: ml, mlr */
2530 *total = s390_cost->ml;
2531 else
2532 /* Complex calculation is required. */
2533 *total = COSTS_N_INSNS (40);
2534 }
2535 break;
2536 }
2537 case SFmode:
2538 case DFmode:
2539 *total = s390_cost->mult_df;
2540 break;
2541 case TFmode:
2542 *total = s390_cost->mxbr;
2543 break;
2544 default:
2545 return false;
2546 }
2547 return false;
2548
2549 case FMA:
2550 switch (GET_MODE (x))
2551 {
2552 case DFmode:
2553 *total = s390_cost->madbr;
2554 break;
2555 case SFmode:
2556 *total = s390_cost->maebr;
2557 break;
2558 default:
2559 return false;
2560 }
2561 /* A negation in the third argument is free: FMSUB. */
2562 if (GET_CODE (XEXP (x, 2)) == NEG)
2563 {
2564 *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
2565 + rtx_cost (XEXP (x, 1), FMA, 1, speed)
2566 + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
2567 return true;
2568 }
2569 return false;
2570
2571 case UDIV:
2572 case UMOD:
2573 if (GET_MODE (x) == TImode) /* 128 bit division */
2574 *total = s390_cost->dlgr;
2575 else if (GET_MODE (x) == DImode)
2576 {
2577 rtx right = XEXP (x, 1);
2578 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2579 *total = s390_cost->dlr;
2580 else /* 64 by 64 bit division */
2581 *total = s390_cost->dlgr;
2582 }
2583 else if (GET_MODE (x) == SImode) /* 32 bit division */
2584 *total = s390_cost->dlr;
2585 return false;
2586
2587 case DIV:
2588 case MOD:
2589 if (GET_MODE (x) == DImode)
2590 {
2591 rtx right = XEXP (x, 1);
2592 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2593 if (TARGET_ZARCH)
2594 *total = s390_cost->dsgfr;
2595 else
2596 *total = s390_cost->dr;
2597 else /* 64 by 64 bit division */
2598 *total = s390_cost->dsgr;
2599 }
2600 else if (GET_MODE (x) == SImode) /* 32 bit division */
2601 *total = s390_cost->dlr;
2602 else if (GET_MODE (x) == SFmode)
2603 {
2604 *total = s390_cost->debr;
2605 }
2606 else if (GET_MODE (x) == DFmode)
2607 {
2608 *total = s390_cost->ddbr;
2609 }
2610 else if (GET_MODE (x) == TFmode)
2611 {
2612 *total = s390_cost->dxbr;
2613 }
2614 return false;
2615
2616 case SQRT:
2617 if (GET_MODE (x) == SFmode)
2618 *total = s390_cost->sqebr;
2619 else if (GET_MODE (x) == DFmode)
2620 *total = s390_cost->sqdbr;
2621 else /* TFmode */
2622 *total = s390_cost->sqxbr;
2623 return false;
2624
2625 case SIGN_EXTEND:
2626 case ZERO_EXTEND:
2627 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
2628 || outer_code == PLUS || outer_code == MINUS
2629 || outer_code == COMPARE)
2630 *total = 0;
2631 return false;
2632
2633 case COMPARE:
2634 *total = COSTS_N_INSNS (1);
2635 if (GET_CODE (XEXP (x, 0)) == AND
2636 && GET_CODE (XEXP (x, 1)) == CONST_INT
2637 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2638 {
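/* A (compare (and X M) N) like this can typically be handled by a single
TEST UNDER MASK instruction, so no extra cost is charged for the inner AND. */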
2639 rtx op0 = XEXP (XEXP (x, 0), 0);
2640 rtx op1 = XEXP (XEXP (x, 0), 1);
2641 rtx op2 = XEXP (x, 1);
2642
2643 if (memory_operand (op0, GET_MODE (op0))
2644 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
2645 return true;
2646 if (register_operand (op0, GET_MODE (op0))
2647 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
2648 return true;
2649 }
2650 return false;
2651
2652 default:
2653 return false;
2654 }
2655 }
2656
2657 /* Return the cost of an address rtx ADDR. */
2658
2659 static int
2660 s390_address_cost (rtx addr, enum machine_mode mode ATTRIBUTE_UNUSED,
2661 addr_space_t as ATTRIBUTE_UNUSED,
2662 bool speed ATTRIBUTE_UNUSED)
2663 {
2664 struct s390_address ad;
2665 if (!s390_decompose_address (addr, &ad))
2666 return 1000;
2667
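/* An address that needs an index register is rated slightly more expensive
than a plain base + displacement address. */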
2668 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
2669 }
2670
2671 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
2672 otherwise return 0. */
2673
2674 int
2675 tls_symbolic_operand (rtx op)
2676 {
2677 if (GET_CODE (op) != SYMBOL_REF)
2678 return 0;
2679 return SYMBOL_REF_TLS_MODEL (op);
2680 }
2681 \f
2682 /* Split DImode access register reference REG (on 64-bit) into its constituent
2683 low and high parts, and store them into LO and HI. Note that gen_lowpart/
2684 gen_highpart cannot be used as they assume all registers are word-sized,
2685 while our access registers have only half that size. */
2686
2687 void
2688 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
2689 {
2690 gcc_assert (TARGET_64BIT);
2691 gcc_assert (ACCESS_REG_P (reg));
2692 gcc_assert (GET_MODE (reg) == DImode);
2693 gcc_assert (!(REGNO (reg) & 1));
2694
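/* The even-numbered access register holds the high word, the following
odd-numbered one the low word. */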
2695 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
2696 *hi = gen_rtx_REG (SImode, REGNO (reg));
2697 }
2698
2699 /* Return true if OP contains a symbol reference. */
2700
2701 bool
2702 symbolic_reference_mentioned_p (rtx op)
2703 {
2704 const char *fmt;
2705 int i;
2706
2707 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2708 return 1;
2709
2710 fmt = GET_RTX_FORMAT (GET_CODE (op));
2711 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2712 {
2713 if (fmt[i] == 'E')
2714 {
2715 int j;
2716
2717 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2718 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2719 return 1;
2720 }
2721
2722 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2723 return 1;
2724 }
2725
2726 return 0;
2727 }
2728
2729 /* Return true if OP contains a reference to a thread-local symbol. */
2730
2731 bool
2732 tls_symbolic_reference_mentioned_p (rtx op)
2733 {
2734 const char *fmt;
2735 int i;
2736
2737 if (GET_CODE (op) == SYMBOL_REF)
2738 return tls_symbolic_operand (op);
2739
2740 fmt = GET_RTX_FORMAT (GET_CODE (op));
2741 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2742 {
2743 if (fmt[i] == 'E')
2744 {
2745 int j;
2746
2747 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2748 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2749 return true;
2750 }
2751
2752 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
2753 return true;
2754 }
2755
2756 return false;
2757 }
2758
2759
2760 /* Return true if OP is a legitimate general operand when
2761 generating PIC code. It is given that flag_pic is on
2762 and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2763
2764 int
2765 legitimate_pic_operand_p (rtx op)
2766 {
2767 /* Accept all non-symbolic constants. */
2768 if (!SYMBOLIC_CONST (op))
2769 return 1;
2770
2771 /* Reject everything else; must be handled
2772 via emit_symbolic_move. */
2773 return 0;
2774 }
2775
2776 /* Returns true if the constant value OP is a legitimate general operand.
2777 It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2778
2779 static bool
2780 s390_legitimate_constant_p (enum machine_mode mode, rtx op)
2781 {
2782 /* Accept all non-symbolic constants. */
2783 if (!SYMBOLIC_CONST (op))
2784 return 1;
2785
2786 /* Accept immediate LARL operands. */
2787 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
2788 return 1;
2789
2790 /* Thread-local symbols are never legal constants. This is
2791 so that emit_call knows that computing such addresses
2792 might require a function call. */
2793 if (TLS_SYMBOLIC_CONST (op))
2794 return 0;
2795
2796 /* In the PIC case, symbolic constants must *not* be
2797 forced into the literal pool. We accept them here,
2798 so that they will be handled by emit_symbolic_move. */
2799 if (flag_pic)
2800 return 1;
2801
2802 /* All remaining non-PIC symbolic constants are
2803 forced into the literal pool. */
2804 return 0;
2805 }
2806
2807 /* Determine if it's legal to put X into the constant pool. This
2808 is not possible if X contains the address of a symbol that is
2809 not constant (TLS) or not known at final link time (PIC). */
2810
2811 static bool
2812 s390_cannot_force_const_mem (enum machine_mode mode, rtx x)
2813 {
2814 switch (GET_CODE (x))
2815 {
2816 case CONST_INT:
2817 case CONST_DOUBLE:
2818 /* Accept all non-symbolic constants. */
2819 return false;
2820
2821 case LABEL_REF:
2822 /* Labels are OK iff we are non-PIC. */
2823 return flag_pic != 0;
2824
2825 case SYMBOL_REF:
2826 /* 'Naked' TLS symbol references are never OK,
2827 non-TLS symbols are OK iff we are non-PIC. */
2828 if (tls_symbolic_operand (x))
2829 return true;
2830 else
2831 return flag_pic != 0;
2832
2833 case CONST:
2834 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
2835 case PLUS:
2836 case MINUS:
2837 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
2838 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
2839
2840 case UNSPEC:
2841 switch (XINT (x, 1))
2842 {
2843 /* Only lt-relative or GOT-relative UNSPECs are OK. */
2844 case UNSPEC_LTREL_OFFSET:
2845 case UNSPEC_GOT:
2846 case UNSPEC_GOTOFF:
2847 case UNSPEC_PLTOFF:
2848 case UNSPEC_TLSGD:
2849 case UNSPEC_TLSLDM:
2850 case UNSPEC_NTPOFF:
2851 case UNSPEC_DTPOFF:
2852 case UNSPEC_GOTNTPOFF:
2853 case UNSPEC_INDNTPOFF:
2854 return false;
2855
2856 /* If the literal pool shares the code section, execute template
2857 placeholders may be put into the pool as well. */
2858 case UNSPEC_INSN:
2859 return TARGET_CPU_ZARCH;
2860
2861 default:
2862 return true;
2863 }
2864 break;
2865
2866 default:
2867 gcc_unreachable ();
2868 }
2869 }
2870
2871 /* Returns true if the constant value OP is a legitimate general
2872 operand during and after reload. The difference from
2873 legitimate_constant_p is that this function will not accept
2874 a constant that would need to be forced into the literal pool
2875 before it can be used as an operand.
2876 This function accepts all constants which can be loaded directly
2877 into a GPR. */
2878
2879 bool
2880 legitimate_reload_constant_p (rtx op)
2881 {
2882 /* Accept la(y) operands. */
2883 if (GET_CODE (op) == CONST_INT
2884 && DISP_IN_RANGE (INTVAL (op)))
2885 return true;
2886
2887 /* Accept l(g)hi/l(g)fi operands. */
2888 if (GET_CODE (op) == CONST_INT
2889 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
2890 return true;
2891
2892 /* Accept lliXX operands. */
2893 if (TARGET_ZARCH
2894 && GET_CODE (op) == CONST_INT
2895 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2896 && s390_single_part (op, word_mode, HImode, 0) >= 0)
2897 return true;
2898
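/* With the extended-immediate facility also accept constants with a single
non-zero 32-bit part (presumably matching llihf / llilf). */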
2899 if (TARGET_EXTIMM
2900 && GET_CODE (op) == CONST_INT
2901 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2902 && s390_single_part (op, word_mode, SImode, 0) >= 0)
2903 return true;
2904
2905 /* Accept larl operands. */
2906 if (TARGET_CPU_ZARCH
2907 && larl_operand (op, VOIDmode))
2908 return true;
2909
2910 /* Accept floating-point zero operands that fit into a single GPR. */
2911 if (GET_CODE (op) == CONST_DOUBLE
2912 && s390_float_const_zero_p (op)
2913 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
2914 return true;
2915
2916 /* Accept double-word operands that can be split. */
2917 if (GET_CODE (op) == CONST_INT
2918 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
2919 {
2920 enum machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
2921 rtx hi = operand_subword (op, 0, 0, dword_mode);
2922 rtx lo = operand_subword (op, 1, 0, dword_mode);
2923 return legitimate_reload_constant_p (hi)
2924 && legitimate_reload_constant_p (lo);
2925 }
2926
2927 /* Everything else cannot be handled without reload. */
2928 return false;
2929 }
2930
2931 /* Returns true if the constant value OP is a legitimate fp operand
2932 during and after reload.
2933 This function accepts all constants which can be loaded directly
2934 into an FPR. */
2935
2936 static bool
2937 legitimate_reload_fp_constant_p (rtx op)
2938 {
2939 /* Accept floating-point zero operands if the load zero instruction
2940 can be used. Prior to z196 the load fp zero instruction caused a
2941 performance penalty if the result is used as a BFP number. */
2942 if (TARGET_Z196
2943 && GET_CODE (op) == CONST_DOUBLE
2944 && s390_float_const_zero_p (op))
2945 return true;
2946
2947 return false;
2948 }
2949
2950 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
2951 return the class of reg to actually use. */
2952
2953 static reg_class_t
2954 s390_preferred_reload_class (rtx op, reg_class_t rclass)
2955 {
2956 switch (GET_CODE (op))
2957 {
2958 /* Constants we cannot reload into general registers
2959 must be forced into the literal pool. */
2960 case CONST_DOUBLE:
2961 case CONST_INT:
2962 if (reg_class_subset_p (GENERAL_REGS, rclass)
2963 && legitimate_reload_constant_p (op))
2964 return GENERAL_REGS;
2965 else if (reg_class_subset_p (ADDR_REGS, rclass)
2966 && legitimate_reload_constant_p (op))
2967 return ADDR_REGS;
2968 else if (reg_class_subset_p (FP_REGS, rclass)
2969 && legitimate_reload_fp_constant_p (op))
2970 return FP_REGS;
2971 return NO_REGS;
2972
2973 /* If a symbolic constant or a PLUS is reloaded,
2974 it is most likely being used as an address, so
2975 prefer ADDR_REGS. If RCLASS is not a superset
2976 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
2977 case LABEL_REF:
2978 case SYMBOL_REF:
2979 case CONST:
2980 if (!legitimate_reload_constant_p (op))
2981 return NO_REGS;
2982 /* fallthrough */
2983 case PLUS:
2984 /* load address will be used. */
2985 if (reg_class_subset_p (ADDR_REGS, rclass))
2986 return ADDR_REGS;
2987 else
2988 return NO_REGS;
2989
2990 default:
2991 break;
2992 }
2993
2994 return rclass;
2995 }
2996
2997 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
2998 multiple of ALIGNMENT and the SYMBOL_REF being naturally
2999 aligned. */
3000
3001 bool
3002 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
3003 {
3004 HOST_WIDE_INT addend;
3005 rtx symref;
3006
3007 /* Accept symbol@GOTENT with pointer size alignment. */
3008 if (GET_CODE (addr) == CONST
3009 && GET_CODE (XEXP (addr, 0)) == UNSPEC
3010 && XINT (XEXP (addr, 0), 1) == UNSPEC_GOTENT
3011 && alignment <= UNITS_PER_LONG)
3012 return true;
3013
3014 if (!s390_symref_operand_p (addr, &symref, &addend))
3015 return false;
3016
3017 return (!SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref)
3018 && !(addend & (alignment - 1)));
3019 }
3020
3021 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
3022 operand, SCRATCH is used to reload the even part of the address;
3023 one is then added via la. */
3024
3025 void
3026 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
3027 {
3028 HOST_WIDE_INT addend;
3029 rtx symref;
3030
3031 if (!s390_symref_operand_p (addr, &symref, &addend))
3032 gcc_unreachable ();
3033
3034 if (!(addend & 1))
3035 /* Easy case. The addend is even so larl will do fine. */
3036 emit_move_insn (reg, addr);
3037 else
3038 {
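/* For example, for sym + 5 this emits larl SCRATCH, sym + 4 followed by
la REG, 1(SCRATCH). */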
3039 /* We can leave the scratch register untouched if the target
3040 register is a valid base register. */
3041 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
3042 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
3043 scratch = reg;
3044
3045 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
3046 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
3047
3048 if (addend != 1)
3049 emit_move_insn (scratch,
3050 gen_rtx_CONST (Pmode,
3051 gen_rtx_PLUS (Pmode, symref,
3052 GEN_INT (addend - 1))));
3053 else
3054 emit_move_insn (scratch, symref);
3055
3056 /* Increment the address using la in order to avoid clobbering cc. */
3057 emit_move_insn (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
3058 }
3059 }
3060
3061 /* Generate what is necessary to move between REG and MEM using
3062 SCRATCH. The direction is given by TOMEM. */
3063
3064 void
3065 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
3066 {
3067 /* Reload might have pulled a constant out of the literal pool.
3068 Force it back in. */
3069 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
3070 || GET_CODE (mem) == CONST)
3071 mem = force_const_mem (GET_MODE (reg), mem);
3072
3073 gcc_assert (MEM_P (mem));
3074
3075 /* For a load from memory we can leave the scratch register
3076 untouched if the target register is a valid base register. */
3077 if (!tomem
3078 && REGNO (reg) < FIRST_PSEUDO_REGISTER
3079 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
3080 && GET_MODE (reg) == GET_MODE (scratch))
3081 scratch = reg;
3082
3083 /* Load address into scratch register. Since we can't have a
3084 secondary reload for a secondary reload, we have to cover the case
3085 where larl would need a secondary reload here as well. */
3086 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
3087
3088 /* Now we can use a standard load/store to do the move. */
3089 if (tomem)
3090 emit_move_insn (replace_equiv_address (mem, scratch), reg);
3091 else
3092 emit_move_insn (reg, replace_equiv_address (mem, scratch));
3093 }
3094
3095 /* Inform reload about cases where moving X with a mode MODE to a register in
3096 RCLASS requires an extra scratch or immediate register. Return the class
3097 needed for the immediate register. */
3098
3099 static reg_class_t
3100 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
3101 enum machine_mode mode, secondary_reload_info *sri)
3102 {
3103 enum reg_class rclass = (enum reg_class) rclass_i;
3104
3105 /* Intermediate register needed. */
3106 if (reg_classes_intersect_p (CC_REGS, rclass))
3107 return GENERAL_REGS;
3108
3109 if (TARGET_Z10)
3110 {
3111 HOST_WIDE_INT offset;
3112 rtx symref;
3113
3114 /* On z10 several optimizer steps may generate larl operands with
3115 an odd addend. */
3116 if (in_p
3117 && s390_symref_operand_p (x, &symref, &offset)
3118 && mode == Pmode
3119 && !SYMBOL_REF_ALIGN1_P (symref)
3120 && (offset & 1) == 1)
3121 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
3122 : CODE_FOR_reloadsi_larl_odd_addend_z10);
3123
3124 /* On z10 we need a scratch register when moving QI, TI or floating
3125 point mode values from or to a memory location with a SYMBOL_REF
3126 or if the symref addend of a SI or DI move is not aligned to the
3127 width of the access. */
3128 if (MEM_P (x)
3129 && s390_symref_operand_p (XEXP (x, 0), NULL, NULL)
3130 && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode)
3131 || (!TARGET_ZARCH && mode == DImode)
3132 || ((mode == HImode || mode == SImode || mode == DImode)
3133 && (!s390_check_symref_alignment (XEXP (x, 0),
3134 GET_MODE_SIZE (mode))))))
3135 {
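/* The macro below just selects the per-mode reload pattern; e.g. for SFmode
on a 64-bit target it expands to CODE_FOR_reloadsfdi_toreg_z10 or
CODE_FOR_reloadsfdi_tomem_z10, depending on the direction. */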
3136 #define __SECONDARY_RELOAD_CASE(M,m) \
3137 case M##mode: \
3138 if (TARGET_64BIT) \
3139 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
3140 CODE_FOR_reload##m##di_tomem_z10; \
3141 else \
3142 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
3143 CODE_FOR_reload##m##si_tomem_z10; \
3144 break;
3145
3146 switch (GET_MODE (x))
3147 {
3148 __SECONDARY_RELOAD_CASE (QI, qi);
3149 __SECONDARY_RELOAD_CASE (HI, hi);
3150 __SECONDARY_RELOAD_CASE (SI, si);
3151 __SECONDARY_RELOAD_CASE (DI, di);
3152 __SECONDARY_RELOAD_CASE (TI, ti);
3153 __SECONDARY_RELOAD_CASE (SF, sf);
3154 __SECONDARY_RELOAD_CASE (DF, df);
3155 __SECONDARY_RELOAD_CASE (TF, tf);
3156 __SECONDARY_RELOAD_CASE (SD, sd);
3157 __SECONDARY_RELOAD_CASE (DD, dd);
3158 __SECONDARY_RELOAD_CASE (TD, td);
3159
3160 default:
3161 gcc_unreachable ();
3162 }
3163 #undef __SECONDARY_RELOAD_CASE
3164 }
3165 }
3166
3167 /* We need a scratch register when loading a PLUS expression which
3168 is not a legitimate operand of the LOAD ADDRESS instruction. */
3169 if (in_p && s390_plus_operand (x, mode))
3170 sri->icode = (TARGET_64BIT ?
3171 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
3172
3173 /* When performing a multiword move from or to memory we have to make sure
3174 the second chunk in memory is addressable without causing a displacement
3175 overflow. If it would overflow, we calculate the address in
3176 a scratch register. */
3177 if (MEM_P (x)
3178 && GET_CODE (XEXP (x, 0)) == PLUS
3179 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3180 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
3181 + GET_MODE_SIZE (mode) - 1))
3182 {
3183 /* For GENERAL_REGS a displacement overflow is no problem if it occurs
3184 in an s_operand address, since we can fall back to lm/stm. So we only
3185 have to care about overflows in the b+i+d case. */
3186 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
3187 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
3188 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
3189 /* For FP_REGS no lm/stm is available so this check is triggered
3190 for displacement overflows in b+i+d and b+d like addresses. */
3191 || (reg_classes_intersect_p (FP_REGS, rclass)
3192 && s390_class_max_nregs (FP_REGS, mode) > 1))
3193 {
3194 if (in_p)
3195 sri->icode = (TARGET_64BIT ?
3196 CODE_FOR_reloaddi_nonoffmem_in :
3197 CODE_FOR_reloadsi_nonoffmem_in);
3198 else
3199 sri->icode = (TARGET_64BIT ?
3200 CODE_FOR_reloaddi_nonoffmem_out :
3201 CODE_FOR_reloadsi_nonoffmem_out);
3202 }
3203 }
3204
3205 /* A scratch address register is needed when a symbolic constant is
3206 copied to r0 when compiling with -fPIC. In other cases the target
3207 register might be used as a temporary (see legitimize_pic_address). */
3208 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
3209 sri->icode = (TARGET_64BIT ?
3210 CODE_FOR_reloaddi_PIC_addr :
3211 CODE_FOR_reloadsi_PIC_addr);
3212
3213 /* Either scratch or no register needed. */
3214 return NO_REGS;
3215 }
3216
3217 /* Generate code to load SRC, which is a PLUS that is not a
3218 legitimate operand for the LA instruction, into TARGET.
3219 SCRATCH may be used as a scratch register. */
3220
3221 void
3222 s390_expand_plus_operand (rtx target, rtx src,
3223 rtx scratch)
3224 {
3225 rtx sum1, sum2;
3226 struct s390_address ad;
3227
3228 /* src must be a PLUS; get its two operands. */
3229 gcc_assert (GET_CODE (src) == PLUS);
3230 gcc_assert (GET_MODE (src) == Pmode);
3231
3232 /* Check whether either of the two operands is already scheduled
3233 for replacement by reload. This can happen e.g. when
3234 float registers occur in an address. */
3235 sum1 = find_replacement (&XEXP (src, 0));
3236 sum2 = find_replacement (&XEXP (src, 1));
3237 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3238
3239 /* If the address is already strictly valid, there's nothing to do. */
3240 if (!s390_decompose_address (src, &ad)
3241 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3242 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
3243 {
3244 /* Otherwise, one of the operands cannot be an address register;
3245 we reload its value into the scratch register. */
3246 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
3247 {
3248 emit_move_insn (scratch, sum1);
3249 sum1 = scratch;
3250 }
3251 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
3252 {
3253 emit_move_insn (scratch, sum2);
3254 sum2 = scratch;
3255 }
3256
3257 /* According to the way these invalid addresses are generated
3258 in reload.c, it should never happen (at least on s390) that
3259 *neither* of the PLUS components, after find_replacements
3260 was applied, is an address register. */
3261 if (sum1 == scratch && sum2 == scratch)
3262 {
3263 debug_rtx (src);
3264 gcc_unreachable ();
3265 }
3266
3267 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3268 }
3269
3270 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
3271 is only ever performed on addresses, so we can mark the
3272 sum as legitimate for LA in any case. */
3273 s390_load_address (target, src);
3274 }
3275
3276
3277 /* Return true if ADDR is a valid memory address.
3278 STRICT specifies whether strict register checking applies. */
3279
3280 static bool
3281 s390_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3282 {
3283 struct s390_address ad;
3284
3285 if (TARGET_Z10
3286 && larl_operand (addr, VOIDmode)
3287 && (mode == VOIDmode
3288 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
3289 return true;
3290
3291 if (!s390_decompose_address (addr, &ad))
3292 return false;
3293
3294 if (strict)
3295 {
3296 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3297 return false;
3298
3299 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
3300 return false;
3301 }
3302 else
3303 {
3304 if (ad.base
3305 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
3306 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
3307 return false;
3308
3309 if (ad.indx
3310 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
3311 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
3312 return false;
3313 }
3314 return true;
3315 }
3316
3317 /* Return true if OP is a valid operand for the LA instruction.
3318 In 31-bit, we need to prove that the result is used as an
3319 address, as LA performs only a 31-bit addition. */
3320
3321 bool
3322 legitimate_la_operand_p (rtx op)
3323 {
3324 struct s390_address addr;
3325 if (!s390_decompose_address (op, &addr))
3326 return false;
3327
3328 return (TARGET_64BIT || addr.pointer);
3329 }
3330
3331 /* Return true if it is valid *and* preferable to use LA to
3332 compute the sum of OP1 and OP2. */
3333
3334 bool
3335 preferred_la_operand_p (rtx op1, rtx op2)
3336 {
3337 struct s390_address addr;
3338
3339 if (op2 != const0_rtx)
3340 op1 = gen_rtx_PLUS (Pmode, op1, op2);
3341
3342 if (!s390_decompose_address (op1, &addr))
3343 return false;
3344 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
3345 return false;
3346 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
3347 return false;
3348
3349 /* Avoid LA instructions with index register on z196; it is
3350 preferable to use regular add instructions when possible.
3351 Starting with zEC12 the la with index register is "uncracked"
3352 again. */
3353 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
3354 return false;
3355
3356 if (!TARGET_64BIT && !addr.pointer)
3357 return false;
3358
3359 if (addr.pointer)
3360 return true;
3361
3362 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
3363 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
3364 return true;
3365
3366 return false;
3367 }
3368
3369 /* Emit a forced load-address operation to load SRC into DST.
3370 This will use the LOAD ADDRESS instruction even in situations
3371 where legitimate_la_operand_p (SRC) returns false. */
3372
3373 void
3374 s390_load_address (rtx dst, rtx src)
3375 {
3376 if (TARGET_64BIT)
3377 emit_move_insn (dst, src);
3378 else
3379 emit_insn (gen_force_la_31 (dst, src));
3380 }
3381
3382 /* Return a legitimate reference for ORIG (an address) using the
3383 register REG. If REG is 0, a new pseudo is generated.
3384
3385 There are two types of references that must be handled:
3386
3387 1. Global data references must load the address from the GOT, via
3388 the PIC reg. An insn is emitted to do this load, and the reg is
3389 returned.
3390
3391 2. Static data references, constant pool addresses, and code labels
3392 compute the address as an offset from the GOT, whose base is in
3393 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
3394 differentiate them from global data objects. The returned
3395 address is the PIC reg + an unspec constant.
3396
3397 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
3398 reg also appears in the address. */
3399
3400 rtx
3401 legitimize_pic_address (rtx orig, rtx reg)
3402 {
3403 rtx addr = orig;
3404 rtx new_rtx = orig;
3405 rtx base;
3406
3407 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
3408
3409 if (GET_CODE (addr) == LABEL_REF
3410 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr)))
3411 {
3412 /* This is a local symbol. */
3413 if (TARGET_CPU_ZARCH && larl_operand (addr, VOIDmode))
3414 {
3415 /* Access local symbols PC-relative via LARL.
3416 This is the same as in the non-PIC case, so it is
3417 handled automatically ... */
3418 }
3419 else
3420 {
3421 /* Access local symbols relative to the GOT. */
3422
3423 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3424
3425 if (reload_in_progress || reload_completed)
3426 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3427
3428 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
3429 addr = gen_rtx_CONST (Pmode, addr);
3430 addr = force_const_mem (Pmode, addr);
3431 emit_move_insn (temp, addr);
3432
3433 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3434 if (reg != 0)
3435 {
3436 s390_load_address (reg, new_rtx);
3437 new_rtx = reg;
3438 }
3439 }
3440 }
3441 else if (GET_CODE (addr) == SYMBOL_REF)
3442 {
3443 if (reg == 0)
3444 reg = gen_reg_rtx (Pmode);
3445
3446 if (flag_pic == 1)
3447 {
3448 /* Assume GOT offset < 4k. This is handled the same way
3449 in both 31- and 64-bit code (@GOT). */
3450
3451 if (reload_in_progress || reload_completed)
3452 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3453
3454 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3455 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3456 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3457 new_rtx = gen_const_mem (Pmode, new_rtx);
3458 emit_move_insn (reg, new_rtx);
3459 new_rtx = reg;
3460 }
3461 else if (TARGET_CPU_ZARCH)
3462 {
3463 /* If the GOT offset might be >= 4k, we determine the position
3464 of the GOT entry via a PC-relative LARL (@GOTENT). */
3465
3466 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3467
3468 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3469 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3470
3471 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3472 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3473
3474 if (!TARGET_Z10)
3475 {
3476 emit_move_insn (temp, new_rtx);
3477 new_rtx = gen_const_mem (Pmode, temp);
3478 }
3479 else
3480 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
3481 emit_move_insn (reg, new_rtx);
3482 new_rtx = reg;
3483 }
3484 else
3485 {
3486 /* If the GOT offset might be >= 4k, we have to load it
3487 from the literal pool (@GOT). */
3488
3489 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3490
3491 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3492 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3493
3494 if (reload_in_progress || reload_completed)
3495 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3496
3497 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3498 addr = gen_rtx_CONST (Pmode, addr);
3499 addr = force_const_mem (Pmode, addr);
3500 emit_move_insn (temp, addr);
3501
3502 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3503 new_rtx = gen_const_mem (Pmode, new_rtx);
3504 emit_move_insn (reg, new_rtx);
3505 new_rtx = reg;
3506 }
3507 }
3508 else
3509 {
3510 if (GET_CODE (addr) == CONST)
3511 {
3512 addr = XEXP (addr, 0);
3513 if (GET_CODE (addr) == UNSPEC)
3514 {
3515 gcc_assert (XVECLEN (addr, 0) == 1);
3516 switch (XINT (addr, 1))
3517 {
3518 /* If someone moved a GOT-relative UNSPEC
3519 out of the literal pool, force it back in. */
3520 case UNSPEC_GOTOFF:
3521 case UNSPEC_PLTOFF:
3522 new_rtx = force_const_mem (Pmode, orig);
3523 break;
3524
3525 /* @GOT is OK as is if small. */
3526 case UNSPEC_GOT:
3527 if (flag_pic == 2)
3528 new_rtx = force_const_mem (Pmode, orig);
3529 break;
3530
3531 /* @GOTENT is OK as is. */
3532 case UNSPEC_GOTENT:
3533 break;
3534
3535 /* @PLT is OK as is on 64-bit, must be converted to
3536 GOT-relative @PLTOFF on 31-bit. */
3537 case UNSPEC_PLT:
3538 if (!TARGET_CPU_ZARCH)
3539 {
3540 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3541
3542 if (reload_in_progress || reload_completed)
3543 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3544
3545 addr = XVECEXP (addr, 0, 0);
3546 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
3547 UNSPEC_PLTOFF);
3548 addr = gen_rtx_CONST (Pmode, addr);
3549 addr = force_const_mem (Pmode, addr);
3550 emit_move_insn (temp, addr);
3551
3552 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3553 if (reg != 0)
3554 {
3555 s390_load_address (reg, new_rtx);
3556 new_rtx = reg;
3557 }
3558 }
3559 break;
3560
3561 /* Everything else cannot happen. */
3562 default:
3563 gcc_unreachable ();
3564 }
3565 }
3566 else
3567 gcc_assert (GET_CODE (addr) == PLUS);
3568 }
3569 if (GET_CODE (addr) == PLUS)
3570 {
3571 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
3572
3573 gcc_assert (!TLS_SYMBOLIC_CONST (op0));
3574 gcc_assert (!TLS_SYMBOLIC_CONST (op1));
3575
3576 /* Check first to see if this is a constant offset
3577 from a local symbol reference. */
3578 if ((GET_CODE (op0) == LABEL_REF
3579 || (GET_CODE (op0) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (op0)))
3580 && GET_CODE (op1) == CONST_INT)
3581 {
3582 if (TARGET_CPU_ZARCH
3583 && larl_operand (op0, VOIDmode)
3584 && INTVAL (op1) < (HOST_WIDE_INT)1 << 31
3585 && INTVAL (op1) >= -((HOST_WIDE_INT)1 << 31))
3586 {
3587 if (INTVAL (op1) & 1)
3588 {
3589 /* LARL can't handle odd offsets, so emit a
3590 pair of LARL and LA. */
3591 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3592
3593 if (!DISP_IN_RANGE (INTVAL (op1)))
3594 {
3595 HOST_WIDE_INT even = INTVAL (op1) - 1;
3596 op0 = gen_rtx_PLUS (Pmode, op0, GEN_INT (even));
3597 op0 = gen_rtx_CONST (Pmode, op0);
3598 op1 = const1_rtx;
3599 }
3600
3601 emit_move_insn (temp, op0);
3602 new_rtx = gen_rtx_PLUS (Pmode, temp, op1);
3603
3604 if (reg != 0)
3605 {
3606 s390_load_address (reg, new_rtx);
3607 new_rtx = reg;
3608 }
3609 }
3610 else
3611 {
3612 /* If the offset is even, we can just use LARL.
3613 This will happen automatically. */
3614 }
3615 }
3616 else
3617 {
3618 /* Access local symbols relative to the GOT. */
3619
3620 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3621
3622 if (reload_in_progress || reload_completed)
3623 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3624
3625 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
3626 UNSPEC_GOTOFF);
3627 addr = gen_rtx_PLUS (Pmode, addr, op1);
3628 addr = gen_rtx_CONST (Pmode, addr);
3629 addr = force_const_mem (Pmode, addr);
3630 emit_move_insn (temp, addr);
3631
3632 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3633 if (reg != 0)
3634 {
3635 s390_load_address (reg, new_rtx);
3636 new_rtx = reg;
3637 }
3638 }
3639 }
3640
3641 /* Now, check whether it is a GOT relative symbol plus offset
3642 that was pulled out of the literal pool. Force it back in. */
3643
3644 else if (GET_CODE (op0) == UNSPEC
3645 && GET_CODE (op1) == CONST_INT
3646 && XINT (op0, 1) == UNSPEC_GOTOFF)
3647 {
3648 gcc_assert (XVECLEN (op0, 0) == 1);
3649
3650 new_rtx = force_const_mem (Pmode, orig);
3651 }
3652
3653 /* Otherwise, compute the sum. */
3654 else
3655 {
3656 base = legitimize_pic_address (XEXP (addr, 0), reg);
3657 new_rtx = legitimize_pic_address (XEXP (addr, 1),
3658 base == reg ? NULL_RTX : reg);
3659 if (GET_CODE (new_rtx) == CONST_INT)
3660 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
3661 else
3662 {
3663 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
3664 {
3665 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
3666 new_rtx = XEXP (new_rtx, 1);
3667 }
3668 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
3669 }
3670
3671 if (GET_CODE (new_rtx) == CONST)
3672 new_rtx = XEXP (new_rtx, 0);
3673 new_rtx = force_operand (new_rtx, 0);
3674 }
3675 }
3676 }
3677 return new_rtx;
3678 }
3679
3680 /* Load the thread pointer into a register. */
3681
3682 rtx
3683 s390_get_thread_pointer (void)
3684 {
3685 rtx tp = gen_reg_rtx (Pmode);
3686
3687 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
3688 mark_reg_pointer (tp, BITS_PER_WORD);
3689
3690 return tp;
3691 }
3692
3693 /* Emit a TLS call insn. The call target is the SYMBOL_REF stored
3694 in s390_tls_symbol, which always refers to __tls_get_offset.
3695 The returned offset is written to RESULT_REG and a USE rtx is
3696 generated for TLS_CALL. */
3697
3698 static GTY(()) rtx s390_tls_symbol;
3699
3700 static void
3701 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
3702 {
3703 rtx insn;
3704
3705 if (!flag_pic)
3706 emit_insn (s390_load_got ());
3707
3708 if (!s390_tls_symbol)
3709 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
3710
3711 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
3712 gen_rtx_REG (Pmode, RETURN_REGNUM));
3713
3714 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
3715 RTL_CONST_CALL_P (insn) = 1;
3716 }
3717
3718 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3719 this (thread-local) address. REG may be used as temporary. */
3720
3721 static rtx
3722 legitimize_tls_address (rtx addr, rtx reg)
3723 {
3724 rtx new_rtx, tls_call, temp, base, r2, insn;
3725
3726 if (GET_CODE (addr) == SYMBOL_REF)
3727 switch (tls_symbolic_operand (addr))
3728 {
3729 case TLS_MODEL_GLOBAL_DYNAMIC:
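/* Call __tls_get_offset with the @TLSGD literal in %r2 and add the returned
offset to the thread pointer. */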
3730 start_sequence ();
3731 r2 = gen_rtx_REG (Pmode, 2);
3732 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
3733 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3734 new_rtx = force_const_mem (Pmode, new_rtx);
3735 emit_move_insn (r2, new_rtx);
3736 s390_emit_tls_call_insn (r2, tls_call);
3737 insn = get_insns ();
3738 end_sequence ();
3739
3740 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3741 temp = gen_reg_rtx (Pmode);
3742 emit_libcall_block (insn, temp, r2, new_rtx);
3743
3744 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3745 if (reg != 0)
3746 {
3747 s390_load_address (reg, new_rtx);
3748 new_rtx = reg;
3749 }
3750 break;
3751
3752 case TLS_MODEL_LOCAL_DYNAMIC:
3753 start_sequence ();
3754 r2 = gen_rtx_REG (Pmode, 2);
3755 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
3756 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3757 new_rtx = force_const_mem (Pmode, new_rtx);
3758 emit_move_insn (r2, new_rtx);
3759 s390_emit_tls_call_insn (r2, tls_call);
3760 insn = get_insns ();
3761 end_sequence ();
3762
3763 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
3764 temp = gen_reg_rtx (Pmode);
3765 emit_libcall_block (insn, temp, r2, new_rtx);
3766
3767 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3768 base = gen_reg_rtx (Pmode);
3769 s390_load_address (base, new_rtx);
3770
3771 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
3772 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3773 new_rtx = force_const_mem (Pmode, new_rtx);
3774 temp = gen_reg_rtx (Pmode);
3775 emit_move_insn (temp, new_rtx);
3776
3777 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
3778 if (reg != 0)
3779 {
3780 s390_load_address (reg, new_rtx);
3781 new_rtx = reg;
3782 }
3783 break;
3784
3785 case TLS_MODEL_INITIAL_EXEC:
3786 if (flag_pic == 1)
3787 {
3788 /* Assume GOT offset < 4k. This is handled the same way
3789 in both 31- and 64-bit code. */
3790
3791 if (reload_in_progress || reload_completed)
3792 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3793
3794 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3795 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3796 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3797 new_rtx = gen_const_mem (Pmode, new_rtx);
3798 temp = gen_reg_rtx (Pmode);
3799 emit_move_insn (temp, new_rtx);
3800 }
3801 else if (TARGET_CPU_ZARCH)
3802 {
3803 /* If the GOT offset might be >= 4k, we determine the position
3804 of the GOT entry via a PC-relative LARL. */
3805
3806 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3807 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3808 temp = gen_reg_rtx (Pmode);
3809 emit_move_insn (temp, new_rtx);
3810
3811 new_rtx = gen_const_mem (Pmode, temp);
3812 temp = gen_reg_rtx (Pmode);
3813 emit_move_insn (temp, new_rtx);
3814 }
3815 else if (flag_pic)
3816 {
3817 /* If the GOT offset might be >= 4k, we have to load it
3818 from the literal pool. */
3819
3820 if (reload_in_progress || reload_completed)
3821 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3822
3823 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3824 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3825 new_rtx = force_const_mem (Pmode, new_rtx);
3826 temp = gen_reg_rtx (Pmode);
3827 emit_move_insn (temp, new_rtx);
3828
3829 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3830 new_rtx = gen_const_mem (Pmode, new_rtx);
3831
3832 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3833 temp = gen_reg_rtx (Pmode);
3834 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3835 }
3836 else
3837 {
3838 /* In position-dependent code, load the absolute address of
3839 the GOT entry from the literal pool. */
3840
3841 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3842 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3843 new_rtx = force_const_mem (Pmode, new_rtx);
3844 temp = gen_reg_rtx (Pmode);
3845 emit_move_insn (temp, new_rtx);
3846
3847 new_rtx = temp;
3848 new_rtx = gen_const_mem (Pmode, new_rtx);
3849 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3850 temp = gen_reg_rtx (Pmode);
3851 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3852 }
3853
3854 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3855 if (reg != 0)
3856 {
3857 s390_load_address (reg, new_rtx);
3858 new_rtx = reg;
3859 }
3860 break;
3861
3862 case TLS_MODEL_LOCAL_EXEC:
3863 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3864 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3865 new_rtx = force_const_mem (Pmode, new_rtx);
3866 temp = gen_reg_rtx (Pmode);
3867 emit_move_insn (temp, new_rtx);
3868
3869 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3870 if (reg != 0)
3871 {
3872 s390_load_address (reg, new_rtx);
3873 new_rtx = reg;
3874 }
3875 break;
3876
3877 default:
3878 gcc_unreachable ();
3879 }
3880
3881 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
3882 {
3883 switch (XINT (XEXP (addr, 0), 1))
3884 {
3885 case UNSPEC_INDNTPOFF:
3886 gcc_assert (TARGET_CPU_ZARCH);
3887 new_rtx = addr;
3888 break;
3889
3890 default:
3891 gcc_unreachable ();
3892 }
3893 }
3894
3895 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
3896 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3897 {
3898 new_rtx = XEXP (XEXP (addr, 0), 0);
3899 if (GET_CODE (new_rtx) != SYMBOL_REF)
3900 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3901
3902 new_rtx = legitimize_tls_address (new_rtx, reg);
3903 new_rtx = plus_constant (Pmode, new_rtx,
3904 INTVAL (XEXP (XEXP (addr, 0), 1)));
3905 new_rtx = force_operand (new_rtx, 0);
3906 }
3907
3908 else
3909 gcc_unreachable (); /* for now ... */
3910
3911 return new_rtx;
3912 }
3913
3914 /* Emit insns making the address in operands[1] valid for a standard
3915 move to operands[0]. operands[1] is replaced by an address which
3916 should be used instead of the former RTX to emit the move
3917 pattern. */
3918
3919 void
3920 emit_symbolic_move (rtx *operands)
3921 {
3922 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
3923
3924 if (GET_CODE (operands[0]) == MEM)
3925 operands[1] = force_reg (Pmode, operands[1]);
3926 else if (TLS_SYMBOLIC_CONST (operands[1]))
3927 operands[1] = legitimize_tls_address (operands[1], temp);
3928 else if (flag_pic)
3929 operands[1] = legitimize_pic_address (operands[1], temp);
3930 }
3931
3932 /* Try machine-dependent ways of modifying an illegitimate address X
3933 to be legitimate. If we find one, return the new, valid address.
3934
3935 OLDX is the address as it was before break_out_memory_refs was called.
3936 In some cases it is useful to look at this to decide what needs to be done.
3937
3938 MODE is the mode of the operand pointed to by X.
3939
3940 When -fpic is used, special handling is needed for symbolic references.
3941 See comments by legitimize_pic_address for details. */
3942
3943 static rtx
3944 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3945 enum machine_mode mode ATTRIBUTE_UNUSED)
3946 {
3947 rtx constant_term = const0_rtx;
3948
3949 if (TLS_SYMBOLIC_CONST (x))
3950 {
3951 x = legitimize_tls_address (x, 0);
3952
3953 if (s390_legitimate_address_p (mode, x, FALSE))
3954 return x;
3955 }
3956 else if (GET_CODE (x) == PLUS
3957 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
3958 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
3959 {
3960 return x;
3961 }
3962 else if (flag_pic)
3963 {
3964 if (SYMBOLIC_CONST (x)
3965 || (GET_CODE (x) == PLUS
3966 && (SYMBOLIC_CONST (XEXP (x, 0))
3967 || SYMBOLIC_CONST (XEXP (x, 1)))))
3968 x = legitimize_pic_address (x, 0);
3969
3970 if (s390_legitimate_address_p (mode, x, FALSE))
3971 return x;
3972 }
3973
3974 x = eliminate_constant_term (x, &constant_term);
3975
3976 /* Optimize loading of large displacements by splitting them
3977 into the multiple of 4K and the rest; this allows the
3978 former to be CSE'd if possible.
3979
3980 Don't do this if the displacement is added to a register
3981 pointing into the stack frame, as the offsets will
3982 change later anyway. */
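   /* As an illustration of the split below: for a displacement of 0x12345
      we get lower = 0x345 and upper = 0x12000.  UPPER is loaded into a
      register (and may be CSE'd with neighbouring accesses), while LOWER
      stays within range of a 12-bit displacement.  */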
3983
3984 if (GET_CODE (constant_term) == CONST_INT
3985 && !TARGET_LONG_DISPLACEMENT
3986 && !DISP_IN_RANGE (INTVAL (constant_term))
3987 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
3988 {
3989 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
3990 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
3991
3992 rtx temp = gen_reg_rtx (Pmode);
3993 rtx val = force_operand (GEN_INT (upper), temp);
3994 if (val != temp)
3995 emit_move_insn (temp, val);
3996
3997 x = gen_rtx_PLUS (Pmode, x, temp);
3998 constant_term = GEN_INT (lower);
3999 }
4000
4001 if (GET_CODE (x) == PLUS)
4002 {
4003 if (GET_CODE (XEXP (x, 0)) == REG)
4004 {
4005 rtx temp = gen_reg_rtx (Pmode);
4006 rtx val = force_operand (XEXP (x, 1), temp);
4007 if (val != temp)
4008 emit_move_insn (temp, val);
4009
4010 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
4011 }
4012
4013 else if (GET_CODE (XEXP (x, 1)) == REG)
4014 {
4015 rtx temp = gen_reg_rtx (Pmode);
4016 rtx val = force_operand (XEXP (x, 0), temp);
4017 if (val != temp)
4018 emit_move_insn (temp, val);
4019
4020 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
4021 }
4022 }
4023
4024 if (constant_term != const0_rtx)
4025 x = gen_rtx_PLUS (Pmode, x, constant_term);
4026
4027 return x;
4028 }
4029
4030 /* Try a machine-dependent way of reloading an illegitimate address AD
4031 operand. If we find one, push the reload and return the new address.
4032
4033 MODE is the mode of the enclosing MEM. OPNUM is the operand number
4034 and TYPE is the reload type of the current reload. */
4035
4036 rtx
4037 legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
4038 int opnum, int type)
4039 {
4040 if (!optimize || TARGET_LONG_DISPLACEMENT)
4041 return NULL_RTX;
4042
4043 if (GET_CODE (ad) == PLUS)
4044 {
4045 rtx tem = simplify_binary_operation (PLUS, Pmode,
4046 XEXP (ad, 0), XEXP (ad, 1));
4047 if (tem)
4048 ad = tem;
4049 }
4050
4051 if (GET_CODE (ad) == PLUS
4052 && GET_CODE (XEXP (ad, 0)) == REG
4053 && GET_CODE (XEXP (ad, 1)) == CONST_INT
4054 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
4055 {
4056 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
4057 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
4058 rtx cst, tem, new_rtx;
4059
4060 cst = GEN_INT (upper);
4061 if (!legitimate_reload_constant_p (cst))
4062 cst = force_const_mem (Pmode, cst);
4063
4064 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
4065 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
4066
4067 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
4068 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
4069 opnum, (enum reload_type) type);
4070 return new_rtx;
4071 }
4072
4073 return NULL_RTX;
4074 }
4075
4076 /* Emit code to move LEN bytes from SRC to DST. */
4077
4078 bool
4079 s390_expand_movmem (rtx dst, rtx src, rtx len)
4080 {
4081 /* When tuning for z10 or higher we rely on the Glibc functions to
4082 do the right thing. We only generate inline code for constant
4083 lengths below 64k. */
4084 if (s390_tune >= PROCESSOR_2097_Z10
4085 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4086 return false;
4087
4088 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4089 {
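      /* The MVC instruction encodes the byte count minus one in its 8-bit
         length field, covering moves of 1 to 256 bytes; this is why the
         pattern below receives LEN - 1 and why the bound above is 256.  */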
4090 if (INTVAL (len) > 0)
4091 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
4092 }
4093
4094 else if (TARGET_MVCLE)
4095 {
4096 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
4097 }
4098
4099 else
4100 {
4101 rtx dst_addr, src_addr, count, blocks, temp;
4102 rtx loop_start_label = gen_label_rtx ();
4103 rtx loop_end_label = gen_label_rtx ();
4104 rtx end_label = gen_label_rtx ();
4105 enum machine_mode mode;
4106
4107 mode = GET_MODE (len);
4108 if (mode == VOIDmode)
4109 mode = Pmode;
4110
4111 dst_addr = gen_reg_rtx (Pmode);
4112 src_addr = gen_reg_rtx (Pmode);
4113 count = gen_reg_rtx (mode);
4114 blocks = gen_reg_rtx (mode);
4115
4116 convert_move (count, len, 1);
4117 emit_cmp_and_jump_insns (count, const0_rtx,
4118 EQ, NULL_RTX, mode, 1, end_label);
4119
4120 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4121 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
4122 dst = change_address (dst, VOIDmode, dst_addr);
4123 src = change_address (src, VOIDmode, src_addr);
4124
4125 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4126 OPTAB_DIRECT);
4127 if (temp != count)
4128 emit_move_insn (count, temp);
4129
4130 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4131 OPTAB_DIRECT);
4132 if (temp != blocks)
4133 emit_move_insn (blocks, temp);
4134
4135 emit_cmp_and_jump_insns (blocks, const0_rtx,
4136 EQ, NULL_RTX, mode, 1, loop_end_label);
4137
4138 emit_label (loop_start_label);
4139
4140 if (TARGET_Z10
4141 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
4142 {
4143 rtx prefetch;
4144
4145 /* Issue a read prefetch for the +3 cache line. */
4146 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
4147 const0_rtx, const0_rtx);
4148 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4149 emit_insn (prefetch);
4150
4151 /* Issue a write prefetch for the +3 cache line. */
4152 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
4153 const1_rtx, const0_rtx);
4154 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4155 emit_insn (prefetch);
4156 }
4157
4158 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
4159 s390_load_address (dst_addr,
4160 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4161 s390_load_address (src_addr,
4162 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
4163
4164 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4165 OPTAB_DIRECT);
4166 if (temp != blocks)
4167 emit_move_insn (blocks, temp);
4168
4169 emit_cmp_and_jump_insns (blocks, const0_rtx,
4170 EQ, NULL_RTX, mode, 1, loop_end_label);
4171
4172 emit_jump (loop_start_label);
4173 emit_label (loop_end_label);
4174
4175 emit_insn (gen_movmem_short (dst, src,
4176 convert_to_mode (Pmode, count, 1)));
4177 emit_label (end_label);
4178 }
4179 return true;
4180 }
4181
4182 /* Emit code to set LEN bytes at DST to VAL.
4183 Make use of clrmem if VAL is zero. */
4184
4185 void
4186 s390_expand_setmem (rtx dst, rtx len, rtx val)
4187 {
4188 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
4189 return;
4190
4191 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
4192
4193 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
4194 {
4195 if (val == const0_rtx && INTVAL (len) <= 256)
4196 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
4197 else
4198 {
4199 /* Initialize memory by storing the first byte. */
4200 emit_move_insn (adjust_address (dst, QImode, 0), val);
4201
4202 if (INTVAL (len) > 1)
4203 {
4204 /* Initiate 1 byte overlap move.
4205 The first byte of DST is propagated through DSTP1.
4206 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
4207 DST is set to size 1 so the rest of the memory location
4208 does not count as source operand. */
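              /* For instance, with LEN = 4 the overlapping MVC copies byte
                 by byte from left to right: DST[1] = DST[0], DST[2] = DST[1]
                 and DST[3] = DST[2], so the byte just stored at DST[0] is
                 propagated through the whole destination.  */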
4209 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
4210 set_mem_size (dst, 1);
4211
4212 emit_insn (gen_movmem_short (dstp1, dst,
4213 GEN_INT (INTVAL (len) - 2)));
4214 }
4215 }
4216 }
4217
4218 else if (TARGET_MVCLE)
4219 {
4220 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
4221 emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
4222 }
4223
4224 else
4225 {
4226 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
4227 rtx loop_start_label = gen_label_rtx ();
4228 rtx loop_end_label = gen_label_rtx ();
4229 rtx end_label = gen_label_rtx ();
4230 enum machine_mode mode;
4231
4232 mode = GET_MODE (len);
4233 if (mode == VOIDmode)
4234 mode = Pmode;
4235
4236 dst_addr = gen_reg_rtx (Pmode);
4237 count = gen_reg_rtx (mode);
4238 blocks = gen_reg_rtx (mode);
4239
4240 convert_move (count, len, 1);
4241 emit_cmp_and_jump_insns (count, const0_rtx,
4242 EQ, NULL_RTX, mode, 1, end_label);
4243
4244 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4245 dst = change_address (dst, VOIDmode, dst_addr);
4246
4247 if (val == const0_rtx)
4248 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4249 OPTAB_DIRECT);
4250 else
4251 {
4252 dstp1 = adjust_address (dst, VOIDmode, 1);
4253 set_mem_size (dst, 1);
4254
4255 /* Initialize memory by storing the first byte. */
4256 emit_move_insn (adjust_address (dst, QImode, 0), val);
4257
4258 /* If count is 1 we are done. */
4259 emit_cmp_and_jump_insns (count, const1_rtx,
4260 EQ, NULL_RTX, mode, 1, end_label);
4261
4262 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
4263 OPTAB_DIRECT);
4264 }
4265 if (temp != count)
4266 emit_move_insn (count, temp);
4267
4268 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4269 OPTAB_DIRECT);
4270 if (temp != blocks)
4271 emit_move_insn (blocks, temp);
4272
4273 emit_cmp_and_jump_insns (blocks, const0_rtx,
4274 EQ, NULL_RTX, mode, 1, loop_end_label);
4275
4276 emit_label (loop_start_label);
4277
4278 if (TARGET_Z10
4279 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
4280 {
4281 /* Issue a write prefetch for the +4 cache line. */
4282 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
4283 GEN_INT (1024)),
4284 const1_rtx, const0_rtx);
4285 emit_insn (prefetch);
4286 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4287 }
4288
4289 if (val == const0_rtx)
4290 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
4291 else
4292 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
4293 s390_load_address (dst_addr,
4294 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4295
4296 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4297 OPTAB_DIRECT);
4298 if (temp != blocks)
4299 emit_move_insn (blocks, temp);
4300
4301 emit_cmp_and_jump_insns (blocks, const0_rtx,
4302 EQ, NULL_RTX, mode, 1, loop_end_label);
4303
4304 emit_jump (loop_start_label);
4305 emit_label (loop_end_label);
4306
4307 if (val == const0_rtx)
4308 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
4309 else
4310 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
4311 emit_label (end_label);
4312 }
4313 }
4314
4315 /* Emit code to compare LEN bytes at OP0 with those at OP1,
4316 and return the result in TARGET. */
4317
4318 bool
4319 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
4320 {
4321 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
4322 rtx tmp;
4323
4324 /* When tuning for z10 or higher we rely on the Glibc functions to
4325 do the right thing. We only generate inline code for constant
4326 lengths below 64k. */
4327 if (s390_tune >= PROCESSOR_2097_Z10
4328 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4329 return false;
4330
4331 /* As the result of CMPINT is inverted compared to what we need,
4332 we have to swap the operands. */
4333 tmp = op0; op0 = op1; op1 = tmp;
4334
4335 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4336 {
4337 if (INTVAL (len) > 0)
4338 {
4339 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
4340 emit_insn (gen_cmpint (target, ccreg));
4341 }
4342 else
4343 emit_move_insn (target, const0_rtx);
4344 }
4345 else if (TARGET_MVCLE)
4346 {
4347 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
4348 emit_insn (gen_cmpint (target, ccreg));
4349 }
4350 else
4351 {
4352 rtx addr0, addr1, count, blocks, temp;
4353 rtx loop_start_label = gen_label_rtx ();
4354 rtx loop_end_label = gen_label_rtx ();
4355 rtx end_label = gen_label_rtx ();
4356 enum machine_mode mode;
4357
4358 mode = GET_MODE (len);
4359 if (mode == VOIDmode)
4360 mode = Pmode;
4361
4362 addr0 = gen_reg_rtx (Pmode);
4363 addr1 = gen_reg_rtx (Pmode);
4364 count = gen_reg_rtx (mode);
4365 blocks = gen_reg_rtx (mode);
4366
4367 convert_move (count, len, 1);
4368 emit_cmp_and_jump_insns (count, const0_rtx,
4369 EQ, NULL_RTX, mode, 1, end_label);
4370
4371 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
4372 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
4373 op0 = change_address (op0, VOIDmode, addr0);
4374 op1 = change_address (op1, VOIDmode, addr1);
4375
4376 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4377 OPTAB_DIRECT);
4378 if (temp != count)
4379 emit_move_insn (count, temp);
4380
4381 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4382 OPTAB_DIRECT);
4383 if (temp != blocks)
4384 emit_move_insn (blocks, temp);
4385
4386 emit_cmp_and_jump_insns (blocks, const0_rtx,
4387 EQ, NULL_RTX, mode, 1, loop_end_label);
4388
4389 emit_label (loop_start_label);
4390
4391 if (TARGET_Z10
4392 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
4393 {
4394 rtx prefetch;
4395
4396 /* Issue a read prefetch for the +2 cache line of operand 1. */
4397 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
4398 const0_rtx, const0_rtx);
4399 emit_insn (prefetch);
4400 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4401
4402 /* Issue a read prefetch for the +2 cache line of operand 2. */
4403 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
4404 const0_rtx, const0_rtx);
4405 emit_insn (prefetch);
4406 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4407 }
4408
4409 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
4410 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
4411 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
4412 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
4413 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
4414 emit_jump_insn (temp);
4415
4416 s390_load_address (addr0,
4417 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
4418 s390_load_address (addr1,
4419 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
4420
4421 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4422 OPTAB_DIRECT);
4423 if (temp != blocks)
4424 emit_move_insn (blocks, temp);
4425
4426 emit_cmp_and_jump_insns (blocks, const0_rtx,
4427 EQ, NULL_RTX, mode, 1, loop_end_label);
4428
4429 emit_jump (loop_start_label);
4430 emit_label (loop_end_label);
4431
4432 emit_insn (gen_cmpmem_short (op0, op1,
4433 convert_to_mode (Pmode, count, 1)));
4434 emit_label (end_label);
4435
4436 emit_insn (gen_cmpint (target, ccreg));
4437 }
4438 return true;
4439 }
4440
4441
4442 /* Expand conditional increment or decrement using alc/slb instructions.
4443 Should generate code setting DST to either SRC or SRC + INCREMENT,
4444 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
4445 Returns true if successful, false otherwise.
4446
4447 That makes it possible to implement some if-constructs without jumps e.g.:
4448 (borrow = CC0 | CC1 and carry = CC2 | CC3)
4449 unsigned int a, b, c;
4450 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
4451 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
4452 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
4453 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
4454
4455 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
4456 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
4457 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
4458 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
4459 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
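/* As a rough sketch of the expansion below: for "if (a < b) c++;" with
   unsigned SImode operands, the LTU test is rewritten as (gtu b a), we emit

     (set (reg:CCU CC_REGNUM) (compare:CCU b a))

   and then the add-with-carry shape

     (parallel [(set c (plus:SI (plus:SI (gtu:SI (reg:CCU CC_REGNUM)
                                                 (const_int 0))
                                         c)
                                (const_int 0)))
                (clobber (reg:CC CC_REGNUM))])

   which is intended to match the add-logical-with-carry (ALC) patterns
   in s390.md.  */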
4460
4461 bool
4462 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
4463 rtx dst, rtx src, rtx increment)
4464 {
4465 enum machine_mode cmp_mode;
4466 enum machine_mode cc_mode;
4467 rtx op_res;
4468 rtx insn;
4469 rtvec p;
4470 int ret;
4471
4472 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
4473 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
4474 cmp_mode = SImode;
4475 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
4476 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
4477 cmp_mode = DImode;
4478 else
4479 return false;
4480
4481 /* Try ADD LOGICAL WITH CARRY. */
4482 if (increment == const1_rtx)
4483 {
4484 /* Determine CC mode to use. */
4485 if (cmp_code == EQ || cmp_code == NE)
4486 {
4487 if (cmp_op1 != const0_rtx)
4488 {
4489 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4490 NULL_RTX, 0, OPTAB_WIDEN);
4491 cmp_op1 = const0_rtx;
4492 }
4493
4494 cmp_code = cmp_code == EQ ? LEU : GTU;
4495 }
4496
4497 if (cmp_code == LTU || cmp_code == LEU)
4498 {
4499 rtx tem = cmp_op0;
4500 cmp_op0 = cmp_op1;
4501 cmp_op1 = tem;
4502 cmp_code = swap_condition (cmp_code);
4503 }
4504
4505 switch (cmp_code)
4506 {
4507 case GTU:
4508 cc_mode = CCUmode;
4509 break;
4510
4511 case GEU:
4512 cc_mode = CCL3mode;
4513 break;
4514
4515 default:
4516 return false;
4517 }
4518
4519 /* Emit comparison instruction pattern. */
4520 if (!register_operand (cmp_op0, cmp_mode))
4521 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4522
4523 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4524 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4525 /* We use insn_invalid_p here to add clobbers if required. */
4526 ret = insn_invalid_p (emit_insn (insn), false);
4527 gcc_assert (!ret);
4528
4529 /* Emit ALC instruction pattern. */
4530 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4531 gen_rtx_REG (cc_mode, CC_REGNUM),
4532 const0_rtx);
4533
4534 if (src != const0_rtx)
4535 {
4536 if (!register_operand (src, GET_MODE (dst)))
4537 src = force_reg (GET_MODE (dst), src);
4538
4539 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
4540 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
4541 }
4542
4543 p = rtvec_alloc (2);
4544 RTVEC_ELT (p, 0) =
4545 gen_rtx_SET (VOIDmode, dst, op_res);
4546 RTVEC_ELT (p, 1) =
4547 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4548 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4549
4550 return true;
4551 }
4552
4553 /* Try SUBTRACT LOGICAL WITH BORROW. */
4554 if (increment == constm1_rtx)
4555 {
4556 /* Determine CC mode to use. */
4557 if (cmp_code == EQ || cmp_code == NE)
4558 {
4559 if (cmp_op1 != const0_rtx)
4560 {
4561 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4562 NULL_RTX, 0, OPTAB_WIDEN);
4563 cmp_op1 = const0_rtx;
4564 }
4565
4566 cmp_code = cmp_code == EQ ? LEU : GTU;
4567 }
4568
4569 if (cmp_code == GTU || cmp_code == GEU)
4570 {
4571 rtx tem = cmp_op0;
4572 cmp_op0 = cmp_op1;
4573 cmp_op1 = tem;
4574 cmp_code = swap_condition (cmp_code);
4575 }
4576
4577 switch (cmp_code)
4578 {
4579 case LEU:
4580 cc_mode = CCUmode;
4581 break;
4582
4583 case LTU:
4584 cc_mode = CCL3mode;
4585 break;
4586
4587 default:
4588 return false;
4589 }
4590
4591 /* Emit comparison instruction pattern. */
4592 if (!register_operand (cmp_op0, cmp_mode))
4593 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4594
4595 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4596 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4597 /* We use insn_invalid_p here to add clobbers if required. */
4598 ret = insn_invalid_p (emit_insn (insn), false);
4599 gcc_assert (!ret);
4600
4601 /* Emit SLB instruction pattern. */
4602 if (!register_operand (src, GET_MODE (dst)))
4603 src = force_reg (GET_MODE (dst), src);
4604
4605 op_res = gen_rtx_MINUS (GET_MODE (dst),
4606 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
4607 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4608 gen_rtx_REG (cc_mode, CC_REGNUM),
4609 const0_rtx));
4610 p = rtvec_alloc (2);
4611 RTVEC_ELT (p, 0) =
4612 gen_rtx_SET (VOIDmode, dst, op_res);
4613 RTVEC_ELT (p, 1) =
4614 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4615 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4616
4617 return true;
4618 }
4619
4620 return false;
4621 }
4622
4623 /* Expand code for the insv template. Return true if successful. */
4624
4625 bool
4626 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
4627 {
4628 int bitsize = INTVAL (op1);
4629 int bitpos = INTVAL (op2);
4630 enum machine_mode mode = GET_MODE (dest);
4631 enum machine_mode smode;
4632 int smode_bsize, mode_bsize;
4633 rtx op, clobber;
4634
4635 /* Generate INSERT IMMEDIATE (IILL et al). */
4636 /* (set (ze (reg)) (const_int)). */
4637 if (TARGET_ZARCH
4638 && register_operand (dest, word_mode)
4639 && (bitpos % 16) == 0
4640 && (bitsize % 16) == 0
4641 && const_int_operand (src, VOIDmode))
4642 {
4643 HOST_WIDE_INT val = INTVAL (src);
4644 int regpos = bitpos + bitsize;
4645
4646 while (regpos > bitpos)
4647 {
4648 enum machine_mode putmode;
4649 int putsize;
4650
4651 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
4652 putmode = SImode;
4653 else
4654 putmode = HImode;
4655
4656 putsize = GET_MODE_BITSIZE (putmode);
4657 regpos -= putsize;
4658 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4659 GEN_INT (putsize),
4660 GEN_INT (regpos)),
4661 gen_int_mode (val, putmode));
4662 val >>= putsize;
4663 }
4664 gcc_assert (regpos == bitpos);
4665 return true;
4666 }
4667
4668 smode = smallest_mode_for_size (bitsize, MODE_INT);
4669 smode_bsize = GET_MODE_BITSIZE (smode);
4670 mode_bsize = GET_MODE_BITSIZE (mode);
4671
4672 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
4673 if (bitpos == 0
4674 && (bitsize % BITS_PER_UNIT) == 0
4675 && MEM_P (dest)
4676 && (register_operand (src, word_mode)
4677 || const_int_operand (src, VOIDmode)))
4678 {
4679 /* Emit standard pattern if possible. */
4680 if (smode_bsize == bitsize)
4681 {
4682 emit_move_insn (adjust_address (dest, smode, 0),
4683 gen_lowpart (smode, src));
4684 return true;
4685 }
4686
4687 /* (set (ze (mem)) (const_int)). */
4688 else if (const_int_operand (src, VOIDmode))
4689 {
4690 int size = bitsize / BITS_PER_UNIT;
4691 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
4692 BLKmode,
4693 UNITS_PER_WORD - size);
4694
4695 dest = adjust_address (dest, BLKmode, 0);
4696 set_mem_size (dest, size);
4697 s390_expand_movmem (dest, src_mem, GEN_INT (size));
4698 return true;
4699 }
4700
4701 /* (set (ze (mem)) (reg)). */
4702 else if (register_operand (src, word_mode))
4703 {
4704 if (bitsize <= 32)
4705 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
4706 const0_rtx), src);
4707 else
4708 {
4709 /* Emit st,stcmh sequence. */
4710 int stcmh_width = bitsize - 32;
4711 int size = stcmh_width / BITS_PER_UNIT;
4712
4713 emit_move_insn (adjust_address (dest, SImode, size),
4714 gen_lowpart (SImode, src));
4715 set_mem_size (dest, size);
4716 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4717 GEN_INT (stcmh_width),
4718 const0_rtx),
4719 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
4720 }
4721 return true;
4722 }
4723 }
4724
4725 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
4726 if ((bitpos % BITS_PER_UNIT) == 0
4727 && (bitsize % BITS_PER_UNIT) == 0
4728 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
4729 && MEM_P (src)
4730 && (mode == DImode || mode == SImode)
4731 && register_operand (dest, mode))
4732 {
4733 /* Emit a strict_low_part pattern if possible. */
4734 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
4735 {
4736 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
4737 op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src));
4738 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4739 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
4740 return true;
4741 }
4742
4743 /* ??? There are more powerful versions of ICM that are not
4744 completely represented in the md file. */
4745 }
4746
4747 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
4748 if (TARGET_Z10 && (mode == DImode || mode == SImode))
4749 {
4750 enum machine_mode mode_s = GET_MODE (src);
4751
4752 if (mode_s == VOIDmode)
4753 {
4754 /* Assume const_int etc already in the proper mode. */
4755 src = force_reg (mode, src);
4756 }
4757 else if (mode_s != mode)
4758 {
4759 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
4760 src = force_reg (mode_s, src);
4761 src = gen_lowpart (mode, src);
4762 }
4763
4764 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
4765 op = gen_rtx_SET (VOIDmode, op, src);
4766
4767 if (!TARGET_ZEC12)
4768 {
4769 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4770 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
4771 }
4772 emit_insn (op);
4773
4774 return true;
4775 }
4776
4777 return false;
4778 }
4779
4780 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
4781 register that holds VAL of mode MODE shifted by COUNT bits. */
4782
4783 static inline rtx
4784 s390_expand_mask_and_shift (rtx val, enum machine_mode mode, rtx count)
4785 {
4786 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
4787 NULL_RTX, 1, OPTAB_DIRECT);
4788 return expand_simple_binop (SImode, ASHIFT, val, count,
4789 NULL_RTX, 1, OPTAB_DIRECT);
4790 }
4791
4792 /* Structure to hold the initial parameters for a compare_and_swap operation
4793 in HImode and QImode. */
4794
4795 struct alignment_context
4796 {
4797 rtx memsi; /* SI aligned memory location. */
4798 rtx shift; /* Bit offset with regard to lsb. */
4799 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
4800 rtx modemaski; /* ~modemask */
4801 bool aligned; /* True if memory is aligned, false otherwise. */
4802 };
4803
4804 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
4805 structure AC, which lets the callers transparently use a simpler code
4806 sequence when the memory alignment is known to be at least 32 bits.
4807 MEM is the memory location for the actual operation and MODE its mode. */
4808
4809 static void
4810 init_alignment_context (struct alignment_context *ac, rtx mem,
4811 enum machine_mode mode)
4812 {
4813 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
4814 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
4815
4816 if (ac->aligned)
4817 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
4818 else
4819 {
4820 /* Alignment is unknown. */
4821 rtx byteoffset, addr, align;
4822
4823 /* Force the address into a register. */
4824 addr = force_reg (Pmode, XEXP (mem, 0));
4825
4826 /* Align it to SImode. */
4827 align = expand_simple_binop (Pmode, AND, addr,
4828 GEN_INT (-GET_MODE_SIZE (SImode)),
4829 NULL_RTX, 1, OPTAB_DIRECT);
4830 /* Generate MEM. */
4831 ac->memsi = gen_rtx_MEM (SImode, align);
4832 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
4833 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
4834 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
4835
4836 /* Calculate shiftcount. */
4837 byteoffset = expand_simple_binop (Pmode, AND, addr,
4838 GEN_INT (GET_MODE_SIZE (SImode) - 1),
4839 NULL_RTX, 1, OPTAB_DIRECT);
4840 /* As we already have some offset, evaluate the remaining distance. */
4841 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
4842 NULL_RTX, 1, OPTAB_DIRECT);
4843 }
4844
4845 /* Shift is the byte count, but we need the bitcount. */
4846 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
4847 NULL_RTX, 1, OPTAB_DIRECT);
4848
4849 /* Calculate masks. */
4850 ac->modemask = expand_simple_binop (SImode, ASHIFT,
4851 GEN_INT (GET_MODE_MASK (mode)),
4852 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
4853 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
4854 NULL_RTX, 1);
4855 }
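/* To illustrate the unaligned case above on this big-endian target: for a
   QImode MEM whose address is 1 mod 4, MEMSI covers the enclosing word,
   BYTEOFFSET is 1 and SHIFT becomes (3 - 1) * 8 = 16, so the byte of
   interest occupies bits 16..23 of the loaded word and MODEMASK is
   0xff << 16.  */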
4856
4857 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
4858 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
4859 perform the merge in SEQ2. */
4860
4861 static rtx
4862 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
4863 enum machine_mode mode, rtx val, rtx ins)
4864 {
4865 rtx tmp;
4866
4867 if (ac->aligned)
4868 {
4869 start_sequence ();
4870 tmp = copy_to_mode_reg (SImode, val);
4871 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
4872 const0_rtx, ins))
4873 {
4874 *seq1 = NULL;
4875 *seq2 = get_insns ();
4876 end_sequence ();
4877 return tmp;
4878 }
4879 end_sequence ();
4880 }
4881
4882 /* Failed to use insv. Generate a two part shift and mask. */
4883 start_sequence ();
4884 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
4885 *seq1 = get_insns ();
4886 end_sequence ();
4887
4888 start_sequence ();
4889 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
4890 *seq2 = get_insns ();
4891 end_sequence ();
4892
4893 return tmp;
4894 }
4895
4896 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
4897 the memory location, CMP the old value to compare MEM with and NEW_RTX the
4898 value to set if CMP == MEM. */
4899
4900 void
4901 s390_expand_cs_hqi (enum machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
4902 rtx cmp, rtx new_rtx, bool is_weak)
4903 {
4904 struct alignment_context ac;
4905 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
4906 rtx res = gen_reg_rtx (SImode);
4907 rtx csloop = NULL, csend = NULL;
4908
4909 gcc_assert (MEM_P (mem));
4910
4911 init_alignment_context (&ac, mem, mode);
4912
4913 /* Load full word. Subsequent loads are performed by CS. */
4914 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
4915 NULL_RTX, 1, OPTAB_DIRECT);
4916
4917 /* Prepare insertions of cmp and new_rtx into the loaded value. When
4918 possible, we try to use insv to make this happen efficiently. If
4919 that fails we'll generate code both inside and outside the loop. */
4920 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
4921 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
4922
4923 if (seq0)
4924 emit_insn (seq0);
4925 if (seq1)
4926 emit_insn (seq1);
4927
4928 /* Start CS loop. */
4929 if (!is_weak)
4930 {
4931 /* Begin assuming success. */
4932 emit_move_insn (btarget, const1_rtx);
4933
4934 csloop = gen_label_rtx ();
4935 csend = gen_label_rtx ();
4936 emit_label (csloop);
4937 }
4938
4939 /* val = "<mem>00..0<mem>"
4940 * cmp = "00..0<cmp>00..0"
4941 * new = "00..0<new>00..0"
4942 */
4943
4944 emit_insn (seq2);
4945 emit_insn (seq3);
4946
4947 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
4948 if (is_weak)
4949 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
4950 else
4951 {
4952 rtx tmp;
4953
4954 /* Jump to end if we're done (likely?). */
4955 s390_emit_jump (csend, cc);
4956
4957 /* Check for changes outside the mode, and loop internally if so.
4958 Arrange the moves so that the compare is adjacent to the
4959 branch so that we can generate CRJ. */
4960 tmp = copy_to_reg (val);
4961 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
4962 1, OPTAB_DIRECT);
4963 cc = s390_emit_compare (NE, val, tmp);
4964 s390_emit_jump (csloop, cc);
4965
4966 /* Failed. */
4967 emit_move_insn (btarget, const0_rtx);
4968 emit_label (csend);
4969 }
4970
4971 /* Return the correct part of the bitfield. */
4972 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
4973 NULL_RTX, 1, OPTAB_DIRECT), 1);
4974 }
4975
4976 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
4977 and VAL the value to play with. If AFTER is true then store the value
4978 MEM holds after the operation, if AFTER is false then store the value MEM
4979 holds before the operation. If TARGET is zero then discard that value, else
4980 store it to TARGET. */
4981
4982 void
4983 s390_expand_atomic (enum machine_mode mode, enum rtx_code code,
4984 rtx target, rtx mem, rtx val, bool after)
4985 {
4986 struct alignment_context ac;
4987 rtx cmp;
4988 rtx new_rtx = gen_reg_rtx (SImode);
4989 rtx orig = gen_reg_rtx (SImode);
4990 rtx csloop = gen_label_rtx ();
4991
4992 gcc_assert (!target || register_operand (target, VOIDmode));
4993 gcc_assert (MEM_P (mem));
4994
4995 init_alignment_context (&ac, mem, mode);
4996
4997 /* Shift val to the correct bit positions.
4998 Preserve "icm", but prevent "ex icm". */
4999 if (!(ac.aligned && code == SET && MEM_P (val)))
5000 val = s390_expand_mask_and_shift (val, mode, ac.shift);
5001
5002 /* Further preparation insns. */
5003 if (code == PLUS || code == MINUS)
5004 emit_move_insn (orig, val);
5005 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
5006 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
5007 NULL_RTX, 1, OPTAB_DIRECT);
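  /* Surrounding the field with one bits makes the AND/NAND case below
     leave the rest of the word untouched.  */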
5008
5009 /* Load full word. Subsequent loads are performed by CS. */
5010 cmp = force_reg (SImode, ac.memsi);
5011
5012 /* Start CS loop. */
5013 emit_label (csloop);
5014 emit_move_insn (new_rtx, cmp);
5015
5016 /* Patch new with val at correct position. */
5017 switch (code)
5018 {
5019 case PLUS:
5020 case MINUS:
5021 val = expand_simple_binop (SImode, code, new_rtx, orig,
5022 NULL_RTX, 1, OPTAB_DIRECT);
5023 val = expand_simple_binop (SImode, AND, val, ac.modemask,
5024 NULL_RTX, 1, OPTAB_DIRECT);
5025 /* FALLTHRU */
5026 case SET:
5027 if (ac.aligned && MEM_P (val))
5028 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
5029 0, 0, SImode, val);
5030 else
5031 {
5032 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
5033 NULL_RTX, 1, OPTAB_DIRECT);
5034 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
5035 NULL_RTX, 1, OPTAB_DIRECT);
5036 }
5037 break;
5038 case AND:
5039 case IOR:
5040 case XOR:
5041 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
5042 NULL_RTX, 1, OPTAB_DIRECT);
5043 break;
5044 case MULT: /* NAND */
5045 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
5046 NULL_RTX, 1, OPTAB_DIRECT);
5047 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
5048 NULL_RTX, 1, OPTAB_DIRECT);
5049 break;
5050 default:
5051 gcc_unreachable ();
5052 }
5053
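  /* If CS finds a value different from CMP in memory, it loads that value
     back into CMP, so the conditional jump below retries the loop with the
     freshly observed word; no extra reload is required.  */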
5054 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
5055 ac.memsi, cmp, new_rtx));
5056
5057 /* Return the correct part of the bitfield. */
5058 if (target)
5059 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
5060 after ? new_rtx : cmp, ac.shift,
5061 NULL_RTX, 1, OPTAB_DIRECT), 1);
5062 }
5063
5064 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5065 We need to emit DTP-relative relocations. */
5066
5067 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
5068
5069 static void
5070 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
5071 {
5072 switch (size)
5073 {
5074 case 4:
5075 fputs ("\t.long\t", file);
5076 break;
5077 case 8:
5078 fputs ("\t.quad\t", file);
5079 break;
5080 default:
5081 gcc_unreachable ();
5082 }
5083 output_addr_const (file, x);
5084 fputs ("@DTPOFF", file);
5085 }
5086
5087 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
5088 /* Implement TARGET_MANGLE_TYPE. */
5089
5090 static const char *
5091 s390_mangle_type (const_tree type)
5092 {
5093 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
5094 && TARGET_LONG_DOUBLE_128)
5095 return "g";
5096
5097 /* For all other types, use normal C++ mangling. */
5098 return NULL;
5099 }
5100 #endif
5101
5102 /* In the name of slightly smaller debug output, and to cater to
5103 general assembler lossage, recognize various UNSPEC sequences
5104 and turn them back into a direct symbol reference. */
5105
5106 static rtx
5107 s390_delegitimize_address (rtx orig_x)
5108 {
5109 rtx x, y;
5110
5111 orig_x = delegitimize_mem_from_attrs (orig_x);
5112 x = orig_x;
5113
5114 /* Extract the symbol ref from:
5115 (plus:SI (reg:SI 12 %r12)
5116 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
5117 UNSPEC_GOTOFF/PLTOFF)))
5118 and
5119 (plus:SI (reg:SI 12 %r12)
5120 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
5121 UNSPEC_GOTOFF/PLTOFF)
5122 (const_int 4 [0x4])))) */
5123 if (GET_CODE (x) == PLUS
5124 && REG_P (XEXP (x, 0))
5125 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
5126 && GET_CODE (XEXP (x, 1)) == CONST)
5127 {
5128 HOST_WIDE_INT offset = 0;
5129
5130 /* The const operand. */
5131 y = XEXP (XEXP (x, 1), 0);
5132
5133 if (GET_CODE (y) == PLUS
5134 && GET_CODE (XEXP (y, 1)) == CONST_INT)
5135 {
5136 offset = INTVAL (XEXP (y, 1));
5137 y = XEXP (y, 0);
5138 }
5139
5140 if (GET_CODE (y) == UNSPEC
5141 && (XINT (y, 1) == UNSPEC_GOTOFF
5142 || XINT (y, 1) == UNSPEC_PLTOFF))
5143 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
5144 }
5145
5146 if (GET_CODE (x) != MEM)
5147 return orig_x;
5148
5149 x = XEXP (x, 0);
5150 if (GET_CODE (x) == PLUS
5151 && GET_CODE (XEXP (x, 1)) == CONST
5152 && GET_CODE (XEXP (x, 0)) == REG
5153 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5154 {
5155 y = XEXP (XEXP (x, 1), 0);
5156 if (GET_CODE (y) == UNSPEC
5157 && XINT (y, 1) == UNSPEC_GOT)
5158 y = XVECEXP (y, 0, 0);
5159 else
5160 return orig_x;
5161 }
5162 else if (GET_CODE (x) == CONST)
5163 {
5164 /* Extract the symbol ref from:
5165 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
5166 UNSPEC_PLT/GOTENT))) */
5167
5168 y = XEXP (x, 0);
5169 if (GET_CODE (y) == UNSPEC
5170 && (XINT (y, 1) == UNSPEC_GOTENT
5171 || XINT (y, 1) == UNSPEC_PLT))
5172 y = XVECEXP (y, 0, 0);
5173 else
5174 return orig_x;
5175 }
5176 else
5177 return orig_x;
5178
5179 if (GET_MODE (orig_x) != Pmode)
5180 {
5181 if (GET_MODE (orig_x) == BLKmode)
5182 return orig_x;
5183 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
5184 if (y == NULL_RTX)
5185 return orig_x;
5186 }
5187 return y;
5188 }
5189
5190 /* Output operand OP to stdio stream FILE.
5191 OP is an address (register + offset) which is not used to address data;
5192 instead the rightmost bits are interpreted as the value. */
5193
5194 static void
5195 print_shift_count_operand (FILE *file, rtx op)
5196 {
5197 HOST_WIDE_INT offset;
5198 rtx base;
5199
5200 /* Extract base register and offset. */
5201 if (!s390_decompose_shift_count (op, &base, &offset))
5202 gcc_unreachable ();
5203
5204 /* Sanity check. */
5205 if (base)
5206 {
5207 gcc_assert (GET_CODE (base) == REG);
5208 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
5209 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
5210 }
5211
5212 /* Offsets are restricted to twelve bits. */
5213 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
5214 if (base)
5215 fprintf (file, "(%s)", reg_names[REGNO (base)]);
5216 }
5217
5218 /* See 'get_some_local_dynamic_name'. */
5219
5220 static int
5221 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
5222 {
5223 rtx x = *px;
5224
5225 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
5226 {
5227 x = get_pool_constant (x);
5228 return for_each_rtx (&x, get_some_local_dynamic_name_1, 0);
5229 }
5230
5231 if (GET_CODE (x) == SYMBOL_REF
5232 && tls_symbolic_operand (x) == TLS_MODEL_LOCAL_DYNAMIC)
5233 {
5234 cfun->machine->some_ld_name = XSTR (x, 0);
5235 return 1;
5236 }
5237
5238 return 0;
5239 }
5240
5241 /* Locate some local-dynamic symbol still in use by this function
5242 so that we can print its name in local-dynamic base patterns. */
5243
5244 static const char *
5245 get_some_local_dynamic_name (void)
5246 {
5247 rtx insn;
5248
5249 if (cfun->machine->some_ld_name)
5250 return cfun->machine->some_ld_name;
5251
5252 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
5253 if (INSN_P (insn)
5254 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
5255 return cfun->machine->some_ld_name;
5256
5257 gcc_unreachable ();
5258 }
5259
5260 /* Output machine-dependent UNSPECs occurring in address constant X
5261 in assembler syntax to stdio stream FILE. Returns true if the
5262 constant X could be recognized, false otherwise. */
5263
5264 static bool
5265 s390_output_addr_const_extra (FILE *file, rtx x)
5266 {
5267 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
5268 switch (XINT (x, 1))
5269 {
5270 case UNSPEC_GOTENT:
5271 output_addr_const (file, XVECEXP (x, 0, 0));
5272 fprintf (file, "@GOTENT");
5273 return true;
5274 case UNSPEC_GOT:
5275 output_addr_const (file, XVECEXP (x, 0, 0));
5276 fprintf (file, "@GOT");
5277 return true;
5278 case UNSPEC_GOTOFF:
5279 output_addr_const (file, XVECEXP (x, 0, 0));
5280 fprintf (file, "@GOTOFF");
5281 return true;
5282 case UNSPEC_PLT:
5283 output_addr_const (file, XVECEXP (x, 0, 0));
5284 fprintf (file, "@PLT");
5285 return true;
5286 case UNSPEC_PLTOFF:
5287 output_addr_const (file, XVECEXP (x, 0, 0));
5288 fprintf (file, "@PLTOFF");
5289 return true;
5290 case UNSPEC_TLSGD:
5291 output_addr_const (file, XVECEXP (x, 0, 0));
5292 fprintf (file, "@TLSGD");
5293 return true;
5294 case UNSPEC_TLSLDM:
5295 assemble_name (file, get_some_local_dynamic_name ());
5296 fprintf (file, "@TLSLDM");
5297 return true;
5298 case UNSPEC_DTPOFF:
5299 output_addr_const (file, XVECEXP (x, 0, 0));
5300 fprintf (file, "@DTPOFF");
5301 return true;
5302 case UNSPEC_NTPOFF:
5303 output_addr_const (file, XVECEXP (x, 0, 0));
5304 fprintf (file, "@NTPOFF");
5305 return true;
5306 case UNSPEC_GOTNTPOFF:
5307 output_addr_const (file, XVECEXP (x, 0, 0));
5308 fprintf (file, "@GOTNTPOFF");
5309 return true;
5310 case UNSPEC_INDNTPOFF:
5311 output_addr_const (file, XVECEXP (x, 0, 0));
5312 fprintf (file, "@INDNTPOFF");
5313 return true;
5314 }
5315
5316 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
5317 switch (XINT (x, 1))
5318 {
5319 case UNSPEC_POOL_OFFSET:
5320 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
5321 output_addr_const (file, x);
5322 return true;
5323 }
5324 return false;
5325 }
5326
5327 /* Output address operand ADDR in assembler syntax to
5328 stdio stream FILE. */
5329
5330 void
5331 print_operand_address (FILE *file, rtx addr)
5332 {
5333 struct s390_address ad;
5334
5335 if (s390_loadrelative_operand_p (addr))
5336 {
5337 if (!TARGET_Z10)
5338 {
5339 output_operand_lossage ("symbolic memory references are "
5340 "only supported on z10 or later");
5341 return;
5342 }
5343 output_addr_const (file, addr);
5344 return;
5345 }
5346
5347 if (!s390_decompose_address (addr, &ad)
5348 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5349 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
5350 output_operand_lossage ("cannot decompose address");
5351
5352 if (ad.disp)
5353 output_addr_const (file, ad.disp);
5354 else
5355 fprintf (file, "0");
5356
5357 if (ad.base && ad.indx)
5358 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
5359 reg_names[REGNO (ad.base)]);
5360 else if (ad.base)
5361 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5362 }
5363
5364 /* Output operand X in assembler syntax to stdio stream FILE.
5365 CODE specified the format flag. The following format flags
5366 are recognized:
5367
5368 'C': print opcode suffix for branch condition.
5369 'D': print opcode suffix for inverse branch condition.
5370 'E': print opcode suffix for branch on index instruction.
5371 'G': print the size of the operand in bytes.
5372 'J': print tls_load/tls_gdcall/tls_ldcall suffix
5373 'M': print the second word of a TImode operand.
5374 'N': print the second word of a DImode operand.
5375 'O': print only the displacement of a memory reference.
5376 'R': print only the base register of a memory reference.
5377 'S': print S-type memory reference (base+displacement).
5378 'Y': print shift count operand.
5379
5380 'b': print integer X as if it's an unsigned byte.
5381 'c': print integer X as if it's a signed byte.
5382 'e': "end" of DImode contiguous bitmask X.
5383 'f': "end" of SImode contiguous bitmask X.
5384 'h': print integer X as if it's a signed halfword.
5385 'i': print the first nonzero HImode part of X.
5386 'j': print the first HImode part unequal to -1 of X.
5387 'k': print the first nonzero SImode part of X.
5388 'm': print the first SImode part unequal to -1 of X.
5389 'o': print integer X as if it's an unsigned 32-bit word.
5390 's': "start" of DImode contiguous bitmask X.
5391 't': "start" of SImode contiguous bitmask X.
5392 'x': print integer X as if it's an unsigned halfword.
5393 */
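/* For instance, with X = 0x00ffff00 and an SImode operand the contiguous
   bitmask covers bits 8..23 counting from the least significant bit.  In
   the 64-bit left-to-right bit numbering used by the RISBG-style
   instructions this is the range 40..55, so 't' prints 40 (start) and 'f'
   prints 55 (end); 's' and 'e' work the same way for DImode masks.  */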
5394
5395 void
5396 print_operand (FILE *file, rtx x, int code)
5397 {
5398 HOST_WIDE_INT ival;
5399
5400 switch (code)
5401 {
5402 case 'C':
5403 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
5404 return;
5405
5406 case 'D':
5407 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
5408 return;
5409
5410 case 'E':
5411 if (GET_CODE (x) == LE)
5412 fprintf (file, "l");
5413 else if (GET_CODE (x) == GT)
5414 fprintf (file, "h");
5415 else
5416 output_operand_lossage ("invalid comparison operator "
5417 "for 'E' output modifier");
5418 return;
5419
5420 case 'J':
5421 if (GET_CODE (x) == SYMBOL_REF)
5422 {
5423 fprintf (file, "%s", ":tls_load:");
5424 output_addr_const (file, x);
5425 }
5426 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
5427 {
5428 fprintf (file, "%s", ":tls_gdcall:");
5429 output_addr_const (file, XVECEXP (x, 0, 0));
5430 }
5431 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
5432 {
5433 fprintf (file, "%s", ":tls_ldcall:");
5434 assemble_name (file, get_some_local_dynamic_name ());
5435 }
5436 else
5437 output_operand_lossage ("invalid reference for 'J' output modifier");
5438 return;
5439
5440 case 'G':
5441 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
5442 return;
5443
5444 case 'O':
5445 {
5446 struct s390_address ad;
5447 int ret;
5448
5449 if (!MEM_P (x))
5450 {
5451 output_operand_lossage ("memory reference expected for "
5452 "'O' output modifier");
5453 return;
5454 }
5455
5456 ret = s390_decompose_address (XEXP (x, 0), &ad);
5457
5458 if (!ret
5459 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5460 || ad.indx)
5461 {
5462 output_operand_lossage ("invalid address for 'O' output modifier");
5463 return;
5464 }
5465
5466 if (ad.disp)
5467 output_addr_const (file, ad.disp);
5468 else
5469 fprintf (file, "0");
5470 }
5471 return;
5472
5473 case 'R':
5474 {
5475 struct s390_address ad;
5476 int ret;
5477
5478 if (!MEM_P (x))
5479 {
5480 output_operand_lossage ("memory reference expected for "
5481 "'R' output modifier");
5482 return;
5483 }
5484
5485 ret = s390_decompose_address (XEXP (x, 0), &ad);
5486
5487 if (!ret
5488 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5489 || ad.indx)
5490 {
5491 output_operand_lossage ("invalid address for 'R' output modifier");
5492 return;
5493 }
5494
5495 if (ad.base)
5496 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
5497 else
5498 fprintf (file, "0");
5499 }
5500 return;
5501
5502 case 'S':
5503 {
5504 struct s390_address ad;
5505 int ret;
5506
5507 if (!MEM_P (x))
5508 {
5509 output_operand_lossage ("memory reference expected for "
5510 "'S' output modifier");
5511 return;
5512 }
5513 ret = s390_decompose_address (XEXP (x, 0), &ad);
5514
5515 if (!ret
5516 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5517 || ad.indx)
5518 {
5519 output_operand_lossage ("invalid address for 'S' output modifier");
5520 return;
5521 }
5522
5523 if (ad.disp)
5524 output_addr_const (file, ad.disp);
5525 else
5526 fprintf (file, "0");
5527
5528 if (ad.base)
5529 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5530 }
5531 return;
5532
5533 case 'N':
5534 if (GET_CODE (x) == REG)
5535 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5536 else if (GET_CODE (x) == MEM)
5537 x = change_address (x, VOIDmode,
5538 plus_constant (Pmode, XEXP (x, 0), 4));
5539 else
5540 output_operand_lossage ("register or memory expression expected "
5541 "for 'N' output modifier");
5542 break;
5543
5544 case 'M':
5545 if (GET_CODE (x) == REG)
5546 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5547 else if (GET_CODE (x) == MEM)
5548 x = change_address (x, VOIDmode,
5549 plus_constant (Pmode, XEXP (x, 0), 8));
5550 else
5551 output_operand_lossage ("register or memory expression expected "
5552 "for 'M' output modifier");
5553 break;
5554
5555 case 'Y':
5556 print_shift_count_operand (file, x);
5557 return;
5558 }
5559
5560 switch (GET_CODE (x))
5561 {
5562 case REG:
5563 fprintf (file, "%s", reg_names[REGNO (x)]);
5564 break;
5565
5566 case MEM:
5567 output_address (XEXP (x, 0));
5568 break;
5569
5570 case CONST:
5571 case CODE_LABEL:
5572 case LABEL_REF:
5573 case SYMBOL_REF:
5574 output_addr_const (file, x);
5575 break;
5576
5577 case CONST_INT:
5578 ival = INTVAL (x);
5579 switch (code)
5580 {
5581 case 0:
5582 break;
5583 case 'b':
5584 ival &= 0xff;
5585 break;
5586 case 'c':
5587 ival = ((ival & 0xff) ^ 0x80) - 0x80;
5588 break;
5589 case 'x':
5590 ival &= 0xffff;
5591 break;
5592 case 'h':
5593 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
5594 break;
5595 case 'i':
5596 ival = s390_extract_part (x, HImode, 0);
5597 break;
5598 case 'j':
5599 ival = s390_extract_part (x, HImode, -1);
5600 break;
5601 case 'k':
5602 ival = s390_extract_part (x, SImode, 0);
5603 break;
5604 case 'm':
5605 ival = s390_extract_part (x, SImode, -1);
5606 break;
5607 case 'o':
5608 ival &= 0xffffffff;
5609 break;
5610 case 'e': case 'f':
5611 case 's': case 't':
5612 {
5613 int pos, len;
5614 bool ok;
5615
5616 len = (code == 's' || code == 'e' ? 64 : 32);
5617 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
5618 gcc_assert (ok);
5619 if (code == 's' || code == 't')
5620 ival = 64 - pos - len;
5621 else
5622 ival = 64 - 1 - pos;
5623 }
5624 break;
5625 default:
5626 output_operand_lossage ("invalid constant for output modifier '%c'", code);
5627 }
5628 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
5629 break;
5630
5631 case CONST_DOUBLE:
5632 gcc_assert (GET_MODE (x) == VOIDmode);
5633 if (code == 'b')
5634 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
5635 else if (code == 'x')
5636 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
5637 else if (code == 'h')
5638 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5639 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
5640 else
5641 {
5642 if (code == 0)
5643 output_operand_lossage ("invalid constant - try using "
5644 "an output modifier");
5645 else
5646 output_operand_lossage ("invalid constant for output modifier '%c'",
5647 code);
5648 }
5649 break;
5650
5651 default:
5652 if (code == 0)
5653 output_operand_lossage ("invalid expression - try using "
5654 "an output modifier");
5655 else
5656 output_operand_lossage ("invalid expression for output "
5657 "modifier '%c'", code);
5658 break;
5659 }
5660 }
5661
5662 /* Target hook for assembling integer objects. We need to define it
5663 here to work a round a bug in some versions of GAS, which couldn't
5664 handle values smaller than INT_MIN when printed in decimal. */
5665
5666 static bool
5667 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
5668 {
5669 if (size == 8 && aligned_p
5670 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
5671 {
5672 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
5673 INTVAL (x));
5674 return true;
5675 }
5676 return default_assemble_integer (x, size, aligned_p);
5677 }
5678
5679 /* Returns true if register REGNO is used for forming
5680 a memory address in expression X. */
5681
5682 static bool
5683 reg_used_in_mem_p (int regno, rtx x)
5684 {
5685 enum rtx_code code = GET_CODE (x);
5686 int i, j;
5687 const char *fmt;
5688
5689 if (code == MEM)
5690 {
5691 if (refers_to_regno_p (regno, regno+1,
5692 XEXP (x, 0), 0))
5693 return true;
5694 }
5695 else if (code == SET
5696 && GET_CODE (SET_DEST (x)) == PC)
5697 {
5698 if (refers_to_regno_p (regno, regno+1,
5699 SET_SRC (x), 0))
5700 return true;
5701 }
5702
5703 fmt = GET_RTX_FORMAT (code);
5704 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5705 {
5706 if (fmt[i] == 'e'
5707 && reg_used_in_mem_p (regno, XEXP (x, i)))
5708 return true;
5709
5710 else if (fmt[i] == 'E')
5711 for (j = 0; j < XVECLEN (x, i); j++)
5712 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
5713 return true;
5714 }
5715 return false;
5716 }
5717
5718 /* Returns true if expression DEP_RTX sets an address register
5719 used by instruction INSN to address memory. */
5720
5721 static bool
5722 addr_generation_dependency_p (rtx dep_rtx, rtx insn)
5723 {
5724 rtx target, pat;
5725
5726 if (GET_CODE (dep_rtx) == INSN)
5727 dep_rtx = PATTERN (dep_rtx);
5728
5729 if (GET_CODE (dep_rtx) == SET)
5730 {
5731 target = SET_DEST (dep_rtx);
5732 if (GET_CODE (target) == STRICT_LOW_PART)
5733 target = XEXP (target, 0);
5734 while (GET_CODE (target) == SUBREG)
5735 target = SUBREG_REG (target);
5736
5737 if (GET_CODE (target) == REG)
5738 {
5739 int regno = REGNO (target);
5740
5741 if (s390_safe_attr_type (insn) == TYPE_LA)
5742 {
5743 pat = PATTERN (insn);
5744 if (GET_CODE (pat) == PARALLEL)
5745 {
5746 gcc_assert (XVECLEN (pat, 0) == 2);
5747 pat = XVECEXP (pat, 0, 0);
5748 }
5749 gcc_assert (GET_CODE (pat) == SET);
5750 return refers_to_regno_p (regno, regno+1, SET_SRC (pat), 0);
5751 }
5752 else if (get_attr_atype (insn) == ATYPE_AGEN)
5753 return reg_used_in_mem_p (regno, PATTERN (insn));
5754 }
5755 }
5756 return false;
5757 }
5758
5759 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
5760
5761 int
5762 s390_agen_dep_p (rtx dep_insn, rtx insn)
5763 {
5764 rtx dep_rtx = PATTERN (dep_insn);
5765 int i;
5766
5767 if (GET_CODE (dep_rtx) == SET
5768 && addr_generation_dependency_p (dep_rtx, insn))
5769 return 1;
5770 else if (GET_CODE (dep_rtx) == PARALLEL)
5771 {
5772 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
5773 {
5774 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
5775 return 1;
5776 }
5777 }
5778 return 0;
5779 }
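/* For example, if DEP_INSN is an LA setting %r1 and INSN then uses %r1 to
   form a memory address, the check above reports the dependency; the
   pipeline description can use this to model address-generation
   interlocks.  */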
5780
5781
5782 /* A C statement (sans semicolon) to update the integer scheduling priority
5783 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
5784 reduce the priority to execute INSN later. Do not define this macro if
5785 you do not need to adjust the scheduling priorities of insns.
5786
5787 A STD instruction should be scheduled earlier,
5788 in order to use the bypass. */
5789 static int
5790 s390_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
5791 {
5792 if (! INSN_P (insn))
5793 return priority;
5794
5795 if (s390_tune != PROCESSOR_2084_Z990
5796 && s390_tune != PROCESSOR_2094_Z9_109
5797 && s390_tune != PROCESSOR_2097_Z10
5798 && s390_tune != PROCESSOR_2817_Z196
5799 && s390_tune != PROCESSOR_2827_ZEC12)
5800 return priority;
5801
5802 switch (s390_safe_attr_type (insn))
5803 {
5804 case TYPE_FSTOREDF:
5805 case TYPE_FSTORESF:
5806 priority = priority << 3;
5807 break;
5808 case TYPE_STORE:
5809 case TYPE_STM:
5810 priority = priority << 1;
5811 break;
5812 default:
5813 break;
5814 }
5815 return priority;
5816 }
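
/* Note on the shifts above: they only scale the priority of store-type
   insns (floating-point stores by a factor of 8, other stores and STM by
   a factor of 2) so that the scheduler tends to issue them earlier; the
   exact factors are a heuristic tuning choice, not an architectural
   requirement.  */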
5817
5818
5819 /* The number of instructions that can be issued per cycle. */
5820
5821 static int
5822 s390_issue_rate (void)
5823 {
5824 switch (s390_tune)
5825 {
5826 case PROCESSOR_2084_Z990:
5827 case PROCESSOR_2094_Z9_109:
5828 case PROCESSOR_2817_Z196:
5829 return 3;
5830 case PROCESSOR_2097_Z10:
5831 case PROCESSOR_2827_ZEC12:
5832 return 2;
5833 default:
5834 return 1;
5835 }
5836 }
5837
5838 static int
5839 s390_first_cycle_multipass_dfa_lookahead (void)
5840 {
5841 return 4;
5842 }
5843
5844 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
5845 Fix up MEMs as required. */
5846
5847 static void
5848 annotate_constant_pool_refs (rtx *x)
5849 {
5850 int i, j;
5851 const char *fmt;
5852
5853 gcc_assert (GET_CODE (*x) != SYMBOL_REF
5854 || !CONSTANT_POOL_ADDRESS_P (*x));
5855
5856 /* Literal pool references can only occur inside a MEM ... */
5857 if (GET_CODE (*x) == MEM)
5858 {
5859 rtx memref = XEXP (*x, 0);
5860
5861 if (GET_CODE (memref) == SYMBOL_REF
5862 && CONSTANT_POOL_ADDRESS_P (memref))
5863 {
5864 rtx base = cfun->machine->base_reg;
5865 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
5866 UNSPEC_LTREF);
5867
5868 *x = replace_equiv_address (*x, addr);
5869 return;
5870 }
5871
5872 if (GET_CODE (memref) == CONST
5873 && GET_CODE (XEXP (memref, 0)) == PLUS
5874 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
5875 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
5876 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
5877 {
5878 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
5879 rtx sym = XEXP (XEXP (memref, 0), 0);
5880 rtx base = cfun->machine->base_reg;
5881 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5882 UNSPEC_LTREF);
5883
5884 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
5885 return;
5886 }
5887 }
5888
5889 /* ... or a load-address type pattern. */
5890 if (GET_CODE (*x) == SET)
5891 {
5892 rtx addrref = SET_SRC (*x);
5893
5894 if (GET_CODE (addrref) == SYMBOL_REF
5895 && CONSTANT_POOL_ADDRESS_P (addrref))
5896 {
5897 rtx base = cfun->machine->base_reg;
5898 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
5899 UNSPEC_LTREF);
5900
5901 SET_SRC (*x) = addr;
5902 return;
5903 }
5904
5905 if (GET_CODE (addrref) == CONST
5906 && GET_CODE (XEXP (addrref, 0)) == PLUS
5907 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
5908 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
5909 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
5910 {
5911 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
5912 rtx sym = XEXP (XEXP (addrref, 0), 0);
5913 rtx base = cfun->machine->base_reg;
5914 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5915 UNSPEC_LTREF);
5916
5917 SET_SRC (*x) = plus_constant (Pmode, addr, off);
5918 return;
5919 }
5920 }
5921
5922 /* Annotate LTREL_BASE as well. */
5923 if (GET_CODE (*x) == UNSPEC
5924 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
5925 {
5926 rtx base = cfun->machine->base_reg;
5927 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
5928 UNSPEC_LTREL_BASE);
5929 return;
5930 }
5931
5932 fmt = GET_RTX_FORMAT (GET_CODE (*x));
5933 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
5934 {
5935 if (fmt[i] == 'e')
5936 {
5937 annotate_constant_pool_refs (&XEXP (*x, i));
5938 }
5939 else if (fmt[i] == 'E')
5940 {
5941 for (j = 0; j < XVECLEN (*x, i); j++)
5942 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
5943 }
5944 }
5945 }
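
/* As a rough sketch of the annotation above, a literal pool access such as

       (mem (symbol_ref <pool constant>))

   is rewritten into

       (mem (unspec [(symbol_ref <pool constant>) (reg <base>)] UNSPEC_LTREF))

   so that the use of the literal pool base register becomes explicit in
   the RTL and can be tracked by later passes.  */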
5946
5947 /* Split all branches that exceed the maximum distance.
5948 Returns true if this created a new literal pool entry. */
5949
5950 static int
5951 s390_split_branches (void)
5952 {
5953 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
5954 int new_literal = 0, ret;
5955 rtx insn, pat, tmp, target;
5956 rtx *label;
5957
5958 /* We need correct insn addresses. */
5959
5960 shorten_branches (get_insns ());
5961
5962 /* Find all branches that exceed 64KB, and split them. */
5963
5964 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5965 {
5966 if (GET_CODE (insn) != JUMP_INSN)
5967 continue;
5968
5969 pat = PATTERN (insn);
5970 if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2)
5971 pat = XVECEXP (pat, 0, 0);
5972 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
5973 continue;
5974
5975 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
5976 {
5977 label = &SET_SRC (pat);
5978 }
5979 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
5980 {
5981 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
5982 label = &XEXP (SET_SRC (pat), 1);
5983 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
5984 label = &XEXP (SET_SRC (pat), 2);
5985 else
5986 continue;
5987 }
5988 else
5989 continue;
5990
5991 if (get_attr_length (insn) <= 4)
5992 continue;
5993
5994 /* We are going to use the return register as a scratch register;
5995 make sure it will be saved/restored by the prologue/epilogue. */
5996 cfun_frame_layout.save_return_addr_p = 1;
5997
5998 if (!flag_pic)
5999 {
6000 new_literal = 1;
6001 tmp = force_const_mem (Pmode, *label);
6002 tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, tmp), insn);
6003 INSN_ADDRESSES_NEW (tmp, -1);
6004 annotate_constant_pool_refs (&PATTERN (tmp));
6005
6006 target = temp_reg;
6007 }
6008 else
6009 {
6010 new_literal = 1;
6011 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
6012 UNSPEC_LTREL_OFFSET);
6013 target = gen_rtx_CONST (Pmode, target);
6014 target = force_const_mem (Pmode, target);
6015 tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, target), insn);
6016 INSN_ADDRESSES_NEW (tmp, -1);
6017 annotate_constant_pool_refs (&PATTERN (tmp));
6018
6019 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
6020 cfun->machine->base_reg),
6021 UNSPEC_LTREL_BASE);
6022 target = gen_rtx_PLUS (Pmode, temp_reg, target);
6023 }
6024
6025 ret = validate_change (insn, label, target, 0);
6026 gcc_assert (ret);
6027 }
6028
6029 return new_literal;
6030 }
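
/* In effect, an out-of-range branch is turned into an indirect branch:
   the target address (or, with -fpic, an UNSPEC_LTREL_OFFSET referring to
   it) is placed in the literal pool, loaded into the return register used
   as a scratch register, and the branch is redirected through that
   register.  This is why save_return_addr_p is forced above whenever a
   branch is split.  */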
6031
6032
6033 /* Find an annotated literal pool symbol referenced in RTX X,
6034 and store it at REF. Will abort if X contains references to
6035 more than one such pool symbol; multiple references to the same
6036 symbol are allowed, however.
6037
6038 The rtx pointed to by REF must be initialized to NULL_RTX
6039 by the caller before calling this routine. */
6040
6041 static void
6042 find_constant_pool_ref (rtx x, rtx *ref)
6043 {
6044 int i, j;
6045 const char *fmt;
6046
6047 /* Ignore LTREL_BASE references. */
6048 if (GET_CODE (x) == UNSPEC
6049 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6050 return;
6051 /* Likewise POOL_ENTRY insns. */
6052 if (GET_CODE (x) == UNSPEC_VOLATILE
6053 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
6054 return;
6055
6056 gcc_assert (GET_CODE (x) != SYMBOL_REF
6057 || !CONSTANT_POOL_ADDRESS_P (x));
6058
6059 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
6060 {
6061 rtx sym = XVECEXP (x, 0, 0);
6062 gcc_assert (GET_CODE (sym) == SYMBOL_REF
6063 && CONSTANT_POOL_ADDRESS_P (sym));
6064
6065 if (*ref == NULL_RTX)
6066 *ref = sym;
6067 else
6068 gcc_assert (*ref == sym);
6069
6070 return;
6071 }
6072
6073 fmt = GET_RTX_FORMAT (GET_CODE (x));
6074 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6075 {
6076 if (fmt[i] == 'e')
6077 {
6078 find_constant_pool_ref (XEXP (x, i), ref);
6079 }
6080 else if (fmt[i] == 'E')
6081 {
6082 for (j = 0; j < XVECLEN (x, i); j++)
6083 find_constant_pool_ref (XVECEXP (x, i, j), ref);
6084 }
6085 }
6086 }
6087
6088 /* Replace every reference to the annotated literal pool
6089 symbol REF in X by its base plus OFFSET. */
6090
6091 static void
6092 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
6093 {
6094 int i, j;
6095 const char *fmt;
6096
6097 gcc_assert (*x != ref);
6098
6099 if (GET_CODE (*x) == UNSPEC
6100 && XINT (*x, 1) == UNSPEC_LTREF
6101 && XVECEXP (*x, 0, 0) == ref)
6102 {
6103 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
6104 return;
6105 }
6106
6107 if (GET_CODE (*x) == PLUS
6108 && GET_CODE (XEXP (*x, 1)) == CONST_INT
6109 && GET_CODE (XEXP (*x, 0)) == UNSPEC
6110 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
6111 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
6112 {
6113 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
6114 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
6115 return;
6116 }
6117
6118 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6119 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6120 {
6121 if (fmt[i] == 'e')
6122 {
6123 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
6124 }
6125 else if (fmt[i] == 'E')
6126 {
6127 for (j = 0; j < XVECLEN (*x, i); j++)
6128 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
6129 }
6130 }
6131 }
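
/* For example, an annotated reference

       (unspec [(symbol_ref <pool constant>) (reg <base>)] UNSPEC_LTREF)

   is replaced here by (plus (reg <base>) <offset>), with any additional
   CONST_INT displacement folded in via plus_constant.  */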
6132
6133 /* Check whether X contains an UNSPEC_LTREL_BASE.
6134 Return its constant pool symbol if found, NULL_RTX otherwise. */
6135
6136 static rtx
6137 find_ltrel_base (rtx x)
6138 {
6139 int i, j;
6140 const char *fmt;
6141
6142 if (GET_CODE (x) == UNSPEC
6143 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6144 return XVECEXP (x, 0, 0);
6145
6146 fmt = GET_RTX_FORMAT (GET_CODE (x));
6147 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6148 {
6149 if (fmt[i] == 'e')
6150 {
6151 rtx fnd = find_ltrel_base (XEXP (x, i));
6152 if (fnd)
6153 return fnd;
6154 }
6155 else if (fmt[i] == 'E')
6156 {
6157 for (j = 0; j < XVECLEN (x, i); j++)
6158 {
6159 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
6160 if (fnd)
6161 return fnd;
6162 }
6163 }
6164 }
6165
6166 return NULL_RTX;
6167 }
6168
6169 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
6170
6171 static void
6172 replace_ltrel_base (rtx *x)
6173 {
6174 int i, j;
6175 const char *fmt;
6176
6177 if (GET_CODE (*x) == UNSPEC
6178 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
6179 {
6180 *x = XVECEXP (*x, 0, 1);
6181 return;
6182 }
6183
6184 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6185 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6186 {
6187 if (fmt[i] == 'e')
6188 {
6189 replace_ltrel_base (&XEXP (*x, i));
6190 }
6191 else if (fmt[i] == 'E')
6192 {
6193 for (j = 0; j < XVECLEN (*x, i); j++)
6194 replace_ltrel_base (&XVECEXP (*x, i, j));
6195 }
6196 }
6197 }
6198
6199
6200 /* We keep a list of constants which we have to add to internal
6201 constant tables in the middle of large functions. */
6202
6203 #define NR_C_MODES 11
6204 enum machine_mode constant_modes[NR_C_MODES] =
6205 {
6206 TFmode, TImode, TDmode,
6207 DFmode, DImode, DDmode,
6208 SFmode, SImode, SDmode,
6209 HImode,
6210 QImode
6211 };
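
/* The modes above are listed from largest to smallest so that
   s390_dump_pool, which walks this array in order, emits the constants in
   descending alignment requirement order (see below).  */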
6212
6213 struct constant
6214 {
6215 struct constant *next;
6216 rtx value;
6217 rtx label;
6218 };
6219
6220 struct constant_pool
6221 {
6222 struct constant_pool *next;
6223 rtx first_insn;
6224 rtx pool_insn;
6225 bitmap insns;
6226 rtx emit_pool_after;
6227
6228 struct constant *constants[NR_C_MODES];
6229 struct constant *execute;
6230 rtx label;
6231 int size;
6232 };
6233
6234 /* Allocate new constant_pool structure. */
6235
6236 static struct constant_pool *
6237 s390_alloc_pool (void)
6238 {
6239 struct constant_pool *pool;
6240 int i;
6241
6242 pool = (struct constant_pool *) xmalloc (sizeof *pool);
6243 pool->next = NULL;
6244 for (i = 0; i < NR_C_MODES; i++)
6245 pool->constants[i] = NULL;
6246
6247 pool->execute = NULL;
6248 pool->label = gen_label_rtx ();
6249 pool->first_insn = NULL_RTX;
6250 pool->pool_insn = NULL_RTX;
6251 pool->insns = BITMAP_ALLOC (NULL);
6252 pool->size = 0;
6253 pool->emit_pool_after = NULL_RTX;
6254
6255 return pool;
6256 }
6257
6258 /* Create new constant pool covering instructions starting at INSN
6259 and chain it to the end of POOL_LIST. */
6260
6261 static struct constant_pool *
6262 s390_start_pool (struct constant_pool **pool_list, rtx insn)
6263 {
6264 struct constant_pool *pool, **prev;
6265
6266 pool = s390_alloc_pool ();
6267 pool->first_insn = insn;
6268
6269 for (prev = pool_list; *prev; prev = &(*prev)->next)
6270 ;
6271 *prev = pool;
6272
6273 return pool;
6274 }
6275
6276 /* End range of instructions covered by POOL at INSN and emit
6277 placeholder insn representing the pool. */
6278
6279 static void
6280 s390_end_pool (struct constant_pool *pool, rtx insn)
6281 {
6282 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
6283
6284 if (!insn)
6285 insn = get_last_insn ();
6286
6287 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
6288 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6289 }
6290
6291 /* Add INSN to the list of insns covered by POOL. */
6292
6293 static void
6294 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
6295 {
6296 bitmap_set_bit (pool->insns, INSN_UID (insn));
6297 }
6298
6299 /* Return pool out of POOL_LIST that covers INSN. */
6300
6301 static struct constant_pool *
6302 s390_find_pool (struct constant_pool *pool_list, rtx insn)
6303 {
6304 struct constant_pool *pool;
6305
6306 for (pool = pool_list; pool; pool = pool->next)
6307 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
6308 break;
6309
6310 return pool;
6311 }
6312
6313 /* Add constant VAL of mode MODE to the constant pool POOL. */
6314
6315 static void
6316 s390_add_constant (struct constant_pool *pool, rtx val, enum machine_mode mode)
6317 {
6318 struct constant *c;
6319 int i;
6320
6321 for (i = 0; i < NR_C_MODES; i++)
6322 if (constant_modes[i] == mode)
6323 break;
6324 gcc_assert (i != NR_C_MODES);
6325
6326 for (c = pool->constants[i]; c != NULL; c = c->next)
6327 if (rtx_equal_p (val, c->value))
6328 break;
6329
6330 if (c == NULL)
6331 {
6332 c = (struct constant *) xmalloc (sizeof *c);
6333 c->value = val;
6334 c->label = gen_label_rtx ();
6335 c->next = pool->constants[i];
6336 pool->constants[i] = c;
6337 pool->size += GET_MODE_SIZE (mode);
6338 }
6339 }
6340
6341 /* Return an rtx that represents the offset of X from the start of
6342 pool POOL. */
6343
6344 static rtx
6345 s390_pool_offset (struct constant_pool *pool, rtx x)
6346 {
6347 rtx label;
6348
6349 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
6350 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
6351 UNSPEC_POOL_OFFSET);
6352 return gen_rtx_CONST (GET_MODE (x), x);
6353 }
6354
6355 /* Find constant VAL of mode MODE in the constant pool POOL.
6356 Return an RTX describing the distance from the start of
6357 the pool to the location of the new constant. */
6358
6359 static rtx
6360 s390_find_constant (struct constant_pool *pool, rtx val,
6361 enum machine_mode mode)
6362 {
6363 struct constant *c;
6364 int i;
6365
6366 for (i = 0; i < NR_C_MODES; i++)
6367 if (constant_modes[i] == mode)
6368 break;
6369 gcc_assert (i != NR_C_MODES);
6370
6371 for (c = pool->constants[i]; c != NULL; c = c->next)
6372 if (rtx_equal_p (val, c->value))
6373 break;
6374
6375 gcc_assert (c);
6376
6377 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6378 }
6379
6380 /* Check whether INSN is an execute. Return the label_ref to its
6381 execute target template if so, NULL_RTX otherwise. */
6382
6383 static rtx
6384 s390_execute_label (rtx insn)
6385 {
6386 if (GET_CODE (insn) == INSN
6387 && GET_CODE (PATTERN (insn)) == PARALLEL
6388 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
6389 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
6390 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
6391
6392 return NULL_RTX;
6393 }
6394
6395 /* Add execute target for INSN to the constant pool POOL. */
6396
6397 static void
6398 s390_add_execute (struct constant_pool *pool, rtx insn)
6399 {
6400 struct constant *c;
6401
6402 for (c = pool->execute; c != NULL; c = c->next)
6403 if (INSN_UID (insn) == INSN_UID (c->value))
6404 break;
6405
6406 if (c == NULL)
6407 {
6408 c = (struct constant *) xmalloc (sizeof *c);
6409 c->value = insn;
6410 c->label = gen_label_rtx ();
6411 c->next = pool->execute;
6412 pool->execute = c;
6413 pool->size += 6;
6414 }
6415 }
6416
6417 /* Find execute target for INSN in the constant pool POOL.
6418 Return an RTX describing the distance from the start of
6419 the pool to the location of the execute target. */
6420
6421 static rtx
6422 s390_find_execute (struct constant_pool *pool, rtx insn)
6423 {
6424 struct constant *c;
6425
6426 for (c = pool->execute; c != NULL; c = c->next)
6427 if (INSN_UID (insn) == INSN_UID (c->value))
6428 break;
6429
6430 gcc_assert (c);
6431
6432 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6433 }
6434
6435 /* For an execute INSN, extract the execute target template. */
6436
6437 static rtx
6438 s390_execute_target (rtx insn)
6439 {
6440 rtx pattern = PATTERN (insn);
6441 gcc_assert (s390_execute_label (insn));
6442
6443 if (XVECLEN (pattern, 0) == 2)
6444 {
6445 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
6446 }
6447 else
6448 {
6449 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
6450 int i;
6451
6452 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
6453 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
6454
6455 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
6456 }
6457
6458 return pattern;
6459 }
6460
6461 /* Indicate that INSN cannot be duplicated. This is the case for
6462 execute insns that carry a unique label. */
6463
6464 static bool
6465 s390_cannot_copy_insn_p (rtx insn)
6466 {
6467 rtx label = s390_execute_label (insn);
6468 return label && label != const0_rtx;
6469 }
6470
6471 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
6472 do not emit the pool base label. */
6473
6474 static void
6475 s390_dump_pool (struct constant_pool *pool, bool remote_label)
6476 {
6477 struct constant *c;
6478 rtx insn = pool->pool_insn;
6479 int i;
6480
6481 /* Switch to rodata section. */
6482 if (TARGET_CPU_ZARCH)
6483 {
6484 insn = emit_insn_after (gen_pool_section_start (), insn);
6485 INSN_ADDRESSES_NEW (insn, -1);
6486 }
6487
6488 /* Ensure minimum pool alignment. */
6489 if (TARGET_CPU_ZARCH)
6490 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
6491 else
6492 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
6493 INSN_ADDRESSES_NEW (insn, -1);
6494
6495 /* Emit pool base label. */
6496 if (!remote_label)
6497 {
6498 insn = emit_label_after (pool->label, insn);
6499 INSN_ADDRESSES_NEW (insn, -1);
6500 }
6501
6502 /* Dump constants in descending alignment requirement order,
6503 ensuring proper alignment for every constant. */
6504 for (i = 0; i < NR_C_MODES; i++)
6505 for (c = pool->constants[i]; c; c = c->next)
6506 {
6507 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
6508 rtx value = copy_rtx (c->value);
6509 if (GET_CODE (value) == CONST
6510 && GET_CODE (XEXP (value, 0)) == UNSPEC
6511 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
6512 && XVECLEN (XEXP (value, 0), 0) == 1)
6513 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
6514
6515 insn = emit_label_after (c->label, insn);
6516 INSN_ADDRESSES_NEW (insn, -1);
6517
6518 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
6519 gen_rtvec (1, value),
6520 UNSPECV_POOL_ENTRY);
6521 insn = emit_insn_after (value, insn);
6522 INSN_ADDRESSES_NEW (insn, -1);
6523 }
6524
6525 /* Ensure minimum alignment for instructions. */
6526 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
6527 INSN_ADDRESSES_NEW (insn, -1);
6528
6529 /* Output in-pool execute template insns. */
6530 for (c = pool->execute; c; c = c->next)
6531 {
6532 insn = emit_label_after (c->label, insn);
6533 INSN_ADDRESSES_NEW (insn, -1);
6534
6535 insn = emit_insn_after (s390_execute_target (c->value), insn);
6536 INSN_ADDRESSES_NEW (insn, -1);
6537 }
6538
6539 /* Switch back to previous section. */
6540 if (TARGET_CPU_ZARCH)
6541 {
6542 insn = emit_insn_after (gen_pool_section_end (), insn);
6543 INSN_ADDRESSES_NEW (insn, -1);
6544 }
6545
6546 insn = emit_barrier_after (insn);
6547 INSN_ADDRESSES_NEW (insn, -1);
6548
6549 /* Remove placeholder insn. */
6550 remove_insn (pool->pool_insn);
6551 }
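
/* The emitted pool thus has roughly the following shape: an optional
   .rodata section switch (z/Architecture only), an 8-byte (4-byte on ESA)
   alignment directive, the pool base label unless REMOTE_LABEL, the
   constants grouped by mode from largest to smallest, a 2-byte
   re-alignment, the execute target templates, and a closing barrier.  */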
6552
6553 /* Free all memory used by POOL. */
6554
6555 static void
6556 s390_free_pool (struct constant_pool *pool)
6557 {
6558 struct constant *c, *next;
6559 int i;
6560
6561 for (i = 0; i < NR_C_MODES; i++)
6562 for (c = pool->constants[i]; c; c = next)
6563 {
6564 next = c->next;
6565 free (c);
6566 }
6567
6568 for (c = pool->execute; c; c = next)
6569 {
6570 next = c->next;
6571 free (c);
6572 }
6573
6574 BITMAP_FREE (pool->insns);
6575 free (pool);
6576 }
6577
6578
6579 /* Collect main literal pool. Return NULL on overflow. */
6580
6581 static struct constant_pool *
6582 s390_mainpool_start (void)
6583 {
6584 struct constant_pool *pool;
6585 rtx insn;
6586
6587 pool = s390_alloc_pool ();
6588
6589 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6590 {
6591 if (GET_CODE (insn) == INSN
6592 && GET_CODE (PATTERN (insn)) == SET
6593 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
6594 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
6595 {
6596 gcc_assert (!pool->pool_insn);
6597 pool->pool_insn = insn;
6598 }
6599
6600 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6601 {
6602 s390_add_execute (pool, insn);
6603 }
6604 else if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
6605 {
6606 rtx pool_ref = NULL_RTX;
6607 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6608 if (pool_ref)
6609 {
6610 rtx constant = get_pool_constant (pool_ref);
6611 enum machine_mode mode = get_pool_mode (pool_ref);
6612 s390_add_constant (pool, constant, mode);
6613 }
6614 }
6615
6616 /* If hot/cold partitioning is enabled we have to make sure that
6617 the literal pool is emitted in the same section where the
6618 initialization of the literal pool base pointer takes place.
6619 emit_pool_after is only used in the non-overflow case on non-zarch
6620 CPUs where we can emit the literal pool at the end of the
6621 function body within the text section.  */
6622 if (NOTE_P (insn)
6623 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
6624 && !pool->emit_pool_after)
6625 pool->emit_pool_after = PREV_INSN (insn);
6626 }
6627
6628 gcc_assert (pool->pool_insn || pool->size == 0);
6629
6630 if (pool->size >= 4096)
6631 {
6632 /* We're going to chunkify the pool, so remove the main
6633 pool placeholder insn. */
6634 remove_insn (pool->pool_insn);
6635
6636 s390_free_pool (pool);
6637 pool = NULL;
6638 }
6639
6640 /* If the function ends with the section where the literal pool
6641 should be emitted, set the marker to its end.  */
6642 if (pool && !pool->emit_pool_after)
6643 pool->emit_pool_after = get_last_insn ();
6644
6645 return pool;
6646 }
6647
6648 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6649 Modify the current function to output the pool constants as well as
6650 the pool register setup instruction. */
6651
6652 static void
6653 s390_mainpool_finish (struct constant_pool *pool)
6654 {
6655 rtx base_reg = cfun->machine->base_reg;
6656 rtx insn;
6657
6658 /* If the pool is empty, we're done. */
6659 if (pool->size == 0)
6660 {
6661 /* We don't actually need a base register after all. */
6662 cfun->machine->base_reg = NULL_RTX;
6663
6664 if (pool->pool_insn)
6665 remove_insn (pool->pool_insn);
6666 s390_free_pool (pool);
6667 return;
6668 }
6669
6670 /* We need correct insn addresses. */
6671 shorten_branches (get_insns ());
6672
6673 /* On zSeries, we use a LARL to load the pool register. The pool is
6674 located in the .rodata section, so we emit it after the function. */
6675 if (TARGET_CPU_ZARCH)
6676 {
6677 insn = gen_main_base_64 (base_reg, pool->label);
6678 insn = emit_insn_after (insn, pool->pool_insn);
6679 INSN_ADDRESSES_NEW (insn, -1);
6680 remove_insn (pool->pool_insn);
6681
6682 insn = get_last_insn ();
6683 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6684 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6685
6686 s390_dump_pool (pool, 0);
6687 }
6688
6689 /* On S/390, if the total size of the function's code plus literal pool
6690 does not exceed 4096 bytes, we use BASR to set up a function base
6691 pointer, and emit the literal pool at the end of the function. */
6692 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
6693 + pool->size + 8 /* alignment slop */ < 4096)
6694 {
6695 insn = gen_main_base_31_small (base_reg, pool->label);
6696 insn = emit_insn_after (insn, pool->pool_insn);
6697 INSN_ADDRESSES_NEW (insn, -1);
6698 remove_insn (pool->pool_insn);
6699
6700 insn = emit_label_after (pool->label, insn);
6701 INSN_ADDRESSES_NEW (insn, -1);
6702
6703 /* emit_pool_after will be set by s390_mainpool_start to the
6704 last insn of the section where the literal pool should be
6705 emitted. */
6706 insn = pool->emit_pool_after;
6707
6708 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6709 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6710
6711 s390_dump_pool (pool, 1);
6712 }
6713
6714 /* Otherwise, we emit an inline literal pool and use BASR to branch
6715 over it, setting up the pool register at the same time. */
6716 else
6717 {
6718 rtx pool_end = gen_label_rtx ();
6719
6720 insn = gen_main_base_31_large (base_reg, pool->label, pool_end);
6721 insn = emit_jump_insn_after (insn, pool->pool_insn);
6722 JUMP_LABEL (insn) = pool_end;
6723 INSN_ADDRESSES_NEW (insn, -1);
6724 remove_insn (pool->pool_insn);
6725
6726 insn = emit_label_after (pool->label, insn);
6727 INSN_ADDRESSES_NEW (insn, -1);
6728
6729 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6730 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6731
6732 insn = emit_label_after (pool_end, pool->pool_insn);
6733 INSN_ADDRESSES_NEW (insn, -1);
6734
6735 s390_dump_pool (pool, 1);
6736 }
6737
6738
6739 /* Replace all literal pool references. */
6740
6741 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6742 {
6743 if (INSN_P (insn))
6744 replace_ltrel_base (&PATTERN (insn));
6745
6746 if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
6747 {
6748 rtx addr, pool_ref = NULL_RTX;
6749 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6750 if (pool_ref)
6751 {
6752 if (s390_execute_label (insn))
6753 addr = s390_find_execute (pool, insn);
6754 else
6755 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
6756 get_pool_mode (pool_ref));
6757
6758 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
6759 INSN_CODE (insn) = -1;
6760 }
6761 }
6762 }
6763
6764
6765 /* Free the pool. */
6766 s390_free_pool (pool);
6767 }
6768
6769 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6770 We have decided we cannot use this pool, so revert all changes
6771 to the current function that were done by s390_mainpool_start. */
6772 static void
6773 s390_mainpool_cancel (struct constant_pool *pool)
6774 {
6775 /* We didn't actually change the instruction stream, so simply
6776 free the pool memory. */
6777 s390_free_pool (pool);
6778 }
6779
6780
6781 /* Chunkify the literal pool. */
6782
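/* The chunk size limits below are a heuristic: base + 12-bit displacement
   addressing only reaches 4096 bytes, so each chunk is kept well below
   that, and the MIN/MAX pair leaves some slack for finding a suitable
   place (a BARRIER) at which to end the chunk.  */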
6783 #define S390_POOL_CHUNK_MIN 0xc00
6784 #define S390_POOL_CHUNK_MAX 0xe00
6785
6786 static struct constant_pool *
6787 s390_chunkify_start (void)
6788 {
6789 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
6790 int extra_size = 0;
6791 bitmap far_labels;
6792 rtx pending_ltrel = NULL_RTX;
6793 rtx insn;
6794
6795 rtx (*gen_reload_base) (rtx, rtx) =
6796 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
6797
6798
6799 /* We need correct insn addresses. */
6800
6801 shorten_branches (get_insns ());
6802
6803 /* Scan all insns and move literals to pool chunks. */
6804
6805 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6806 {
6807 bool section_switch_p = false;
6808
6809 /* Check for pending LTREL_BASE. */
6810 if (INSN_P (insn))
6811 {
6812 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
6813 if (ltrel_base)
6814 {
6815 gcc_assert (ltrel_base == pending_ltrel);
6816 pending_ltrel = NULL_RTX;
6817 }
6818 }
6819
6820 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6821 {
6822 if (!curr_pool)
6823 curr_pool = s390_start_pool (&pool_list, insn);
6824
6825 s390_add_execute (curr_pool, insn);
6826 s390_add_pool_insn (curr_pool, insn);
6827 }
6828 else if (GET_CODE (insn) == INSN || CALL_P (insn))
6829 {
6830 rtx pool_ref = NULL_RTX;
6831 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6832 if (pool_ref)
6833 {
6834 rtx constant = get_pool_constant (pool_ref);
6835 enum machine_mode mode = get_pool_mode (pool_ref);
6836
6837 if (!curr_pool)
6838 curr_pool = s390_start_pool (&pool_list, insn);
6839
6840 s390_add_constant (curr_pool, constant, mode);
6841 s390_add_pool_insn (curr_pool, insn);
6842
6843 /* Don't split the pool chunk between a LTREL_OFFSET load
6844 and the corresponding LTREL_BASE. */
6845 if (GET_CODE (constant) == CONST
6846 && GET_CODE (XEXP (constant, 0)) == UNSPEC
6847 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
6848 {
6849 gcc_assert (!pending_ltrel);
6850 pending_ltrel = pool_ref;
6851 }
6852 }
6853 }
6854
6855 if (GET_CODE (insn) == JUMP_INSN || GET_CODE (insn) == CODE_LABEL)
6856 {
6857 if (curr_pool)
6858 s390_add_pool_insn (curr_pool, insn);
6859 /* An LTREL_BASE must follow within the same basic block. */
6860 gcc_assert (!pending_ltrel);
6861 }
6862
6863 if (NOTE_P (insn))
6864 switch (NOTE_KIND (insn))
6865 {
6866 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
6867 section_switch_p = true;
6868 break;
6869 case NOTE_INSN_VAR_LOCATION:
6870 case NOTE_INSN_CALL_ARG_LOCATION:
6871 continue;
6872 default:
6873 break;
6874 }
6875
6876 if (!curr_pool
6877 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
6878 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
6879 continue;
6880
6881 if (TARGET_CPU_ZARCH)
6882 {
6883 if (curr_pool->size < S390_POOL_CHUNK_MAX)
6884 continue;
6885
6886 s390_end_pool (curr_pool, NULL_RTX);
6887 curr_pool = NULL;
6888 }
6889 else
6890 {
6891 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
6892 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
6893 + extra_size;
6894
6895 /* We will later have to insert base register reload insns.
6896 Those will have an effect on code size, which we need to
6897 consider here. This calculation makes rather pessimistic
6898 worst-case assumptions. */
6899 if (GET_CODE (insn) == CODE_LABEL)
6900 extra_size += 6;
6901
6902 if (chunk_size < S390_POOL_CHUNK_MIN
6903 && curr_pool->size < S390_POOL_CHUNK_MIN
6904 && !section_switch_p)
6905 continue;
6906
6907 /* Pool chunks can only be inserted after BARRIERs ... */
6908 if (GET_CODE (insn) == BARRIER)
6909 {
6910 s390_end_pool (curr_pool, insn);
6911 curr_pool = NULL;
6912 extra_size = 0;
6913 }
6914
6915 /* ... so if we don't find one in time, create one. */
6916 else if (chunk_size > S390_POOL_CHUNK_MAX
6917 || curr_pool->size > S390_POOL_CHUNK_MAX
6918 || section_switch_p)
6919 {
6920 rtx label, jump, barrier, next, prev;
6921
6922 if (!section_switch_p)
6923 {
6924 /* We can insert the barrier only after a 'real' insn. */
6925 if (GET_CODE (insn) != INSN && GET_CODE (insn) != CALL_INSN)
6926 continue;
6927 if (get_attr_length (insn) == 0)
6928 continue;
6929 /* Don't separate LTREL_BASE from the corresponding
6930 LTREL_OFFSET load. */
6931 if (pending_ltrel)
6932 continue;
6933 next = insn;
6934 do
6935 {
6936 insn = next;
6937 next = NEXT_INSN (insn);
6938 }
6939 while (next
6940 && NOTE_P (next)
6941 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
6942 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
6943 }
6944 else
6945 {
6946 gcc_assert (!pending_ltrel);
6947
6948 /* The old pool has to end before the section switch
6949 note in order to make it part of the current
6950 section. */
6951 insn = PREV_INSN (insn);
6952 }
6953
6954 label = gen_label_rtx ();
6955 prev = insn;
6956 if (prev && NOTE_P (prev))
6957 prev = prev_nonnote_insn (prev);
6958 if (prev)
6959 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
6960 INSN_LOCATION (prev));
6961 else
6962 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
6963 barrier = emit_barrier_after (jump);
6964 insn = emit_label_after (label, barrier);
6965 JUMP_LABEL (jump) = label;
6966 LABEL_NUSES (label) = 1;
6967
6968 INSN_ADDRESSES_NEW (jump, -1);
6969 INSN_ADDRESSES_NEW (barrier, -1);
6970 INSN_ADDRESSES_NEW (insn, -1);
6971
6972 s390_end_pool (curr_pool, barrier);
6973 curr_pool = NULL;
6974 extra_size = 0;
6975 }
6976 }
6977 }
6978
6979 if (curr_pool)
6980 s390_end_pool (curr_pool, NULL_RTX);
6981 gcc_assert (!pending_ltrel);
6982
6983 /* Find all labels that are branched into
6984 from an insn belonging to a different chunk. */
6985
6986 far_labels = BITMAP_ALLOC (NULL);
6987
6988 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6989 {
6990 /* Labels marked with LABEL_PRESERVE_P can be the target
6991 of non-local jumps, so we have to mark them.
6992 The same holds for named labels.
6993
6994 Don't do that, however, if it is the label before
6995 a jump table. */
6996
6997 if (GET_CODE (insn) == CODE_LABEL
6998 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
6999 {
7000 rtx vec_insn = next_real_insn (insn);
7001 rtx vec_pat = vec_insn && GET_CODE (vec_insn) == JUMP_INSN ?
7002 PATTERN (vec_insn) : NULL_RTX;
7003 if (!vec_pat
7004 || !(GET_CODE (vec_pat) == ADDR_VEC
7005 || GET_CODE (vec_pat) == ADDR_DIFF_VEC))
7006 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
7007 }
7008
7009 /* If we have a direct jump (conditional or unconditional)
7010 or a casesi jump, check all potential targets. */
7011 else if (GET_CODE (insn) == JUMP_INSN)
7012 {
7013 rtx pat = PATTERN (insn);
7014 if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2)
7015 pat = XVECEXP (pat, 0, 0);
7016
7017 if (GET_CODE (pat) == SET)
7018 {
7019 rtx label = JUMP_LABEL (insn);
7020 if (label)
7021 {
7022 if (s390_find_pool (pool_list, label)
7023 != s390_find_pool (pool_list, insn))
7024 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7025 }
7026 }
7027 else if (GET_CODE (pat) == PARALLEL
7028 && XVECLEN (pat, 0) == 2
7029 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
7030 && GET_CODE (XVECEXP (pat, 0, 1)) == USE
7031 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == LABEL_REF)
7032 {
7033 /* Find the jump table used by this casesi jump. */
7034 rtx vec_label = XEXP (XEXP (XVECEXP (pat, 0, 1), 0), 0);
7035 rtx vec_insn = next_real_insn (vec_label);
7036 rtx vec_pat = vec_insn && GET_CODE (vec_insn) == JUMP_INSN ?
7037 PATTERN (vec_insn) : NULL_RTX;
7038 if (vec_pat
7039 && (GET_CODE (vec_pat) == ADDR_VEC
7040 || GET_CODE (vec_pat) == ADDR_DIFF_VEC))
7041 {
7042 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
7043
7044 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
7045 {
7046 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
7047
7048 if (s390_find_pool (pool_list, label)
7049 != s390_find_pool (pool_list, insn))
7050 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7051 }
7052 }
7053 }
7054 }
7055 }
7056
7057 /* Insert base register reload insns before every pool. */
7058
7059 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7060 {
7061 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7062 curr_pool->label);
7063 rtx insn = curr_pool->first_insn;
7064 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
7065 }
7066
7067 /* Insert base register reload insns at every far label. */
7068
7069 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7070 if (GET_CODE (insn) == CODE_LABEL
7071 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
7072 {
7073 struct constant_pool *pool = s390_find_pool (pool_list, insn);
7074 if (pool)
7075 {
7076 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7077 pool->label);
7078 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
7079 }
7080 }
7081
7082
7083 BITMAP_FREE (far_labels);
7084
7085
7086 /* Recompute insn addresses. */
7087
7088 init_insn_lengths ();
7089 shorten_branches (get_insns ());
7090
7091 return pool_list;
7092 }
7093
7094 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7095 After we have decided to use this list, finish implementing
7096 all changes to the current function as required. */
7097
7098 static void
7099 s390_chunkify_finish (struct constant_pool *pool_list)
7100 {
7101 struct constant_pool *curr_pool = NULL;
7102 rtx insn;
7103
7104
7105 /* Replace all literal pool references. */
7106
7107 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7108 {
7109 if (INSN_P (insn))
7110 replace_ltrel_base (&PATTERN (insn));
7111
7112 curr_pool = s390_find_pool (pool_list, insn);
7113 if (!curr_pool)
7114 continue;
7115
7116 if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
7117 {
7118 rtx addr, pool_ref = NULL_RTX;
7119 find_constant_pool_ref (PATTERN (insn), &pool_ref);
7120 if (pool_ref)
7121 {
7122 if (s390_execute_label (insn))
7123 addr = s390_find_execute (curr_pool, insn);
7124 else
7125 addr = s390_find_constant (curr_pool,
7126 get_pool_constant (pool_ref),
7127 get_pool_mode (pool_ref));
7128
7129 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
7130 INSN_CODE (insn) = -1;
7131 }
7132 }
7133 }
7134
7135 /* Dump out all literal pools. */
7136
7137 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7138 s390_dump_pool (curr_pool, 0);
7139
7140 /* Free pool list. */
7141
7142 while (pool_list)
7143 {
7144 struct constant_pool *next = pool_list->next;
7145 s390_free_pool (pool_list);
7146 pool_list = next;
7147 }
7148 }
7149
7150 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7151 We have decided we cannot use this list, so revert all changes
7152 to the current function that were done by s390_chunkify_start. */
7153
7154 static void
7155 s390_chunkify_cancel (struct constant_pool *pool_list)
7156 {
7157 struct constant_pool *curr_pool = NULL;
7158 rtx insn;
7159
7160 /* Remove all pool placeholder insns. */
7161
7162 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7163 {
7164 /* Did we insert an extra barrier? Remove it. */
7165 rtx barrier = PREV_INSN (curr_pool->pool_insn);
7166 rtx jump = barrier? PREV_INSN (barrier) : NULL_RTX;
7167 rtx label = NEXT_INSN (curr_pool->pool_insn);
7168
7169 if (jump && GET_CODE (jump) == JUMP_INSN
7170 && barrier && GET_CODE (barrier) == BARRIER
7171 && label && GET_CODE (label) == CODE_LABEL
7172 && GET_CODE (PATTERN (jump)) == SET
7173 && SET_DEST (PATTERN (jump)) == pc_rtx
7174 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
7175 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
7176 {
7177 remove_insn (jump);
7178 remove_insn (barrier);
7179 remove_insn (label);
7180 }
7181
7182 remove_insn (curr_pool->pool_insn);
7183 }
7184
7185 /* Remove all base register reload insns. */
7186
7187 for (insn = get_insns (); insn; )
7188 {
7189 rtx next_insn = NEXT_INSN (insn);
7190
7191 if (GET_CODE (insn) == INSN
7192 && GET_CODE (PATTERN (insn)) == SET
7193 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
7194 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
7195 remove_insn (insn);
7196
7197 insn = next_insn;
7198 }
7199
7200 /* Free pool list. */
7201
7202 while (pool_list)
7203 {
7204 struct constant_pool *next = pool_list->next;
7205 s390_free_pool (pool_list);
7206 pool_list = next;
7207 }
7208 }
7209
7210 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
7211
7212 void
7213 s390_output_pool_entry (rtx exp, enum machine_mode mode, unsigned int align)
7214 {
7215 REAL_VALUE_TYPE r;
7216
7217 switch (GET_MODE_CLASS (mode))
7218 {
7219 case MODE_FLOAT:
7220 case MODE_DECIMAL_FLOAT:
7221 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
7222
7223 REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
7224 assemble_real (r, mode, align);
7225 break;
7226
7227 case MODE_INT:
7228 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
7229 mark_symbol_refs_as_used (exp);
7230 break;
7231
7232 default:
7233 gcc_unreachable ();
7234 }
7235 }
7236
7237
7238 /* Return an RTL expression representing the value of the return address
7239 for the frame COUNT steps up from the current frame. FRAME is the
7240 frame pointer of that frame. */
7241
7242 rtx
7243 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
7244 {
7245 int offset;
7246 rtx addr;
7247
7248 /* Without backchain, we fail for all but the current frame. */
7249
7250 if (!TARGET_BACKCHAIN && count > 0)
7251 return NULL_RTX;
7252
7253 /* For the current frame, we need to make sure the initial
7254 value of RETURN_REGNUM is actually saved. */
7255
7256 if (count == 0)
7257 {
7258 /* On non-z architectures branch splitting could overwrite r14. */
7259 if (TARGET_CPU_ZARCH)
7260 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
7261 else
7262 {
7263 cfun_frame_layout.save_return_addr_p = true;
7264 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
7265 }
7266 }
7267
7268 if (TARGET_PACKED_STACK)
7269 offset = -2 * UNITS_PER_LONG;
7270 else
7271 offset = RETURN_REGNUM * UNITS_PER_LONG;
7272
7273 addr = plus_constant (Pmode, frame, offset);
7274 addr = memory_address (Pmode, addr);
7275 return gen_rtx_MEM (Pmode, addr);
7276 }
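
/* For COUNT > 0 the return address is thus read from the caller's save
   area: at FRAME - 2 * UNITS_PER_LONG with the packed stack layout, and at
   FRAME + RETURN_REGNUM * UNITS_PER_LONG (the regular GPR save slot of
   r14) otherwise.  */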
7277
7278 /* Return an RTL expression representing the back chain stored in
7279 the current stack frame. */
7280
7281 rtx
7282 s390_back_chain_rtx (void)
7283 {
7284 rtx chain;
7285
7286 gcc_assert (TARGET_BACKCHAIN);
7287
7288 if (TARGET_PACKED_STACK)
7289 chain = plus_constant (Pmode, stack_pointer_rtx,
7290 STACK_POINTER_OFFSET - UNITS_PER_LONG);
7291 else
7292 chain = stack_pointer_rtx;
7293
7294 chain = gen_rtx_MEM (Pmode, chain);
7295 return chain;
7296 }
7297
7298 /* Find the first call-clobbered register not used in the current function.
7299 It could be used as a base register in a leaf function
7300 or to hold the return address before the epilogue.  */
7301
7302 static int
7303 find_unused_clobbered_reg (void)
7304 {
7305 int i;
7306 for (i = 0; i < 6; i++)
7307 if (!df_regs_ever_live_p (i))
7308 return i;
7309 return 0;
7310 }
7311
7312
7313 /* Helper function for s390_regs_ever_clobbered.  Sets the entries of the
7314 int array pointed to by DATA for all hard regs clobbered by SETREG.  */
7315
7316 static void
7317 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
7318 {
7319 int *regs_ever_clobbered = (int *)data;
7320 unsigned int i, regno;
7321 enum machine_mode mode = GET_MODE (setreg);
7322
7323 if (GET_CODE (setreg) == SUBREG)
7324 {
7325 rtx inner = SUBREG_REG (setreg);
7326 if (!GENERAL_REG_P (inner))
7327 return;
7328 regno = subreg_regno (setreg);
7329 }
7330 else if (GENERAL_REG_P (setreg))
7331 regno = REGNO (setreg);
7332 else
7333 return;
7334
7335 for (i = regno;
7336 i < regno + HARD_REGNO_NREGS (regno, mode);
7337 i++)
7338 regs_ever_clobbered[i] = 1;
7339 }
7340
7341 /* Walks through all basic blocks of the current function looking
7342 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
7343 of the passed integer array REGS_EVER_CLOBBERED are set to one for
7344 each of those regs. */
7345
7346 static void
7347 s390_regs_ever_clobbered (int *regs_ever_clobbered)
7348 {
7349 basic_block cur_bb;
7350 rtx cur_insn;
7351 unsigned int i;
7352
7353 memset (regs_ever_clobbered, 0, 16 * sizeof (int));
7354
7355 /* For non-leaf functions we have to consider all call clobbered regs to be
7356 clobbered. */
7357 if (!crtl->is_leaf)
7358 {
7359 for (i = 0; i < 16; i++)
7360 regs_ever_clobbered[i] = call_really_used_regs[i];
7361 }
7362
7363 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
7364 this work is done by liveness analysis (mark_regs_live_at_end).
7365 Special care is needed for functions containing landing pads. Landing pads
7366 may use the eh registers, but the code which sets these registers is not
7367 contained in that function. Hence s390_regs_ever_clobbered is not able to
7368 deal with this automatically. */
7369 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
7370 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
7371 if (crtl->calls_eh_return
7372 || (cfun->machine->has_landing_pad_p
7373 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
7374 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
7375
7376 /* For nonlocal gotos all call-saved registers have to be saved.
7377 This flag is also set for the unwinding code in libgcc.
7378 See expand_builtin_unwind_init. For regs_ever_live this is done by
7379 reload. */
7380 if (cfun->has_nonlocal_label)
7381 for (i = 0; i < 16; i++)
7382 if (!call_really_used_regs[i])
7383 regs_ever_clobbered[i] = 1;
7384
7385 FOR_EACH_BB (cur_bb)
7386 {
7387 FOR_BB_INSNS (cur_bb, cur_insn)
7388 {
7389 if (INSN_P (cur_insn))
7390 note_stores (PATTERN (cur_insn),
7391 s390_reg_clobbered_rtx,
7392 regs_ever_clobbered);
7393 }
7394 }
7395 }
7396
7397 /* Determine the frame area which actually has to be accessed
7398 in the function epilogue. The values are stored at the
7399 given pointers AREA_BOTTOM (address of the lowest used stack
7400 address) and AREA_TOP (address of the first item which does
7401 not belong to the stack frame). */
7402
7403 static void
7404 s390_frame_area (int *area_bottom, int *area_top)
7405 {
7406 int b, t;
7407 int i;
7408
7409 b = INT_MAX;
7410 t = INT_MIN;
7411
7412 if (cfun_frame_layout.first_restore_gpr != -1)
7413 {
7414 b = (cfun_frame_layout.gprs_offset
7415 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
7416 t = b + (cfun_frame_layout.last_restore_gpr
7417 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
7418 }
7419
7420 if (TARGET_64BIT && cfun_save_high_fprs_p)
7421 {
7422 b = MIN (b, cfun_frame_layout.f8_offset);
7423 t = MAX (t, (cfun_frame_layout.f8_offset
7424 + cfun_frame_layout.high_fprs * 8));
7425 }
7426
7427 if (!TARGET_64BIT)
7428 for (i = 2; i < 4; i++)
7429 if (cfun_fpr_bit_p (i))
7430 {
7431 b = MIN (b, cfun_frame_layout.f4_offset + (i - 2) * 8);
7432 t = MAX (t, cfun_frame_layout.f4_offset + (i - 1) * 8);
7433 }
7434
7435 *area_bottom = b;
7436 *area_top = t;
7437 }
7438
7439 /* Fill cfun->machine with info about register usage of current function.
7440 Return in CLOBBERED_REGS which GPRs are currently considered set. */
7441
7442 static void
7443 s390_register_info (int clobbered_regs[])
7444 {
7445 int i, j;
7446
7447 /* FPRs 8-15 are call-saved in the 64-bit ABI.  */
7448 cfun_frame_layout.fpr_bitmap = 0;
7449 cfun_frame_layout.high_fprs = 0;
7450 if (TARGET_64BIT)
7451 for (i = 24; i < 32; i++)
7452 if (df_regs_ever_live_p (i) && !global_regs[i])
7453 {
7454 cfun_set_fpr_bit (i - 16);
7455 cfun_frame_layout.high_fprs++;
7456 }
7457
7458 /* Find first and last gpr to be saved. We trust regs_ever_live
7459 data, except that we don't save and restore global registers.
7460
7461 Also, all registers with special meaning to the compiler need
7462 special handling.  */
7463
7464 s390_regs_ever_clobbered (clobbered_regs);
7465
7466 for (i = 0; i < 16; i++)
7467 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i] && !fixed_regs[i];
7468
7469 if (frame_pointer_needed)
7470 clobbered_regs[HARD_FRAME_POINTER_REGNUM] = 1;
7471
7472 if (flag_pic)
7473 clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
7474 |= df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7475
7476 clobbered_regs[BASE_REGNUM]
7477 |= (cfun->machine->base_reg
7478 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
7479
7480 clobbered_regs[RETURN_REGNUM]
7481 |= (!crtl->is_leaf
7482 || TARGET_TPF_PROFILING
7483 || cfun->machine->split_branches_pending_p
7484 || cfun_frame_layout.save_return_addr_p
7485 || crtl->calls_eh_return
7486 || cfun->stdarg);
7487
7488 clobbered_regs[STACK_POINTER_REGNUM]
7489 |= (!crtl->is_leaf
7490 || TARGET_TPF_PROFILING
7491 || cfun_save_high_fprs_p
7492 || get_frame_size () > 0
7493 || cfun->calls_alloca
7494 || cfun->stdarg);
7495
7496 for (i = 6; i < 16; i++)
7497 if (df_regs_ever_live_p (i) || clobbered_regs[i])
7498 break;
7499 for (j = 15; j > i; j--)
7500 if (df_regs_ever_live_p (j) || clobbered_regs[j])
7501 break;
7502
7503 if (i == 16)
7504 {
7505 /* Nothing to save/restore. */
7506 cfun_frame_layout.first_save_gpr_slot = -1;
7507 cfun_frame_layout.last_save_gpr_slot = -1;
7508 cfun_frame_layout.first_save_gpr = -1;
7509 cfun_frame_layout.first_restore_gpr = -1;
7510 cfun_frame_layout.last_save_gpr = -1;
7511 cfun_frame_layout.last_restore_gpr = -1;
7512 }
7513 else
7514 {
7515 /* Save slots for gprs from i to j. */
7516 cfun_frame_layout.first_save_gpr_slot = i;
7517 cfun_frame_layout.last_save_gpr_slot = j;
7518
7519 for (i = cfun_frame_layout.first_save_gpr_slot;
7520 i < cfun_frame_layout.last_save_gpr_slot + 1;
7521 i++)
7522 if (clobbered_regs[i])
7523 break;
7524
7525 for (j = cfun_frame_layout.last_save_gpr_slot; j > i; j--)
7526 if (clobbered_regs[j])
7527 break;
7528
7529 if (i == cfun_frame_layout.last_save_gpr_slot + 1)
7530 {
7531 /* Nothing to save/restore. */
7532 cfun_frame_layout.first_save_gpr = -1;
7533 cfun_frame_layout.first_restore_gpr = -1;
7534 cfun_frame_layout.last_save_gpr = -1;
7535 cfun_frame_layout.last_restore_gpr = -1;
7536 }
7537 else
7538 {
7539 /* Save / Restore from gpr i to j. */
7540 cfun_frame_layout.first_save_gpr = i;
7541 cfun_frame_layout.first_restore_gpr = i;
7542 cfun_frame_layout.last_save_gpr = j;
7543 cfun_frame_layout.last_restore_gpr = j;
7544 }
7545 }
7546
7547 if (cfun->stdarg)
7548 {
7549 /* Varargs functions need to save gprs 2 to 6. */
7550 if (cfun->va_list_gpr_size
7551 && crtl->args.info.gprs < GP_ARG_NUM_REG)
7552 {
7553 int min_gpr = crtl->args.info.gprs;
7554 int max_gpr = min_gpr + cfun->va_list_gpr_size;
7555 if (max_gpr > GP_ARG_NUM_REG)
7556 max_gpr = GP_ARG_NUM_REG;
7557
7558 if (cfun_frame_layout.first_save_gpr == -1
7559 || cfun_frame_layout.first_save_gpr > 2 + min_gpr)
7560 {
7561 cfun_frame_layout.first_save_gpr = 2 + min_gpr;
7562 cfun_frame_layout.first_save_gpr_slot = 2 + min_gpr;
7563 }
7564
7565 if (cfun_frame_layout.last_save_gpr == -1
7566 || cfun_frame_layout.last_save_gpr < 2 + max_gpr - 1)
7567 {
7568 cfun_frame_layout.last_save_gpr = 2 + max_gpr - 1;
7569 cfun_frame_layout.last_save_gpr_slot = 2 + max_gpr - 1;
7570 }
7571 }
7572
7573 /* Mark f0 and f2 (31 bit) resp. f0-f4 (64 bit) as needing to be saved.  */
7574 if (TARGET_HARD_FLOAT && cfun->va_list_fpr_size
7575 && crtl->args.info.fprs < FP_ARG_NUM_REG)
7576 {
7577 int min_fpr = crtl->args.info.fprs;
7578 int max_fpr = min_fpr + cfun->va_list_fpr_size;
7579 if (max_fpr > FP_ARG_NUM_REG)
7580 max_fpr = FP_ARG_NUM_REG;
7581
7582 /* ??? This is currently required to ensure proper location
7583 of the fpr save slots within the va_list save area. */
7584 if (TARGET_PACKED_STACK)
7585 min_fpr = 0;
7586
7587 for (i = min_fpr; i < max_fpr; i++)
7588 cfun_set_fpr_bit (i);
7589 }
7590 }
7591
7592 if (!TARGET_64BIT)
7593 for (i = 2; i < 4; i++)
7594 if (df_regs_ever_live_p (i + 16) && !global_regs[i + 16])
7595 cfun_set_fpr_bit (i);
7596 }
7597
7598 /* Fill cfun->machine with info about frame of current function. */
7599
7600 static void
7601 s390_frame_info (void)
7602 {
7603 int i;
7604
7605 cfun_frame_layout.frame_size = get_frame_size ();
7606 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
7607 fatal_error ("total size of local variables exceeds architecture limit");
7608
7609 if (!TARGET_PACKED_STACK)
7610 {
7611 cfun_frame_layout.backchain_offset = 0;
7612 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
7613 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
7614 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
7615 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
7616 * UNITS_PER_LONG);
7617 }
7618 else if (TARGET_BACKCHAIN) /* kernel stack layout */
7619 {
7620 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
7621 - UNITS_PER_LONG);
7622 cfun_frame_layout.gprs_offset
7623 = (cfun_frame_layout.backchain_offset
7624 - (STACK_POINTER_REGNUM - cfun_frame_layout.first_save_gpr_slot + 1)
7625 * UNITS_PER_LONG);
7626
7627 if (TARGET_64BIT)
7628 {
7629 cfun_frame_layout.f4_offset
7630 = (cfun_frame_layout.gprs_offset
7631 - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
7632
7633 cfun_frame_layout.f0_offset
7634 = (cfun_frame_layout.f4_offset
7635 - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
7636 }
7637 else
7638 {
7639 /* On 31 bit we have to take care of the alignment of the
7640 floating point regs to provide the fastest access.  */
7641 cfun_frame_layout.f0_offset
7642 = ((cfun_frame_layout.gprs_offset
7643 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1))
7644 - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
7645
7646 cfun_frame_layout.f4_offset
7647 = (cfun_frame_layout.f0_offset
7648 - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
7649 }
7650 }
7651 else /* no backchain */
7652 {
7653 cfun_frame_layout.f4_offset
7654 = (STACK_POINTER_OFFSET
7655 - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
7656
7657 cfun_frame_layout.f0_offset
7658 = (cfun_frame_layout.f4_offset
7659 - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
7660
7661 cfun_frame_layout.gprs_offset
7662 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
7663 }
7664
7665 if (crtl->is_leaf
7666 && !TARGET_TPF_PROFILING
7667 && cfun_frame_layout.frame_size == 0
7668 && !cfun_save_high_fprs_p
7669 && !cfun->calls_alloca
7670 && !cfun->stdarg)
7671 return;
7672
7673 if (!TARGET_PACKED_STACK)
7674 cfun_frame_layout.frame_size += (STACK_POINTER_OFFSET
7675 + crtl->outgoing_args_size
7676 + cfun_frame_layout.high_fprs * 8);
7677 else
7678 {
7679 if (TARGET_BACKCHAIN)
7680 cfun_frame_layout.frame_size += UNITS_PER_LONG;
7681
7682 /* No alignment trouble here because f8-f15 are only saved under
7683 64 bit. */
7684 cfun_frame_layout.f8_offset = (MIN (MIN (cfun_frame_layout.f0_offset,
7685 cfun_frame_layout.f4_offset),
7686 cfun_frame_layout.gprs_offset)
7687 - cfun_frame_layout.high_fprs * 8);
7688
7689 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
7690
7691 for (i = 0; i < 8; i++)
7692 if (cfun_fpr_bit_p (i))
7693 cfun_frame_layout.frame_size += 8;
7694
7695 cfun_frame_layout.frame_size += cfun_gprs_save_area_size;
7696
7697 /* If, under 31 bit, an odd number of GPRs has to be saved, we have to adjust
7698 the frame size to maintain 8-byte alignment of stack frames.  */
7699 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
7700 STACK_BOUNDARY / BITS_PER_UNIT - 1)
7701 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
7702
7703 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
7704 }
7705 }
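
/* Roughly, with the standard (non-packed) layout the offsets computed
   above place the backchain slot at offset 0, the GPR save area at
   first_save_gpr_slot * UNITS_PER_LONG, and f0/f2 at 16 * UNITS_PER_LONG
   with f4/f6 directly behind, while the high FPRs f8-f15 get a negative
   f8_offset and are saved within the frame this function allocates.  */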
7706
7707 /* Generate frame layout. Fills in register and frame data for the current
7708 function in cfun->machine. This routine can be called multiple times;
7709 it will re-do the complete frame layout every time. */
7710
7711 static void
7712 s390_init_frame_layout (void)
7713 {
7714 HOST_WIDE_INT frame_size;
7715 int base_used;
7716 int clobbered_regs[16];
7717
7718 /* On S/390 machines, we may need to perform branch splitting, which
7719 will require both the base and the return address register.  We have no
7720 choice but to assume we're going to need them until right at the
7721 end of the machine dependent reorg phase. */
7722 if (!TARGET_CPU_ZARCH)
7723 cfun->machine->split_branches_pending_p = true;
7724
7725 do
7726 {
7727 frame_size = cfun_frame_layout.frame_size;
7728
7729 /* Try to predict whether we'll need the base register. */
7730 base_used = cfun->machine->split_branches_pending_p
7731 || crtl->uses_const_pool
7732 || (!DISP_IN_RANGE (frame_size)
7733 && !CONST_OK_FOR_K (frame_size));
7734
7735 /* Decide which register to use as literal pool base. In small
7736 leaf functions, try to use an unused call-clobbered register
7737 as base register to avoid save/restore overhead. */
7738 if (!base_used)
7739 cfun->machine->base_reg = NULL_RTX;
7740 else if (crtl->is_leaf && !df_regs_ever_live_p (5))
7741 cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
7742 else
7743 cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
7744
7745 s390_register_info (clobbered_regs);
7746 s390_frame_info ();
7747 }
7748 while (frame_size != cfun_frame_layout.frame_size);
7749 }
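
/* The do/while loop above iterates to a fixed point: whether the literal
   pool base register is needed depends on the frame size (via
   DISP_IN_RANGE / CONST_OK_FOR_K), and the frame size in turn depends on
   which registers have to be saved, so the layout is recomputed until the
   frame size no longer changes.  */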
7750
7751 /* Update frame layout. Recompute actual register save data based on
7752 current info and update regs_ever_live for the special registers.
7753 May be called multiple times, but may never cause *more* registers
7754 to be saved than s390_init_frame_layout allocated room for. */
7755
7756 static void
7757 s390_update_frame_layout (void)
7758 {
7759 int clobbered_regs[16];
7760
7761 s390_register_info (clobbered_regs);
7762
7763 df_set_regs_ever_live (BASE_REGNUM,
7764 clobbered_regs[BASE_REGNUM] ? true : false);
7765 df_set_regs_ever_live (RETURN_REGNUM,
7766 clobbered_regs[RETURN_REGNUM] ? true : false);
7767 df_set_regs_ever_live (STACK_POINTER_REGNUM,
7768 clobbered_regs[STACK_POINTER_REGNUM] ? true : false);
7769
7770 if (cfun->machine->base_reg)
7771 df_set_regs_ever_live (REGNO (cfun->machine->base_reg), true);
7772 }
7773
7774 /* Return true if it is legal to put a value with MODE into REGNO. */
7775
7776 bool
7777 s390_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
7778 {
7779 switch (REGNO_REG_CLASS (regno))
7780 {
7781 case FP_REGS:
7782 if (REGNO_PAIR_OK (regno, mode))
7783 {
7784 if (mode == SImode || mode == DImode)
7785 return true;
7786
7787 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
7788 return true;
7789 }
7790 break;
7791 case ADDR_REGS:
7792 if (FRAME_REGNO_P (regno) && mode == Pmode)
7793 return true;
7794
7795 /* fallthrough */
7796 case GENERAL_REGS:
7797 if (REGNO_PAIR_OK (regno, mode))
7798 {
7799 if (TARGET_ZARCH
7800 || (mode != TFmode && mode != TCmode && mode != TDmode))
7801 return true;
7802 }
7803 break;
7804 case CC_REGS:
7805 if (GET_MODE_CLASS (mode) == MODE_CC)
7806 return true;
7807 break;
7808 case ACCESS_REGS:
7809 if (REGNO_PAIR_OK (regno, mode))
7810 {
7811 if (mode == SImode || mode == Pmode)
7812 return true;
7813 }
7814 break;
7815 default:
7816 return false;
7817 }
7818
7819 return false;
7820 }
7821
7822 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
7823
7824 bool
7825 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
7826 {
7827 /* Once we've decided upon a register to use as base register, it must
7828 no longer be used for any other purpose. */
7829 if (cfun->machine->base_reg)
7830 if (REGNO (cfun->machine->base_reg) == old_reg
7831 || REGNO (cfun->machine->base_reg) == new_reg)
7832 return false;
7833
7834 return true;
7835 }
7836
7837 /* Maximum number of registers to represent a value of mode MODE
7838 in a register of class RCLASS. */
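/* For example, a TFmode value (16 bytes) occupies two FP registers, while
   a TCmode complex value (32 bytes) occupies 2 * ((16 + 7) / 8) = 4.  */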
7839
7840 int
7841 s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
7842 {
7843 switch (rclass)
7844 {
7845 case FP_REGS:
7846 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7847 return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8);
7848 else
7849 return (GET_MODE_SIZE (mode) + 8 - 1) / 8;
7850 case ACCESS_REGS:
7851 return (GET_MODE_SIZE (mode) + 4 - 1) / 4;
7852 default:
7853 break;
7854 }
7855 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7856 }
7857
7858 /* Return true if register FROM can be eliminated via register TO. */
7859
7860 static bool
7861 s390_can_eliminate (const int from, const int to)
7862 {
7863 /* On zSeries machines, we have not marked the base register as fixed.
7864 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
7865 If a function requires the base register, we say here that this
7866 elimination cannot be performed. This will cause reload to free
7867 up the base register (as if it were fixed). On the other hand,
7868 if the current function does *not* require the base register, we
7869 say here the elimination succeeds, which in turn allows reload
7870 to allocate the base register for any other purpose. */
7871 if (from == BASE_REGNUM && to == BASE_REGNUM)
7872 {
7873 if (TARGET_CPU_ZARCH)
7874 {
7875 s390_init_frame_layout ();
7876 return cfun->machine->base_reg == NULL_RTX;
7877 }
7878
7879 return false;
7880 }
7881
7882 /* Everything else must point into the stack frame. */
7883 gcc_assert (to == STACK_POINTER_REGNUM
7884 || to == HARD_FRAME_POINTER_REGNUM);
7885
7886 gcc_assert (from == FRAME_POINTER_REGNUM
7887 || from == ARG_POINTER_REGNUM
7888 || from == RETURN_ADDRESS_POINTER_REGNUM);
7889
7890 /* Make sure we actually saved the return address. */
7891 if (from == RETURN_ADDRESS_POINTER_REGNUM)
7892 if (!crtl->calls_eh_return
7893 && !cfun->stdarg
7894 && !cfun_frame_layout.save_return_addr_p)
7895 return false;
7896
7897 return true;
7898 }
7899
7900 /* Return offset between register FROM and TO initially after prolog. */
7901
7902 HOST_WIDE_INT
7903 s390_initial_elimination_offset (int from, int to)
7904 {
7905 HOST_WIDE_INT offset;
7906 int index;
7907
7908 /* ??? Why are we called for non-eliminable pairs? */
7909 if (!s390_can_eliminate (from, to))
7910 return 0;
7911
7912 switch (from)
7913 {
7914 case FRAME_POINTER_REGNUM:
7915 offset = (get_frame_size()
7916 + STACK_POINTER_OFFSET
7917 + crtl->outgoing_args_size);
7918 break;
7919
7920 case ARG_POINTER_REGNUM:
7921 s390_init_frame_layout ();
7922 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
7923 break;
7924
7925 case RETURN_ADDRESS_POINTER_REGNUM:
7926 s390_init_frame_layout ();
7927 index = RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot;
7928 gcc_assert (index >= 0);
7929 offset = cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset;
7930 offset += index * UNITS_PER_LONG;
7931 break;
7932
7933 case BASE_REGNUM:
7934 offset = 0;
7935 break;
7936
7937 default:
7938 gcc_unreachable ();
7939 }
7940
7941 return offset;
7942 }
7943
7944 /* Emit insn to save fpr REGNUM at offset OFFSET relative
7945 to register BASE. Return generated insn. */
7946
7947 static rtx
7948 save_fpr (rtx base, int offset, int regnum)
7949 {
7950 rtx addr;
7951 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
7952
7953 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
7954 set_mem_alias_set (addr, get_varargs_alias_set ());
7955 else
7956 set_mem_alias_set (addr, get_frame_alias_set ());
7957
7958 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
7959 }
7960
7961 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
7962 to register BASE. Return generated insn. */
7963
7964 static rtx
7965 restore_fpr (rtx base, int offset, int regnum)
7966 {
7967 rtx addr;
7968 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
7969 set_mem_alias_set (addr, get_frame_alias_set ());
7970
7971 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
7972 }
7973
7974 /* Return true if REGNO is a global register, but not one
7975 of the special ones that need to be saved/restored anyway. */
7976
7977 static inline bool
7978 global_not_special_regno_p (int regno)
7979 {
7980 return (global_regs[regno]
7981 /* These registers are special and need to be
7982 restored in any case. */
7983 && !(regno == STACK_POINTER_REGNUM
7984 || regno == RETURN_REGNUM
7985 || regno == BASE_REGNUM
7986 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
7987 }
7988
7989 /* Generate insn to save registers FIRST to LAST into
7990 the register save area located at offset OFFSET
7991 relative to register BASE. */
7992
7993 static rtx
7994 save_gprs (rtx base, int offset, int first, int last)
7995 {
7996 rtx addr, insn, note;
7997 int i;
7998
7999 addr = plus_constant (Pmode, base, offset);
8000 addr = gen_rtx_MEM (Pmode, addr);
8001
8002 set_mem_alias_set (addr, get_frame_alias_set ());
8003
8004 /* Special-case single register. */
8005 if (first == last)
8006 {
8007 if (TARGET_64BIT)
8008 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
8009 else
8010 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
8011
8012 if (!global_not_special_regno_p (first))
8013 RTX_FRAME_RELATED_P (insn) = 1;
8014 return insn;
8015 }
8016
8017
8018 insn = gen_store_multiple (addr,
8019 gen_rtx_REG (Pmode, first),
8020 GEN_INT (last - first + 1));
8021
8022 if (first <= 6 && cfun->stdarg)
8023 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8024 {
8025 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
8026
8027 if (first + i <= 6)
8028 set_mem_alias_set (mem, get_varargs_alias_set ());
8029 }
8030
8031 /* We need to set the FRAME_RELATED flag on all SETs
8032 inside the store-multiple pattern.
8033
8034 However, we must not emit DWARF records for registers 2..5
8035 if they are stored for use by variable arguments ...
8036
8037 ??? Unfortunately, it is not enough to simply not set the
8038 FRAME_RELATED flags for those SETs, because the first SET
8039 of the PARALLEL is always treated as if it had the flag
8040 set, even if it does not. Therefore we emit a new pattern
8041 without those registers as REG_FRAME_RELATED_EXPR note. */
8042
8043 if (first >= 6 && !global_not_special_regno_p (first))
8044 {
8045 rtx pat = PATTERN (insn);
8046
8047 for (i = 0; i < XVECLEN (pat, 0); i++)
8048 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
8049 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
8050 0, i)))))
8051 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
8052
8053 RTX_FRAME_RELATED_P (insn) = 1;
8054 }
8055 else if (last >= 6)
8056 {
8057 int start;
8058
8059 for (start = first >= 6 ? first : 6; start <= last; start++)
8060 if (!global_not_special_regno_p (start))
8061 break;
8062
8063 if (start > last)
8064 return insn;
8065
8066 addr = plus_constant (Pmode, base,
8067 offset + (start - first) * UNITS_PER_LONG);
8068 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
8069 gen_rtx_REG (Pmode, start),
8070 GEN_INT (last - start + 1));
8071 note = PATTERN (note);
8072
8073 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
8074
8075 for (i = 0; i < XVECLEN (note, 0); i++)
8076 if (GET_CODE (XVECEXP (note, 0, i)) == SET
8077 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
8078 0, i)))))
8079 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
8080
8081 RTX_FRAME_RELATED_P (insn) = 1;
8082 }
8083
8084 return insn;
8085 }
8086
8087 /* Generate insn to restore registers FIRST to LAST from
8088 the register save area located at offset OFFSET
8089 relative to register BASE. */
8090
8091 static rtx
8092 restore_gprs (rtx base, int offset, int first, int last)
8093 {
8094 rtx addr, insn;
8095
8096 addr = plus_constant (Pmode, base, offset);
8097 addr = gen_rtx_MEM (Pmode, addr);
8098 set_mem_alias_set (addr, get_frame_alias_set ());
8099
8100 /* Special-case single register. */
8101 if (first == last)
8102 {
8103 if (TARGET_64BIT)
8104 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
8105 else
8106 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
8107
8108 return insn;
8109 }
8110
8111 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
8112 addr,
8113 GEN_INT (last - first + 1));
8114 return insn;
8115 }
8116
8117 /* Return insn sequence to load the GOT register. */
8118
8119 static GTY(()) rtx got_symbol;
8120 rtx
8121 s390_load_got (void)
8122 {
8123 rtx insns;
8124
8125 /* We cannot use pic_offset_table_rtx here since this function is
8126 also used for non-pic code when __tls_get_offset is called; in
8127 that case neither PIC_OFFSET_TABLE_REGNUM nor pic_offset_table_rtx
8128 is usable. */
8129 rtx got_rtx = gen_rtx_REG (Pmode, 12);
8130
8131 if (!got_symbol)
8132 {
8133 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8134 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
8135 }
8136
8137 start_sequence ();
8138
8139 if (TARGET_CPU_ZARCH)
8140 {
8141 emit_move_insn (got_rtx, got_symbol);
8142 }
8143 else
8144 {
8145 rtx offset;
8146
8147 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
8148 UNSPEC_LTREL_OFFSET);
8149 offset = gen_rtx_CONST (Pmode, offset);
8150 offset = force_const_mem (Pmode, offset);
8151
8152 emit_move_insn (got_rtx, offset);
8153
8154 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
8155 UNSPEC_LTREL_BASE);
8156 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
8157
8158 emit_move_insn (got_rtx, offset);
8159 }
8160
8161 insns = get_insns ();
8162 end_sequence ();
8163 return insns;
8164 }
8165
8166 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
8167 and the change to the stack pointer. */
8168
8169 static void
8170 s390_emit_stack_tie (void)
8171 {
8172 rtx mem = gen_frame_mem (BLKmode,
8173 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
8174
8175 emit_insn (gen_stack_tie (mem));
8176 }
8177
8178 /* Expand the prologue into a bunch of separate insns. */
8179
8180 void
8181 s390_emit_prologue (void)
8182 {
8183 rtx insn, addr;
8184 rtx temp_reg;
8185 int i;
8186 int offset;
8187 int next_fpr = 0;
8188
8189 /* Complete frame layout. */
8190
8191 s390_update_frame_layout ();
8192
8193 /* Annotate all constant pool references to let the scheduler know
8194 they implicitly use the base register. */
8195
8196 push_topmost_sequence ();
8197
8198 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8199 if (INSN_P (insn))
8200 {
8201 annotate_constant_pool_refs (&PATTERN (insn));
8202 df_insn_rescan (insn);
8203 }
8204
8205 pop_topmost_sequence ();
8206
8207 /* Choose the best register for temporary use within the prologue.
8208 See below for why TPF must use register 1. */
8209
8210 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
8211 && !crtl->is_leaf
8212 && !TARGET_TPF_PROFILING)
8213 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8214 else
8215 temp_reg = gen_rtx_REG (Pmode, 1);
8216
8217 /* Save call saved gprs. */
8218 if (cfun_frame_layout.first_save_gpr != -1)
8219 {
8220 insn = save_gprs (stack_pointer_rtx,
8221 cfun_frame_layout.gprs_offset +
8222 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
8223 - cfun_frame_layout.first_save_gpr_slot),
8224 cfun_frame_layout.first_save_gpr,
8225 cfun_frame_layout.last_save_gpr);
8226 emit_insn (insn);
8227 }
8228
8229 /* Dummy insn to mark literal pool slot. */
8230
8231 if (cfun->machine->base_reg)
8232 emit_insn (gen_main_pool (cfun->machine->base_reg));
8233
8234 offset = cfun_frame_layout.f0_offset;
8235
8236 /* Save f0 and f2. */
8237 for (i = 0; i < 2; i++)
8238 {
8239 if (cfun_fpr_bit_p (i))
8240 {
8241 save_fpr (stack_pointer_rtx, offset, i + 16);
8242 offset += 8;
8243 }
8244 else if (!TARGET_PACKED_STACK)
8245 offset += 8;
8246 }
8247
8248 /* Save f4 and f6. */
8249 offset = cfun_frame_layout.f4_offset;
8250 for (i = 2; i < 4; i++)
8251 {
8252 if (cfun_fpr_bit_p (i))
8253 {
8254 insn = save_fpr (stack_pointer_rtx, offset, i + 16);
8255 offset += 8;
8256
8257 /* If f4 and f6 are call clobbered, they are saved only because of stdarg
8258 and are therefore not frame related. */
8259 if (!call_really_used_regs[i + 16])
8260 RTX_FRAME_RELATED_P (insn) = 1;
8261 }
8262 else if (!TARGET_PACKED_STACK)
8263 offset += 8;
8264 }
8265
8266 if (TARGET_PACKED_STACK
8267 && cfun_save_high_fprs_p
8268 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
8269 {
8270 offset = (cfun_frame_layout.f8_offset
8271 + (cfun_frame_layout.high_fprs - 1) * 8);
8272
8273 for (i = 15; i > 7 && offset >= 0; i--)
8274 if (cfun_fpr_bit_p (i))
8275 {
8276 insn = save_fpr (stack_pointer_rtx, offset, i + 16);
8277
8278 RTX_FRAME_RELATED_P (insn) = 1;
8279 offset -= 8;
8280 }
8281 if (offset >= cfun_frame_layout.f8_offset)
8282 next_fpr = i + 16;
8283 }
8284
8285 if (!TARGET_PACKED_STACK)
8286 next_fpr = cfun_save_high_fprs_p ? 31 : 0;
8287
8288 if (flag_stack_usage_info)
8289 current_function_static_stack_size = cfun_frame_layout.frame_size;
8290
8291 /* Decrement stack pointer. */
8292
8293 if (cfun_frame_layout.frame_size > 0)
8294 {
8295 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8296 rtx real_frame_off;
8297
8298 if (s390_stack_size)
8299 {
8300 HOST_WIDE_INT stack_guard;
8301
8302 if (s390_stack_guard)
8303 stack_guard = s390_stack_guard;
8304 else
8305 {
8306 /* If no value for the stack guard is provided, the smallest power of 2
8307 at least as large as the current frame size is chosen. */
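/* E.g. a frame size of 5000 bytes yields a stack guard of 8192.  */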
8308 stack_guard = 1;
8309 while (stack_guard < cfun_frame_layout.frame_size)
8310 stack_guard <<= 1;
8311 }
8312
8313 if (cfun_frame_layout.frame_size >= s390_stack_size)
8314 {
8315 warning (0, "frame size of function %qs is %wd"
8316 " bytes exceeding user provided stack limit of "
8317 "%d bytes. "
8318 "An unconditional trap is added.",
8319 current_function_name(), cfun_frame_layout.frame_size,
8320 s390_stack_size);
8321 emit_insn (gen_trap ());
8322 }
8323 else
8324 {
8325 /* stack_guard has to be smaller than s390_stack_size.
8326 Otherwise we would emit an AND with zero which would
8327 not match the test under mask pattern. */
8328 if (stack_guard >= s390_stack_size)
8329 {
8330 warning (0, "frame size of function %qs is %wd"
8331 " bytes which is more than half the stack size. "
8332 "The dynamic check would not be reliable. "
8333 "No check emitted for this function.",
8334 current_function_name(),
8335 cfun_frame_layout.frame_size);
8336 }
8337 else
8338 {
8339 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
8340 & ~(stack_guard - 1));
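/* For example, with s390_stack_size == 65536 and stack_guard == 4096 the
   mask is 0xf000; the conditional trap below fires when all of those bits
   of the stack pointer are zero.  */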
8341
8342 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
8343 GEN_INT (stack_check_mask));
8344 if (TARGET_64BIT)
8345 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
8346 t, const0_rtx),
8347 t, const0_rtx, const0_rtx));
8348 else
8349 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
8350 t, const0_rtx),
8351 t, const0_rtx, const0_rtx));
8352 }
8353 }
8354 }
8355
8356 if (s390_warn_framesize > 0
8357 && cfun_frame_layout.frame_size >= s390_warn_framesize)
8358 warning (0, "frame size of %qs is %wd bytes",
8359 current_function_name (), cfun_frame_layout.frame_size);
8360
8361 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
8362 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
8363
8364 /* Save incoming stack pointer into temp reg. */
8365 if (TARGET_BACKCHAIN || next_fpr)
8366 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
8367
8368 /* Subtract frame size from stack pointer. */
8369
8370 if (DISP_IN_RANGE (INTVAL (frame_off)))
8371 {
8372 insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8373 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8374 frame_off));
8375 insn = emit_insn (insn);
8376 }
8377 else
8378 {
8379 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
8380 frame_off = force_const_mem (Pmode, frame_off);
8381
8382 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
8383 annotate_constant_pool_refs (&PATTERN (insn));
8384 }
8385
8386 RTX_FRAME_RELATED_P (insn) = 1;
8387 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8388 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8389 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8390 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8391 real_frame_off)));
8392
8393 /* Set backchain. */
8394
8395 if (TARGET_BACKCHAIN)
8396 {
8397 if (cfun_frame_layout.backchain_offset)
8398 addr = gen_rtx_MEM (Pmode,
8399 plus_constant (Pmode, stack_pointer_rtx,
8400 cfun_frame_layout.backchain_offset));
8401 else
8402 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8403 set_mem_alias_set (addr, get_frame_alias_set ());
8404 insn = emit_insn (gen_move_insn (addr, temp_reg));
8405 }
8406
8407 /* If we support non-call exceptions (e.g. for Java),
8408 we need to make sure the backchain pointer is set up
8409 before any possibly trapping memory access. */
8410 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
8411 {
8412 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8413 emit_clobber (addr);
8414 }
8415 }
8416
8417 /* Save fprs 8 - 15 (64 bit ABI). */
8418
8419 if (cfun_save_high_fprs_p && next_fpr)
8420 {
8421 /* If the stack might be accessed through a different register,
8422 we have to make sure that the stack pointer decrement is not
8423 moved below the use of the stack slots. */
8424 s390_emit_stack_tie ();
8425
8426 insn = emit_insn (gen_add2_insn (temp_reg,
8427 GEN_INT (cfun_frame_layout.f8_offset)));
8428
8429 offset = 0;
8430
8431 for (i = 24; i <= next_fpr; i++)
8432 if (cfun_fpr_bit_p (i - 16))
8433 {
8434 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
8435 cfun_frame_layout.frame_size
8436 + cfun_frame_layout.f8_offset
8437 + offset);
8438
8439 insn = save_fpr (temp_reg, offset, i);
8440 offset += 8;
8441 RTX_FRAME_RELATED_P (insn) = 1;
8442 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8443 gen_rtx_SET (VOIDmode,
8444 gen_rtx_MEM (DFmode, addr),
8445 gen_rtx_REG (DFmode, i)));
8446 }
8447 }
8448
8449 /* Set frame pointer, if needed. */
8450
8451 if (frame_pointer_needed)
8452 {
8453 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8454 RTX_FRAME_RELATED_P (insn) = 1;
8455 }
8456
8457 /* Set up got pointer, if needed. */
8458
8459 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
8460 {
8461 rtx insns = s390_load_got ();
8462
8463 for (insn = insns; insn; insn = NEXT_INSN (insn))
8464 annotate_constant_pool_refs (&PATTERN (insn));
8465
8466 emit_insn (insns);
8467 }
8468
8469 if (TARGET_TPF_PROFILING)
8470 {
8471 /* Generate a BAS instruction to serve as a function
8472 entry intercept to facilitate the use of tracing
8473 algorithms located at the branch target. */
8474 emit_insn (gen_prologue_tpf ());
8475
8476 /* Emit a blockage here so that all code
8477 lies between the profiling mechanisms. */
8478 emit_insn (gen_blockage ());
8479 }
8480 }
8481
8482 /* Expand the epilogue into a bunch of separate insns. */
8483
8484 void
8485 s390_emit_epilogue (bool sibcall)
8486 {
8487 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
8488 int area_bottom, area_top, offset = 0;
8489 int next_offset;
8490 rtvec p;
8491 int i;
8492
8493 if (TARGET_TPF_PROFILING)
8494 {
8495
8496 /* Generate a BAS instruction to serve as a function
8497 entry intercept to facilitate the use of tracing
8498 algorithms located at the branch target. */
8499
8500 /* Emit a blockage here so that all code
8501 lies between the profiling mechanisms. */
8502 emit_insn (gen_blockage ());
8503
8504 emit_insn (gen_epilogue_tpf ());
8505 }
8506
8507 /* Check whether to use frame or stack pointer for restore. */
8508
8509 frame_pointer = (frame_pointer_needed
8510 ? hard_frame_pointer_rtx : stack_pointer_rtx);
8511
8512 s390_frame_area (&area_bottom, &area_top);
8513
8514 /* Check whether we can access the register save area.
8515 If not, increment the frame pointer as required. */
8516
8517 if (area_top <= area_bottom)
8518 {
8519 /* Nothing to restore. */
8520 }
8521 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
8522 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
8523 {
8524 /* Area is in range. */
8525 offset = cfun_frame_layout.frame_size;
8526 }
8527 else
8528 {
8529 rtx insn, frame_off, cfa;
8530
8531 offset = area_bottom < 0 ? -area_bottom : 0;
8532 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
8533
8534 cfa = gen_rtx_SET (VOIDmode, frame_pointer,
8535 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
8536 if (DISP_IN_RANGE (INTVAL (frame_off)))
8537 {
8538 insn = gen_rtx_SET (VOIDmode, frame_pointer,
8539 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
8540 insn = emit_insn (insn);
8541 }
8542 else
8543 {
8544 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
8545 frame_off = force_const_mem (Pmode, frame_off);
8546
8547 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
8548 annotate_constant_pool_refs (&PATTERN (insn));
8549 }
8550 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
8551 RTX_FRAME_RELATED_P (insn) = 1;
8552 }
8553
8554 /* Restore call saved fprs. */
8555
8556 if (TARGET_64BIT)
8557 {
8558 if (cfun_save_high_fprs_p)
8559 {
8560 next_offset = cfun_frame_layout.f8_offset;
8561 for (i = 24; i < 32; i++)
8562 {
8563 if (cfun_fpr_bit_p (i - 16))
8564 {
8565 restore_fpr (frame_pointer,
8566 offset + next_offset, i);
8567 cfa_restores
8568 = alloc_reg_note (REG_CFA_RESTORE,
8569 gen_rtx_REG (DFmode, i), cfa_restores);
8570 next_offset += 8;
8571 }
8572 }
8573 }
8574
8575 }
8576 else
8577 {
8578 next_offset = cfun_frame_layout.f4_offset;
8579 for (i = 18; i < 20; i++)
8580 {
8581 if (cfun_fpr_bit_p (i - 16))
8582 {
8583 restore_fpr (frame_pointer,
8584 offset + next_offset, i);
8585 cfa_restores
8586 = alloc_reg_note (REG_CFA_RESTORE,
8587 gen_rtx_REG (DFmode, i), cfa_restores);
8588 next_offset += 8;
8589 }
8590 else if (!TARGET_PACKED_STACK)
8591 next_offset += 8;
8592 }
8593
8594 }
8595
8596 /* Return register. */
8597
8598 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8599
8600 /* Restore call saved gprs. */
8601
8602 if (cfun_frame_layout.first_restore_gpr != -1)
8603 {
8604 rtx insn, addr;
8605 int i;
8606
8607 /* Check for global registers and store them back to the stack
8608 slots from which the load multiple below will restore them. */
8609
8610 for (i = cfun_frame_layout.first_restore_gpr;
8611 i <= cfun_frame_layout.last_restore_gpr;
8612 i++)
8613 {
8614 if (global_not_special_regno_p (i))
8615 {
8616 addr = plus_constant (Pmode, frame_pointer,
8617 offset + cfun_frame_layout.gprs_offset
8618 + (i - cfun_frame_layout.first_save_gpr_slot)
8619 * UNITS_PER_LONG);
8620 addr = gen_rtx_MEM (Pmode, addr);
8621 set_mem_alias_set (addr, get_frame_alias_set ());
8622 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
8623 }
8624 else
8625 cfa_restores
8626 = alloc_reg_note (REG_CFA_RESTORE,
8627 gen_rtx_REG (Pmode, i), cfa_restores);
8628 }
8629
8630 if (! sibcall)
8631 {
8632 /* Fetch the return address from the stack before the load multiple;
8633 this is beneficial for scheduling. */
8634
8635 if (cfun_frame_layout.save_return_addr_p
8636 || (cfun_frame_layout.first_restore_gpr < BASE_REGNUM
8637 && cfun_frame_layout.last_restore_gpr > RETURN_REGNUM))
8638 {
8639 int return_regnum = find_unused_clobbered_reg();
8640 if (!return_regnum)
8641 return_regnum = 4;
8642 return_reg = gen_rtx_REG (Pmode, return_regnum);
8643
8644 addr = plus_constant (Pmode, frame_pointer,
8645 offset + cfun_frame_layout.gprs_offset
8646 + (RETURN_REGNUM
8647 - cfun_frame_layout.first_save_gpr_slot)
8648 * UNITS_PER_LONG);
8649 addr = gen_rtx_MEM (Pmode, addr);
8650 set_mem_alias_set (addr, get_frame_alias_set ());
8651 emit_move_insn (return_reg, addr);
8652 }
8653 }
8654
8655 insn = restore_gprs (frame_pointer,
8656 offset + cfun_frame_layout.gprs_offset
8657 + (cfun_frame_layout.first_restore_gpr
8658 - cfun_frame_layout.first_save_gpr_slot)
8659 * UNITS_PER_LONG,
8660 cfun_frame_layout.first_restore_gpr,
8661 cfun_frame_layout.last_restore_gpr);
8662 insn = emit_insn (insn);
8663 REG_NOTES (insn) = cfa_restores;
8664 add_reg_note (insn, REG_CFA_DEF_CFA,
8665 plus_constant (Pmode, stack_pointer_rtx,
8666 STACK_POINTER_OFFSET));
8667 RTX_FRAME_RELATED_P (insn) = 1;
8668 }
8669
8670 if (! sibcall)
8671 {
8672
8673 /* Return to caller. */
8674
8675 p = rtvec_alloc (2);
8676
8677 RTVEC_ELT (p, 0) = ret_rtx;
8678 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
8679 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
8680 }
8681 }
8682
8683
8684 /* Return the size in bytes of a function argument of
8685 type TYPE and/or mode MODE. At least one of TYPE or
8686 MODE must be specified. */
8687
8688 static int
8689 s390_function_arg_size (enum machine_mode mode, const_tree type)
8690 {
8691 if (type)
8692 return int_size_in_bytes (type);
8693
8694 /* No type info available for some library calls ... */
8695 if (mode != BLKmode)
8696 return GET_MODE_SIZE (mode);
8697
8698 /* If we have neither type nor mode, abort. */
8699 gcc_unreachable ();
8700 }
8701
8702 /* Return true if a function argument of type TYPE and mode MODE
8703 is to be passed in a floating-point register, if available. */
8704
8705 static bool
8706 s390_function_arg_float (enum machine_mode mode, const_tree type)
8707 {
8708 int size = s390_function_arg_size (mode, type);
8709 if (size > 8)
8710 return false;
8711
8712 /* Soft-float changes the ABI: no floating-point registers are used. */
8713 if (TARGET_SOFT_FLOAT)
8714 return false;
8715
8716 /* No type info available for some library calls ... */
8717 if (!type)
8718 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
8719
8720 /* The ABI says that record types with a single member are treated
8721 just like that member would be. */
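/* E.g. struct { double d; } is passed like a bare double, whereas
   struct { double d; int i; } is not, since it has two fields.  */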
8722 while (TREE_CODE (type) == RECORD_TYPE)
8723 {
8724 tree field, single = NULL_TREE;
8725
8726 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8727 {
8728 if (TREE_CODE (field) != FIELD_DECL)
8729 continue;
8730
8731 if (single == NULL_TREE)
8732 single = TREE_TYPE (field);
8733 else
8734 return false;
8735 }
8736
8737 if (single == NULL_TREE)
8738 return false;
8739 else
8740 type = single;
8741 }
8742
8743 return TREE_CODE (type) == REAL_TYPE;
8744 }
8745
8746 /* Return true if a function argument of type TYPE and mode MODE
8747 is to be passed in an integer register, or a pair of integer
8748 registers, if available. */
8749
8750 static bool
8751 s390_function_arg_integer (enum machine_mode mode, const_tree type)
8752 {
8753 int size = s390_function_arg_size (mode, type);
8754 if (size > 8)
8755 return false;
8756
8757 /* No type info available for some library calls ... */
8758 if (!type)
8759 return GET_MODE_CLASS (mode) == MODE_INT
8760 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
8761
8762 /* We accept small integral (and similar) types. */
8763 if (INTEGRAL_TYPE_P (type)
8764 || POINTER_TYPE_P (type)
8765 || TREE_CODE (type) == NULLPTR_TYPE
8766 || TREE_CODE (type) == OFFSET_TYPE
8767 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
8768 return true;
8769
8770 /* We also accept structs of size 1, 2, 4, 8 that are not
8771 passed in floating-point registers. */
8772 if (AGGREGATE_TYPE_P (type)
8773 && exact_log2 (size) >= 0
8774 && !s390_function_arg_float (mode, type))
8775 return true;
8776
8777 return false;
8778 }
8779
8780 /* Return 1 if a function argument of type TYPE and mode MODE
8781 is to be passed by reference. The ABI specifies that only
8782 structures of size 1, 2, 4, or 8 bytes are passed by value,
8783 all other structures (and complex numbers) are passed by
8784 reference. */
8785
8786 static bool
8787 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
8788 enum machine_mode mode, const_tree type,
8789 bool named ATTRIBUTE_UNUSED)
8790 {
8791 int size = s390_function_arg_size (mode, type);
8792 if (size > 8)
8793 return true;
8794
8795 if (type)
8796 {
8797 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
8798 return 1;
8799
8800 if (TREE_CODE (type) == COMPLEX_TYPE
8801 || TREE_CODE (type) == VECTOR_TYPE)
8802 return 1;
8803 }
8804
8805 return 0;
8806 }
8807
8808 /* Update the data in CUM to advance over an argument of mode MODE and
8809 data type TYPE. (TYPE is null for libcalls where that information
8810 may not be available.) The boolean NAMED specifies whether the
8811 argument is a named argument (as opposed to an unnamed argument
8812 matching an ellipsis). */
8813
8814 static void
8815 s390_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
8816 const_tree type, bool named ATTRIBUTE_UNUSED)
8817 {
8818 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8819
8820 if (s390_function_arg_float (mode, type))
8821 {
8822 cum->fprs += 1;
8823 }
8824 else if (s390_function_arg_integer (mode, type))
8825 {
8826 int size = s390_function_arg_size (mode, type);
8827 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
8828 }
8829 else
8830 gcc_unreachable ();
8831 }
8832
8833 /* Define where to put the arguments to a function.
8834 Value is zero to push the argument on the stack,
8835 or a hard register in which to store the argument.
8836
8837 MODE is the argument's machine mode.
8838 TYPE is the data type of the argument (as a tree).
8839 This is null for libcalls where that information may
8840 not be available.
8841 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8842 the preceding args and about the function being called.
8843 NAMED is nonzero if this argument is a named parameter
8844 (otherwise it is an extra parameter matching an ellipsis).
8845
8846 On S/390, we use general purpose registers 2 through 6 to
8847 pass integer, pointer, and certain structure arguments, and
8848 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
8849 to pass floating point arguments. All remaining arguments
8850 are pushed to the stack. */
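/* For example, on 31 bit (UNITS_PER_LONG == 4) a DImode argument occupies
   two consecutive gprs and is described below as a PARALLEL of two SImode
   pieces at offsets 0 and 4.  */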
8851
8852 static rtx
8853 s390_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
8854 const_tree type, bool named ATTRIBUTE_UNUSED)
8855 {
8856 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8857
8858 if (s390_function_arg_float (mode, type))
8859 {
8860 if (cum->fprs + 1 > FP_ARG_NUM_REG)
8861 return 0;
8862 else
8863 return gen_rtx_REG (mode, cum->fprs + 16);
8864 }
8865 else if (s390_function_arg_integer (mode, type))
8866 {
8867 int size = s390_function_arg_size (mode, type);
8868 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
8869
8870 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
8871 return 0;
8872 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
8873 return gen_rtx_REG (mode, cum->gprs + 2);
8874 else if (n_gprs == 2)
8875 {
8876 rtvec p = rtvec_alloc (2);
8877
8878 RTVEC_ELT (p, 0)
8879 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
8880 const0_rtx);
8881 RTVEC_ELT (p, 1)
8882 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
8883 GEN_INT (4));
8884
8885 return gen_rtx_PARALLEL (mode, p);
8886 }
8887 }
8888
8889 /* After the real arguments, expand_call calls us once again
8890 with a void_type_node type. Whatever we return here is
8891 passed as operand 2 to the call expanders.
8892
8893 We don't need this feature ... */
8894 else if (type == void_type_node)
8895 return const0_rtx;
8896
8897 gcc_unreachable ();
8898 }
8899
8900 /* Return true if return values of type TYPE should be returned
8901 in a memory buffer whose address is passed by the caller as
8902 hidden first argument. */
8903
8904 static bool
8905 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
8906 {
8907 /* We accept small integral (and similar) types. */
8908 if (INTEGRAL_TYPE_P (type)
8909 || POINTER_TYPE_P (type)
8910 || TREE_CODE (type) == OFFSET_TYPE
8911 || TREE_CODE (type) == REAL_TYPE)
8912 return int_size_in_bytes (type) > 8;
8913
8914 /* Aggregates and similar constructs are always returned
8915 in memory. */
8916 if (AGGREGATE_TYPE_P (type)
8917 || TREE_CODE (type) == COMPLEX_TYPE
8918 || TREE_CODE (type) == VECTOR_TYPE)
8919 return true;
8920
8921 /* ??? We get called on all sorts of random stuff from
8922 aggregate_value_p. We can't abort, but it's not clear
8923 what's safe to return. Pretend it's a struct I guess. */
8924 return true;
8925 }
8926
8927 /* Function arguments and return values are promoted to word size. */
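/* For example, a QImode or HImode value is promoted to Pmode (SImode on
   31 bit, DImode on 64 bit); for pointer types the signedness is taken
   from POINTERS_EXTEND_UNSIGNED.  */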
8928
8929 static enum machine_mode
8930 s390_promote_function_mode (const_tree type, enum machine_mode mode,
8931 int *punsignedp,
8932 const_tree fntype ATTRIBUTE_UNUSED,
8933 int for_return ATTRIBUTE_UNUSED)
8934 {
8935 if (INTEGRAL_MODE_P (mode)
8936 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
8937 {
8938 if (type != NULL_TREE && POINTER_TYPE_P (type))
8939 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8940 return Pmode;
8941 }
8942
8943 return mode;
8944 }
8945
8946 /* Define where to return a (scalar) value of type RET_TYPE.
8947 If RET_TYPE is null, define where to return a (scalar)
8948 value of mode MODE from a libcall. */
8949
8950 static rtx
8951 s390_function_and_libcall_value (enum machine_mode mode,
8952 const_tree ret_type,
8953 const_tree fntype_or_decl,
8954 bool outgoing ATTRIBUTE_UNUSED)
8955 {
8956 /* For normal functions perform the promotion as
8957 promote_function_mode would do. */
8958 if (ret_type)
8959 {
8960 int unsignedp = TYPE_UNSIGNED (ret_type);
8961 mode = promote_function_mode (ret_type, mode, &unsignedp,
8962 fntype_or_decl, 1);
8963 }
8964
8965 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode));
8966 gcc_assert (GET_MODE_SIZE (mode) <= 8);
8967
8968 if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
8969 return gen_rtx_REG (mode, 16);
8970 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
8971 || UNITS_PER_LONG == UNITS_PER_WORD)
8972 return gen_rtx_REG (mode, 2);
8973 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
8974 {
8975 /* This case is triggered when returning a 64 bit value with
8976 -m31 -mzarch. Although the value would fit into a single
8977 register, it has to be forced into a 32 bit register pair in
8978 order to match the ABI. */
8979 rtvec p = rtvec_alloc (2);
8980
8981 RTVEC_ELT (p, 0)
8982 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
8983 RTVEC_ELT (p, 1)
8984 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
8985
8986 return gen_rtx_PARALLEL (mode, p);
8987 }
8988
8989 gcc_unreachable ();
8990 }
8991
8992 /* Define where to return a scalar return value of type RET_TYPE. */
8993
8994 static rtx
8995 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
8996 bool outgoing)
8997 {
8998 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
8999 fn_decl_or_type, outgoing);
9000 }
9001
9002 /* Define where to return a scalar libcall return value of mode
9003 MODE. */
9004
9005 static rtx
9006 s390_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9007 {
9008 return s390_function_and_libcall_value (mode, NULL_TREE,
9009 NULL_TREE, true);
9010 }
9011
9012
9013 /* Create and return the va_list datatype.
9014
9015 On S/390, va_list is an array type equivalent to
9016
9017 typedef struct __va_list_tag
9018 {
9019 long __gpr;
9020 long __fpr;
9021 void *__overflow_arg_area;
9022 void *__reg_save_area;
9023 } va_list[1];
9024
9025 where __gpr and __fpr hold the number of general purpose
9026 or floating point arguments used up to now, respectively,
9027 __overflow_arg_area points to the stack location of the
9028 next argument passed on the stack, and __reg_save_area
9029 always points to the start of the register area in the
9030 call frame of the current function. The function prologue
9031 saves all registers used for argument passing into this
9032 area if the function uses variable arguments. */
9033
9034 static tree
9035 s390_build_builtin_va_list (void)
9036 {
9037 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9038
9039 record = lang_hooks.types.make_type (RECORD_TYPE);
9040
9041 type_decl =
9042 build_decl (BUILTINS_LOCATION,
9043 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9044
9045 f_gpr = build_decl (BUILTINS_LOCATION,
9046 FIELD_DECL, get_identifier ("__gpr"),
9047 long_integer_type_node);
9048 f_fpr = build_decl (BUILTINS_LOCATION,
9049 FIELD_DECL, get_identifier ("__fpr"),
9050 long_integer_type_node);
9051 f_ovf = build_decl (BUILTINS_LOCATION,
9052 FIELD_DECL, get_identifier ("__overflow_arg_area"),
9053 ptr_type_node);
9054 f_sav = build_decl (BUILTINS_LOCATION,
9055 FIELD_DECL, get_identifier ("__reg_save_area"),
9056 ptr_type_node);
9057
9058 va_list_gpr_counter_field = f_gpr;
9059 va_list_fpr_counter_field = f_fpr;
9060
9061 DECL_FIELD_CONTEXT (f_gpr) = record;
9062 DECL_FIELD_CONTEXT (f_fpr) = record;
9063 DECL_FIELD_CONTEXT (f_ovf) = record;
9064 DECL_FIELD_CONTEXT (f_sav) = record;
9065
9066 TYPE_STUB_DECL (record) = type_decl;
9067 TYPE_NAME (record) = type_decl;
9068 TYPE_FIELDS (record) = f_gpr;
9069 DECL_CHAIN (f_gpr) = f_fpr;
9070 DECL_CHAIN (f_fpr) = f_ovf;
9071 DECL_CHAIN (f_ovf) = f_sav;
9072
9073 layout_type (record);
9074
9075 /* The correct type is an array type of one element. */
9076 return build_array_type (record, build_index_type (size_zero_node));
9077 }
9078
9079 /* Implement va_start by filling the va_list structure VALIST.
9080 STDARG_P is always true, and ignored.
9081 NEXTARG points to the first anonymous stack argument.
9082
9083 The following global variables are used to initialize
9084 the va_list structure:
9085
9086 crtl->args.info:
9087 holds number of gprs and fprs used for named arguments.
9088 crtl->args.arg_offset_rtx:
9089 holds the offset of the first anonymous stack argument
9090 (relative to the virtual arg pointer). */
9091
9092 static void
9093 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
9094 {
9095 HOST_WIDE_INT n_gpr, n_fpr;
9096 int off;
9097 tree f_gpr, f_fpr, f_ovf, f_sav;
9098 tree gpr, fpr, ovf, sav, t;
9099
9100 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9101 f_fpr = DECL_CHAIN (f_gpr);
9102 f_ovf = DECL_CHAIN (f_fpr);
9103 f_sav = DECL_CHAIN (f_ovf);
9104
9105 valist = build_simple_mem_ref (valist);
9106 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9107 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9108 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9109 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9110
9111 /* Count number of gp and fp argument registers used. */
9112
9113 n_gpr = crtl->args.info.gprs;
9114 n_fpr = crtl->args.info.fprs;
9115
9116 if (cfun->va_list_gpr_size)
9117 {
9118 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
9119 build_int_cst (NULL_TREE, n_gpr));
9120 TREE_SIDE_EFFECTS (t) = 1;
9121 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9122 }
9123
9124 if (cfun->va_list_fpr_size)
9125 {
9126 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
9127 build_int_cst (NULL_TREE, n_fpr));
9128 TREE_SIDE_EFFECTS (t) = 1;
9129 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9130 }
9131
9132 /* Find the overflow area. */
9133 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
9134 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG)
9135 {
9136 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
9137
9138 off = INTVAL (crtl->args.arg_offset_rtx);
9139 off = off < 0 ? 0 : off;
9140 if (TARGET_DEBUG_ARG)
9141 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
9142 (int)n_gpr, (int)n_fpr, off);
9143
9144 t = fold_build_pointer_plus_hwi (t, off);
9145
9146 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
9147 TREE_SIDE_EFFECTS (t) = 1;
9148 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9149 }
9150
9151 /* Find the register save area. */
9152 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
9153 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
9154 {
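/* The return address pointer points at the save slot of RETURN_REGNUM;
   stepping back RETURN_REGNUM slots yields the notional slot of r0,
   which is where the va_arg expansion expects __reg_save_area to point.  */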
9155 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
9156 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
9157
9158 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
9159 TREE_SIDE_EFFECTS (t) = 1;
9160 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9161 }
9162 }
9163
9164 /* Implement va_arg by updating the va_list structure
9165 VALIST as required to retrieve an argument of type
9166 TYPE, and returning that argument.
9167
9168 Generates code equivalent to:
9169
9170 if (integral value) {
9171 if ((size <= 4 && args.gpr < 5) ||
9172 (size > 4 && args.gpr < 4))
9173 ret = args.reg_save_area[args.gpr+8]
9174 else
9175 ret = *args.overflow_arg_area++;
9176 } else if (float value) {
9177 if (args.fpr < 2)
9178 ret = args.reg_save_area[args.fpr+64]
9179 else
9180 ret = *args.overflow_arg_area++;
9181 } else if (aggregate value) {
9182 if (args.gpr < 5)
9183 ret = *args.reg_save_area[args.gpr]
9184 else
9185 ret = **args.overflow_arg_area++;
9186 } */
9187
9188 static tree
9189 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9190 gimple_seq *post_p ATTRIBUTE_UNUSED)
9191 {
9192 tree f_gpr, f_fpr, f_ovf, f_sav;
9193 tree gpr, fpr, ovf, sav, reg, t, u;
9194 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
9195 tree lab_false, lab_over, addr;
9196
9197 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9198 f_fpr = DECL_CHAIN (f_gpr);
9199 f_ovf = DECL_CHAIN (f_fpr);
9200 f_sav = DECL_CHAIN (f_ovf);
9201
9202 valist = build_va_arg_indirect_ref (valist);
9203 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9204 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9205 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9206
9207 /* The tree for args* cannot be shared between gpr/fpr and ovf since
9208 both appear on a lhs. */
9209 valist = unshare_expr (valist);
9210 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9211
9212 size = int_size_in_bytes (type);
9213
9214 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
9215 {
9216 if (TARGET_DEBUG_ARG)
9217 {
9218 fprintf (stderr, "va_arg: aggregate type");
9219 debug_tree (type);
9220 }
9221
9222 /* Aggregates are passed by reference. */
9223 indirect_p = 1;
9224 reg = gpr;
9225 n_reg = 1;
9226
9227 /* Kernel stack layout on 31 bit: it is assumed here that no padding
9228 will be added by s390_frame_info because for va_args an even number
9229 of gprs always has to be saved (r15-r2 = 14 regs). */
9230 sav_ofs = 2 * UNITS_PER_LONG;
9231 sav_scale = UNITS_PER_LONG;
9232 size = UNITS_PER_LONG;
9233 max_reg = GP_ARG_NUM_REG - n_reg;
9234 }
9235 else if (s390_function_arg_float (TYPE_MODE (type), type))
9236 {
9237 if (TARGET_DEBUG_ARG)
9238 {
9239 fprintf (stderr, "va_arg: float type");
9240 debug_tree (type);
9241 }
9242
9243 /* FP args go in FP registers, if present. */
9244 indirect_p = 0;
9245 reg = fpr;
9246 n_reg = 1;
9247 sav_ofs = 16 * UNITS_PER_LONG;
9248 sav_scale = 8;
9249 max_reg = FP_ARG_NUM_REG - n_reg;
9250 }
9251 else
9252 {
9253 if (TARGET_DEBUG_ARG)
9254 {
9255 fprintf (stderr, "va_arg: other type");
9256 debug_tree (type);
9257 }
9258
9259 /* Otherwise into GP registers. */
9260 indirect_p = 0;
9261 reg = gpr;
9262 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
9263
9264 /* Kernel stack layout on 31 bit: it is assumed here that no padding
9265 will be added by s390_frame_info because for va_args an even number
9266 of gprs always has to be saved (r15-r2 = 14 regs). */
9267 sav_ofs = 2 * UNITS_PER_LONG;
9268
9269 if (size < UNITS_PER_LONG)
9270 sav_ofs += UNITS_PER_LONG - size;
9271
9272 sav_scale = UNITS_PER_LONG;
9273 max_reg = GP_ARG_NUM_REG - n_reg;
9274 }
9275
9276 /* Pull the value out of the saved registers ... */
9277
9278 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9279 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9280 addr = create_tmp_var (ptr_type_node, "addr");
9281
9282 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
9283 t = build2 (GT_EXPR, boolean_type_node, reg, t);
9284 u = build1 (GOTO_EXPR, void_type_node, lab_false);
9285 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
9286 gimplify_and_add (t, pre_p);
9287
9288 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
9289 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
9290 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
9291 t = fold_build_pointer_plus (t, u);
9292
9293 gimplify_assign (addr, t, pre_p);
9294
9295 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9296
9297 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9298
9299
9300 /* ... Otherwise out of the overflow area. */
9301
9302 t = ovf;
9303 if (size < UNITS_PER_LONG)
9304 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
9305
9306 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9307
9308 gimplify_assign (addr, t, pre_p);
9309
9310 t = fold_build_pointer_plus_hwi (t, size);
9311 gimplify_assign (ovf, t, pre_p);
9312
9313 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9314
9315
9316 /* Increment register save count. */
9317
9318 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
9319 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
9320 gimplify_and_add (u, pre_p);
9321
9322 if (indirect_p)
9323 {
9324 t = build_pointer_type_for_mode (build_pointer_type (type),
9325 ptr_mode, true);
9326 addr = fold_convert (t, addr);
9327 addr = build_va_arg_indirect_ref (addr);
9328 }
9329 else
9330 {
9331 t = build_pointer_type_for_mode (type, ptr_mode, true);
9332 addr = fold_convert (t, addr);
9333 }
9334
9335 return build_va_arg_indirect_ref (addr);
9336 }
9337
9338 /* Output assembly code for the trampoline template to
9339 stdio stream FILE.
9340
9341 On S/390, we use gpr 1 internally in the trampoline code;
9342 gpr 0 is used to hold the static chain. */
9343
9344 static void
9345 s390_asm_trampoline_template (FILE *file)
9346 {
9347 rtx op[2];
9348 op[0] = gen_rtx_REG (Pmode, 0);
9349 op[1] = gen_rtx_REG (Pmode, 1);
9350
9351 if (TARGET_64BIT)
9352 {
9353 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
9354 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
9355 output_asm_insn ("br\t%1", op); /* 2 byte */
9356 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
9357 }
9358 else
9359 {
9360 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
9361 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
9362 output_asm_insn ("br\t%1", op); /* 2 byte */
9363 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
9364 }
9365 }
9366
9367 /* Emit RTL insns to initialize the variable parts of a trampoline.
9368 FNADDR is an RTX for the address of the function's pure code.
9369 CXT is an RTX for the static chain value for the function. */
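/* The resulting trampoline layout, in units of UNITS_PER_LONG, is:
     slot 0-1  code copied from the template above,
     slot 2    static chain value (loaded into gpr 0),
     slot 3    target function address (loaded into gpr 1).  */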
9370
9371 static void
9372 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9373 {
9374 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
9375 rtx mem;
9376
9377 emit_block_move (m_tramp, assemble_trampoline_template (),
9378 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
9379
9380 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
9381 emit_move_insn (mem, cxt);
9382 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
9383 emit_move_insn (mem, fnaddr);
9384 }
9385
9386 /* Output assembler code to FILE to increment profiler label # LABELNO
9387 for profiling a function entry. */
9388
9389 void
9390 s390_function_profiler (FILE *file, int labelno)
9391 {
9392 rtx op[7];
9393
9394 char label[128];
9395 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
9396
9397 fprintf (file, "# function profiler \n");
9398
9399 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
9400 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
9401 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
9402
9403 op[2] = gen_rtx_REG (Pmode, 1);
9404 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
9405 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
9406
9407 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
9408 if (flag_pic)
9409 {
9410 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
9411 op[4] = gen_rtx_CONST (Pmode, op[4]);
9412 }
9413
9414 if (TARGET_64BIT)
9415 {
9416 output_asm_insn ("stg\t%0,%1", op);
9417 output_asm_insn ("larl\t%2,%3", op);
9418 output_asm_insn ("brasl\t%0,%4", op);
9419 output_asm_insn ("lg\t%0,%1", op);
9420 }
9421 else if (!flag_pic)
9422 {
9423 op[6] = gen_label_rtx ();
9424
9425 output_asm_insn ("st\t%0,%1", op);
9426 output_asm_insn ("bras\t%2,%l6", op);
9427 output_asm_insn (".long\t%4", op);
9428 output_asm_insn (".long\t%3", op);
9429 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
9430 output_asm_insn ("l\t%0,0(%2)", op);
9431 output_asm_insn ("l\t%2,4(%2)", op);
9432 output_asm_insn ("basr\t%0,%0", op);
9433 output_asm_insn ("l\t%0,%1", op);
9434 }
9435 else
9436 {
9437 op[5] = gen_label_rtx ();
9438 op[6] = gen_label_rtx ();
9439
9440 output_asm_insn ("st\t%0,%1", op);
9441 output_asm_insn ("bras\t%2,%l6", op);
9442 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
9443 output_asm_insn (".long\t%4-%l5", op);
9444 output_asm_insn (".long\t%3-%l5", op);
9445 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
9446 output_asm_insn ("lr\t%0,%2", op);
9447 output_asm_insn ("a\t%0,0(%2)", op);
9448 output_asm_insn ("a\t%2,4(%2)", op);
9449 output_asm_insn ("basr\t%0,%0", op);
9450 output_asm_insn ("l\t%0,%1", op);
9451 }
9452 }
9453
9454 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
9455 into its SYMBOL_REF_FLAGS. */
9456
9457 static void
9458 s390_encode_section_info (tree decl, rtx rtl, int first)
9459 {
9460 default_encode_section_info (decl, rtl, first);
9461
9462 if (TREE_CODE (decl) == VAR_DECL)
9463 {
9464 /* If a variable has a forced alignment to < 2 bytes, mark it
9465 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as a LARL
9466 operand. */
9467 if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
9468 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
9469 if (!DECL_SIZE (decl)
9470 || !DECL_ALIGN (decl)
9471 || !host_integerp (DECL_SIZE (decl), 0)
9472 || (DECL_ALIGN (decl) <= 64
9473 && DECL_ALIGN (decl) != tree_low_cst (DECL_SIZE (decl), 0)))
9474 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
9475 }
9476
9477 /* Literal pool references don't have a decl so they are handled
9478 differently here. We rely on the information in the MEM_ALIGN
9479 entry to decide upon natural alignment. */
9480 if (MEM_P (rtl)
9481 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
9482 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
9483 && (MEM_ALIGN (rtl) == 0
9484 || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
9485 || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
9486 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
9487 }
9488
9489 /* Output thunk to FILE that implements a C++ virtual function call (with
9490 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
9491 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
9492 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
9493 relative to the resulting this pointer. */
9494
9495 static void
9496 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
9497 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9498 tree function)
9499 {
9500 rtx op[10];
9501 int nonlocal = 0;
9502
9503 /* Make sure unwind info is emitted for the thunk if needed. */
9504 final_start_function (emit_barrier (), file, 1);
9505
9506 /* Operand 0 is the target function. */
9507 op[0] = XEXP (DECL_RTL (function), 0);
9508 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
9509 {
9510 nonlocal = 1;
9511 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
9512 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
9513 op[0] = gen_rtx_CONST (Pmode, op[0]);
9514 }
9515
9516 /* Operand 1 is the 'this' pointer. */
9517 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9518 op[1] = gen_rtx_REG (Pmode, 3);
9519 else
9520 op[1] = gen_rtx_REG (Pmode, 2);
9521
9522 /* Operand 2 is the delta. */
9523 op[2] = GEN_INT (delta);
9524
9525 /* Operand 3 is the vcall_offset. */
9526 op[3] = GEN_INT (vcall_offset);
9527
9528 /* Operand 4 is the temporary register. */
9529 op[4] = gen_rtx_REG (Pmode, 1);
9530
9531 /* Operands 5 to 8 can be used as labels. */
9532 op[5] = NULL_RTX;
9533 op[6] = NULL_RTX;
9534 op[7] = NULL_RTX;
9535 op[8] = NULL_RTX;
9536
9537 /* Operand 9 can be used for temporary register. */
9538 op[9] = NULL_RTX;
9539
9540 /* Generate code. */
9541 if (TARGET_64BIT)
9542 {
9543 /* Setup literal pool pointer if required. */
9544 if ((!DISP_IN_RANGE (delta)
9545 && !CONST_OK_FOR_K (delta)
9546 && !CONST_OK_FOR_Os (delta))
9547 || (!DISP_IN_RANGE (vcall_offset)
9548 && !CONST_OK_FOR_K (vcall_offset)
9549 && !CONST_OK_FOR_Os (vcall_offset)))
9550 {
9551 op[5] = gen_label_rtx ();
9552 output_asm_insn ("larl\t%4,%5", op);
9553 }
9554
9555 /* Add DELTA to this pointer. */
9556 if (delta)
9557 {
9558 if (CONST_OK_FOR_J (delta))
9559 output_asm_insn ("la\t%1,%2(%1)", op);
9560 else if (DISP_IN_RANGE (delta))
9561 output_asm_insn ("lay\t%1,%2(%1)", op);
9562 else if (CONST_OK_FOR_K (delta))
9563 output_asm_insn ("aghi\t%1,%2", op);
9564 else if (CONST_OK_FOR_Os (delta))
9565 output_asm_insn ("agfi\t%1,%2", op);
9566 else
9567 {
9568 op[6] = gen_label_rtx ();
9569 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
9570 }
9571 }
9572
9573 /* Perform vcall adjustment. */
9574 if (vcall_offset)
9575 {
9576 if (DISP_IN_RANGE (vcall_offset))
9577 {
9578 output_asm_insn ("lg\t%4,0(%1)", op);
9579 output_asm_insn ("ag\t%1,%3(%4)", op);
9580 }
9581 else if (CONST_OK_FOR_K (vcall_offset))
9582 {
9583 output_asm_insn ("lghi\t%4,%3", op);
9584 output_asm_insn ("ag\t%4,0(%1)", op);
9585 output_asm_insn ("ag\t%1,0(%4)", op);
9586 }
9587 else if (CONST_OK_FOR_Os (vcall_offset))
9588 {
9589 output_asm_insn ("lgfi\t%4,%3", op);
9590 output_asm_insn ("ag\t%4,0(%1)", op);
9591 output_asm_insn ("ag\t%1,0(%4)", op);
9592 }
9593 else
9594 {
9595 op[7] = gen_label_rtx ();
9596 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
9597 output_asm_insn ("ag\t%4,0(%1)", op);
9598 output_asm_insn ("ag\t%1,0(%4)", op);
9599 }
9600 }
9601
9602 /* Jump to target. */
9603 output_asm_insn ("jg\t%0", op);
9604
9605 /* Output literal pool if required. */
9606 if (op[5])
9607 {
9608 output_asm_insn (".align\t4", op);
9609 targetm.asm_out.internal_label (file, "L",
9610 CODE_LABEL_NUMBER (op[5]));
9611 }
9612 if (op[6])
9613 {
9614 targetm.asm_out.internal_label (file, "L",
9615 CODE_LABEL_NUMBER (op[6]));
9616 output_asm_insn (".long\t%2", op);
9617 }
9618 if (op[7])
9619 {
9620 targetm.asm_out.internal_label (file, "L",
9621 CODE_LABEL_NUMBER (op[7]));
9622 output_asm_insn (".long\t%3", op);
9623 }
9624 }
9625 else
9626 {
9627 /* Setup base pointer if required. */
9628 if (!vcall_offset
9629 || (!DISP_IN_RANGE (delta)
9630 && !CONST_OK_FOR_K (delta)
9631 && !CONST_OK_FOR_Os (delta))
9632 || (!DISP_IN_RANGE (delta)
9633 && !CONST_OK_FOR_K (vcall_offset)
9634 && !CONST_OK_FOR_Os (vcall_offset)))
9635 {
9636 op[5] = gen_label_rtx ();
9637 output_asm_insn ("basr\t%4,0", op);
9638 targetm.asm_out.internal_label (file, "L",
9639 CODE_LABEL_NUMBER (op[5]));
9640 }
9641
9642 /* Add DELTA to this pointer. */
9643 if (delta)
9644 {
9645 if (CONST_OK_FOR_J (delta))
9646 output_asm_insn ("la\t%1,%2(%1)", op);
9647 else if (DISP_IN_RANGE (delta))
9648 output_asm_insn ("lay\t%1,%2(%1)", op);
9649 else if (CONST_OK_FOR_K (delta))
9650 output_asm_insn ("ahi\t%1,%2", op);
9651 else if (CONST_OK_FOR_Os (delta))
9652 output_asm_insn ("afi\t%1,%2", op);
9653 else
9654 {
9655 op[6] = gen_label_rtx ();
9656 output_asm_insn ("a\t%1,%6-%5(%4)", op);
9657 }
9658 }
9659
9660 /* Perform vcall adjustment. */
9661 if (vcall_offset)
9662 {
9663 if (CONST_OK_FOR_J (vcall_offset))
9664 {
9665 output_asm_insn ("l\t%4,0(%1)", op);
9666 output_asm_insn ("a\t%1,%3(%4)", op);
9667 }
9668 else if (DISP_IN_RANGE (vcall_offset))
9669 {
9670 output_asm_insn ("l\t%4,0(%1)", op);
9671 output_asm_insn ("ay\t%1,%3(%4)", op);
9672 }
9673 else if (CONST_OK_FOR_K (vcall_offset))
9674 {
9675 output_asm_insn ("lhi\t%4,%3", op);
9676 output_asm_insn ("a\t%4,0(%1)", op);
9677 output_asm_insn ("a\t%1,0(%4)", op);
9678 }
9679 else if (CONST_OK_FOR_Os (vcall_offset))
9680 {
9681 output_asm_insn ("iilf\t%4,%3", op);
9682 output_asm_insn ("a\t%4,0(%1)", op);
9683 output_asm_insn ("a\t%1,0(%4)", op);
9684 }
9685 else
9686 {
9687 op[7] = gen_label_rtx ();
9688 output_asm_insn ("l\t%4,%7-%5(%4)", op);
9689 output_asm_insn ("a\t%4,0(%1)", op);
9690 output_asm_insn ("a\t%1,0(%4)", op);
9691 }
9692
9693 /* We had to clobber the base pointer register.
9694 Set up the base pointer again (with a different base). */
9695 op[5] = gen_label_rtx ();
9696 output_asm_insn ("basr\t%4,0", op);
9697 targetm.asm_out.internal_label (file, "L",
9698 CODE_LABEL_NUMBER (op[5]));
9699 }
9700
9701 /* Jump to target. */
9702 op[8] = gen_label_rtx ();
9703
9704 if (!flag_pic)
9705 output_asm_insn ("l\t%4,%8-%5(%4)", op);
9706 else if (!nonlocal)
9707 output_asm_insn ("a\t%4,%8-%5(%4)", op);
9708 /* We cannot call through .plt, since .plt requires %r12 loaded. */
9709 else if (flag_pic == 1)
9710 {
9711 output_asm_insn ("a\t%4,%8-%5(%4)", op);
9712 output_asm_insn ("l\t%4,%0(%4)", op);
9713 }
9714 else if (flag_pic == 2)
9715 {
9716 op[9] = gen_rtx_REG (Pmode, 0);
9717 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
9718 output_asm_insn ("a\t%4,%8-%5(%4)", op);
9719 output_asm_insn ("ar\t%4,%9", op);
9720 output_asm_insn ("l\t%4,0(%4)", op);
9721 }
9722
9723 output_asm_insn ("br\t%4", op);
9724
9725 /* Output literal pool. */
9726 output_asm_insn (".align\t4", op);
9727
9728 if (nonlocal && flag_pic == 2)
9729 output_asm_insn (".long\t%0", op);
9730 if (nonlocal)
9731 {
9732 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9733 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
9734 }
9735
9736 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
9737 if (!flag_pic)
9738 output_asm_insn (".long\t%0", op);
9739 else
9740 output_asm_insn (".long\t%0-%5", op);
9741
9742 if (op[6])
9743 {
9744 targetm.asm_out.internal_label (file, "L",
9745 CODE_LABEL_NUMBER (op[6]));
9746 output_asm_insn (".long\t%2", op);
9747 }
9748 if (op[7])
9749 {
9750 targetm.asm_out.internal_label (file, "L",
9751 CODE_LABEL_NUMBER (op[7]));
9752 output_asm_insn (".long\t%3", op);
9753 }
9754 }
9755 final_end_function ();
9756 }
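
/* Editor's note (illustrative only, not part of the original source):
   for the common 64-bit case -- a small positive DELTA that fits the
   12-bit 'la' displacement, no vcall offset and a local target -- the
   code emitted by the function above reduces to

       la   %r2,<delta>(%r2)
       jg   <function>

   i.e. the 'this' pointer in %r2 is adjusted and control tail-jumps
   to the real method.  */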
9757
9758 static bool
9759 s390_valid_pointer_mode (enum machine_mode mode)
9760 {
9761 return (mode == SImode || (TARGET_64BIT && mode == DImode));
9762 }
9763
9764 /* Checks whether the given CALL_EXPR would use a call-saved
9765 register for argument passing. This is used to decide whether
9766 sibling call optimization could be performed on the respective
9767 function call. */
9768
9769 static bool
9770 s390_call_saved_register_used (tree call_expr)
9771 {
9772 CUMULATIVE_ARGS cum_v;
9773 cumulative_args_t cum;
9774 tree parameter;
9775 enum machine_mode mode;
9776 tree type;
9777 rtx parm_rtx;
9778 int reg, i;
9779
9780 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
9781 cum = pack_cumulative_args (&cum_v);
9782
9783 for (i = 0; i < call_expr_nargs (call_expr); i++)
9784 {
9785 parameter = CALL_EXPR_ARG (call_expr, i);
9786 gcc_assert (parameter);
9787
9788 /* For an undeclared variable passed as parameter we will get
9789 an ERROR_MARK node here. */
9790 if (TREE_CODE (parameter) == ERROR_MARK)
9791 return true;
9792
9793 type = TREE_TYPE (parameter);
9794 gcc_assert (type);
9795
9796 mode = TYPE_MODE (type);
9797 gcc_assert (mode);
9798
9799 if (pass_by_reference (&cum_v, mode, type, true))
9800 {
9801 mode = Pmode;
9802 type = build_pointer_type (type);
9803 }
9804
9805 parm_rtx = s390_function_arg (cum, mode, type, 0);
9806
9807 s390_function_arg_advance (cum, mode, type, 0);
9808
9809 if (!parm_rtx)
9810 continue;
9811
9812 if (REG_P (parm_rtx))
9813 {
9814 for (reg = 0;
9815 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
9816 reg++)
9817 if (!call_used_regs[reg + REGNO (parm_rtx)])
9818 return true;
9819 }
9820
9821 if (GET_CODE (parm_rtx) == PARALLEL)
9822 {
9823 int i;
9824
9825 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
9826 {
9827 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
9828
9829 gcc_assert (REG_P (r));
9830
9831 for (reg = 0;
9832 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
9833 reg++)
9834 if (!call_used_regs[reg + REGNO (r)])
9835 return true;
9836 }
9837 }
9838
9839 }
9840 return false;
9841 }
9842
9843 /* Return true if the given call expression can be
9844 turned into a sibling call.
9845 DECL holds the declaration of the function to be called whereas
9846 EXP is the call expression itself. */
9847
9848 static bool
9849 s390_function_ok_for_sibcall (tree decl, tree exp)
9850 {
9851 /* The TPF epilogue uses register 1. */
9852 if (TARGET_TPF_PROFILING)
9853 return false;
9854
9855 /* The 31 bit PLT code uses register 12 (GOT pointer - call-saved)
9856 which would have to be restored before the sibcall. */
9857 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
9858 return false;
9859
9860 /* Register 6 on s390 is available as an argument register but is
9861 unfortunately call-saved. This makes functions needing this register
9862 for arguments not suitable for sibcalls. */
9863 return !s390_call_saved_register_used (exp);
9864 }
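
/* Editor's note (illustrative only, not part of the original source):
   assuming the standard zSeries ELF ABI, integer arguments are passed
   in %r2-%r6, and %r6 is call-saved.  A call whose fifth integer
   argument therefore lands in %r6 is rejected by
   s390_call_saved_register_used above, e.g.:  */
#if 0   /* hypothetical example, not built */
extern int callee5 (int, int, int, int, int);

int
tail5 (int a, int b, int c, int d, int e)
{
  /* The fifth argument is passed in %r6 (call-saved), so this call is
     not turned into a sibling call on s390.  */
  return callee5 (a, b, c, d, e);
}
#endif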
9865
9866 /* Return the fixed registers used for condition codes. */
9867
9868 static bool
9869 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9870 {
9871 *p1 = CC_REGNUM;
9872 *p2 = INVALID_REGNUM;
9873
9874 return true;
9875 }
9876
9877 /* This function is used by the call expanders of the machine description.
9878 It emits the call insn itself together with the necessary operations
9879 to adjust the target address and returns the emitted insn.
9880 ADDR_LOCATION is the target address rtx
9881 TLS_CALL the location of the thread-local symbol
9882 RESULT_REG the register where the result of the call should be stored
9883 RETADDR_REG the register where the return address should be stored
9884 If this parameter is NULL_RTX the call is considered
9885 to be a sibling call. */
9886
9887 rtx
9888 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
9889 rtx retaddr_reg)
9890 {
9891 bool plt_call = false;
9892 rtx insn;
9893 rtx call;
9894 rtx clobber;
9895 rtvec vec;
9896
9897 /* Direct function calls need special treatment. */
9898 if (GET_CODE (addr_location) == SYMBOL_REF)
9899 {
9900 /* When calling a global routine in PIC mode, we must
9901 replace the symbol itself with the PLT stub. */
9902 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
9903 {
9904 if (retaddr_reg != NULL_RTX)
9905 {
9906 addr_location = gen_rtx_UNSPEC (Pmode,
9907 gen_rtvec (1, addr_location),
9908 UNSPEC_PLT);
9909 addr_location = gen_rtx_CONST (Pmode, addr_location);
9910 plt_call = true;
9911 }
9912 else
9913 /* For -fpic code the PLT entries might use r12 which is
9914 call-saved. Therefore we cannot do a sibcall when
9915 calling directly using a symbol ref. When reaching
9916 this point we decided (in s390_function_ok_for_sibcall)
9917 to do a sibcall for a function pointer but one of the
9918 optimizers was able to get rid of the function pointer
9919 by propagating the symbol ref into the call. This
9920 optimization is illegal for S/390 so we turn the direct
9921 call into an indirect call again. */
9922 addr_location = force_reg (Pmode, addr_location);
9923 }
9924
9925 /* Unless we can use the bras(l) insn, force the
9926 routine address into a register. */
9927 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
9928 {
9929 if (flag_pic)
9930 addr_location = legitimize_pic_address (addr_location, 0);
9931 else
9932 addr_location = force_reg (Pmode, addr_location);
9933 }
9934 }
9935
9936 /* If it is already an indirect call or the code above moved the
9937 SYMBOL_REF to somewhere else, make sure the address can be found in
9938 register 1. */
9939 if (retaddr_reg == NULL_RTX
9940 && GET_CODE (addr_location) != SYMBOL_REF
9941 && !plt_call)
9942 {
9943 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
9944 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
9945 }
9946
9947 addr_location = gen_rtx_MEM (QImode, addr_location);
9948 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
9949
9950 if (result_reg != NULL_RTX)
9951 call = gen_rtx_SET (VOIDmode, result_reg, call);
9952
9953 if (retaddr_reg != NULL_RTX)
9954 {
9955 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
9956
9957 if (tls_call != NULL_RTX)
9958 vec = gen_rtvec (3, call, clobber,
9959 gen_rtx_USE (VOIDmode, tls_call));
9960 else
9961 vec = gen_rtvec (2, call, clobber);
9962
9963 call = gen_rtx_PARALLEL (VOIDmode, vec);
9964 }
9965
9966 insn = emit_call_insn (call);
9967
9968 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
9969 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
9970 {
9971 /* s390_function_ok_for_sibcall should
9972 have denied sibcalls in this case. */
9973 gcc_assert (retaddr_reg != NULL_RTX);
9974 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
9975 }
9976 return insn;
9977 }
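
/* Editor's sketch (hypothetical, not taken from s390.md): a call
   expander would typically invoke s390_emit_call as below, passing
   %r14 (RETURN_REGNUM) as the return-address register for a normal
   call and NULL_RTX to request a sibling call.  */
#if 0   /* illustrative only, not built */
static rtx
emit_call_sketch (rtx target_addr, rtx result_reg, bool sibcall_p)
{
  return s390_emit_call (target_addr, NULL_RTX, result_reg,
			 sibcall_p ? NULL_RTX
				   : gen_rtx_REG (Pmode, RETURN_REGNUM));
}
#endif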
9978
9979 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
9980
9981 static void
9982 s390_conditional_register_usage (void)
9983 {
9984 int i;
9985
9986 if (flag_pic)
9987 {
9988 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
9989 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
9990 }
9991 if (TARGET_CPU_ZARCH)
9992 {
9993 fixed_regs[BASE_REGNUM] = 0;
9994 call_used_regs[BASE_REGNUM] = 0;
9995 fixed_regs[RETURN_REGNUM] = 0;
9996 call_used_regs[RETURN_REGNUM] = 0;
9997 }
9998 if (TARGET_64BIT)
9999 {
10000 for (i = 24; i < 32; i++)
10001 call_used_regs[i] = call_really_used_regs[i] = 0;
10002 }
10003 else
10004 {
10005 for (i = 18; i < 20; i++)
10006 call_used_regs[i] = call_really_used_regs[i] = 0;
10007 }
10008
10009 if (TARGET_SOFT_FLOAT)
10010 {
10011 for (i = 16; i < 32; i++)
10012 call_used_regs[i] = fixed_regs[i] = 1;
10013 }
10014 }
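
/* Editor's note (illustrative only, not part of the original source):
   in GCC's internal register numbering for s390, 0-15 are the GPRs and
   16-31 the FPRs.  The loops above therefore mark the ABI-defined
   call-saved FPRs (%f8-%f15 for the 64-bit ABI, %f4 and %f6 for the
   31-bit ABI) as not call-clobbered, and with -msoft-float all FPRs
   are removed from the allocator by making them fixed.  */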
10015
10016 /* Corresponding function to eh_return expander. */
10017
10018 static GTY(()) rtx s390_tpf_eh_return_symbol;
10019 void
10020 s390_emit_tpf_eh_return (rtx target)
10021 {
10022 rtx insn, reg;
10023
10024 if (!s390_tpf_eh_return_symbol)
10025 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
10026
10027 reg = gen_rtx_REG (Pmode, 2);
10028
10029 emit_move_insn (reg, target);
10030 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
10031 gen_rtx_REG (Pmode, RETURN_REGNUM));
10032 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
10033
10034 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
10035 }
10036
10037 /* Rework the prologue/epilogue to avoid saving/restoring
10038 registers unnecessarily. */
10039
10040 static void
10041 s390_optimize_prologue (void)
10042 {
10043 rtx insn, new_insn, next_insn;
10044
10045 /* Do a final recompute of the frame-related data. */
10046
10047 s390_update_frame_layout ();
10048
10049 /* If all special registers are in fact used, there's nothing we
10050 can do, so no point in walking the insn list. */
10051
10052 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
10053 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
10054 && (TARGET_CPU_ZARCH
10055 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
10056 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
10057 return;
10058
10059 /* Search for prologue/epilogue insns and replace them. */
10060
10061 for (insn = get_insns (); insn; insn = next_insn)
10062 {
10063 int first, last, off;
10064 rtx set, base, offset;
10065
10066 next_insn = NEXT_INSN (insn);
10067
10068 if (GET_CODE (insn) != INSN)
10069 continue;
10070
10071 if (GET_CODE (PATTERN (insn)) == PARALLEL
10072 && store_multiple_operation (PATTERN (insn), VOIDmode))
10073 {
10074 set = XVECEXP (PATTERN (insn), 0, 0);
10075 first = REGNO (SET_SRC (set));
10076 last = first + XVECLEN (PATTERN (insn), 0) - 1;
10077 offset = const0_rtx;
10078 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10079 off = INTVAL (offset);
10080
10081 if (GET_CODE (base) != REG || off < 0)
10082 continue;
10083 if (cfun_frame_layout.first_save_gpr != -1
10084 && (cfun_frame_layout.first_save_gpr < first
10085 || cfun_frame_layout.last_save_gpr > last))
10086 continue;
10087 if (REGNO (base) != STACK_POINTER_REGNUM
10088 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10089 continue;
10090 if (first > BASE_REGNUM || last < BASE_REGNUM)
10091 continue;
10092
10093 if (cfun_frame_layout.first_save_gpr != -1)
10094 {
10095 new_insn = save_gprs (base,
10096 off + (cfun_frame_layout.first_save_gpr
10097 - first) * UNITS_PER_LONG,
10098 cfun_frame_layout.first_save_gpr,
10099 cfun_frame_layout.last_save_gpr);
10100 new_insn = emit_insn_before (new_insn, insn);
10101 INSN_ADDRESSES_NEW (new_insn, -1);
10102 }
10103
10104 remove_insn (insn);
10105 continue;
10106 }
10107
10108 if (cfun_frame_layout.first_save_gpr == -1
10109 && GET_CODE (PATTERN (insn)) == SET
10110 && GET_CODE (SET_SRC (PATTERN (insn))) == REG
10111 && (REGNO (SET_SRC (PATTERN (insn))) == BASE_REGNUM
10112 || (!TARGET_CPU_ZARCH
10113 && REGNO (SET_SRC (PATTERN (insn))) == RETURN_REGNUM))
10114 && GET_CODE (SET_DEST (PATTERN (insn))) == MEM)
10115 {
10116 set = PATTERN (insn);
10117 first = REGNO (SET_SRC (set));
10118 offset = const0_rtx;
10119 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10120 off = INTVAL (offset);
10121
10122 if (GET_CODE (base) != REG || off < 0)
10123 continue;
10124 if (REGNO (base) != STACK_POINTER_REGNUM
10125 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10126 continue;
10127
10128 remove_insn (insn);
10129 continue;
10130 }
10131
10132 if (GET_CODE (PATTERN (insn)) == PARALLEL
10133 && load_multiple_operation (PATTERN (insn), VOIDmode))
10134 {
10135 set = XVECEXP (PATTERN (insn), 0, 0);
10136 first = REGNO (SET_DEST (set));
10137 last = first + XVECLEN (PATTERN (insn), 0) - 1;
10138 offset = const0_rtx;
10139 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
10140 off = INTVAL (offset);
10141
10142 if (GET_CODE (base) != REG || off < 0)
10143 continue;
10144 if (cfun_frame_layout.first_restore_gpr != -1
10145 && (cfun_frame_layout.first_restore_gpr < first
10146 || cfun_frame_layout.last_restore_gpr > last))
10147 continue;
10148 if (REGNO (base) != STACK_POINTER_REGNUM
10149 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10150 continue;
10151 if (first > BASE_REGNUM || last < BASE_REGNUM)
10152 continue;
10153
10154 if (cfun_frame_layout.first_restore_gpr != -1)
10155 {
10156 new_insn = restore_gprs (base,
10157 off + (cfun_frame_layout.first_restore_gpr
10158 - first) * UNITS_PER_LONG,
10159 cfun_frame_layout.first_restore_gpr,
10160 cfun_frame_layout.last_restore_gpr);
10161 new_insn = emit_insn_before (new_insn, insn);
10162 INSN_ADDRESSES_NEW (new_insn, -1);
10163 }
10164
10165 remove_insn (insn);
10166 continue;
10167 }
10168
10169 if (cfun_frame_layout.first_restore_gpr == -1
10170 && GET_CODE (PATTERN (insn)) == SET
10171 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
10172 && (REGNO (SET_DEST (PATTERN (insn))) == BASE_REGNUM
10173 || (!TARGET_CPU_ZARCH
10174 && REGNO (SET_DEST (PATTERN (insn))) == RETURN_REGNUM))
10175 && GET_CODE (SET_SRC (PATTERN (insn))) == MEM)
10176 {
10177 set = PATTERN (insn);
10178 first = REGNO (SET_DEST (set));
10179 offset = const0_rtx;
10180 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
10181 off = INTVAL (offset);
10182
10183 if (GET_CODE (base) != REG || off < 0)
10184 continue;
10185 if (REGNO (base) != STACK_POINTER_REGNUM
10186 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10187 continue;
10188
10189 remove_insn (insn);
10190 continue;
10191 }
10192 }
10193 }
10194
10195 /* On z10 and later the dynamic branch prediction must see the
10196 backward jump within a certain window. If not, it falls back to
10197 the static prediction. This function rearranges the loop backward
10198 branch in a way which makes the static prediction always correct.
10199 The function returns true if it added an instruction. */
10200 static bool
10201 s390_fix_long_loop_prediction (rtx insn)
10202 {
10203 rtx set = single_set (insn);
10204 rtx code_label, label_ref, new_label;
10205 rtx uncond_jump;
10206 rtx cur_insn;
10207 rtx tmp;
10208 int distance;
10209
10210 /* This will exclude branch on count and branch on index patterns
10211 since these are correctly statically predicted. */
10212 if (!set
10213 || SET_DEST (set) != pc_rtx
10214 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
10215 return false;
10216
10217 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
10218 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
10219
10220 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
10221
10222 code_label = XEXP (label_ref, 0);
10223
10224 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
10225 || INSN_ADDRESSES (INSN_UID (insn)) == -1
10226 || (INSN_ADDRESSES (INSN_UID (insn))
10227 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
10228 return false;
10229
10230 for (distance = 0, cur_insn = PREV_INSN (insn);
10231 distance < PREDICT_DISTANCE - 6;
10232 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
10233 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
10234 return false;
10235
10236 new_label = gen_label_rtx ();
10237 uncond_jump = emit_jump_insn_after (
10238 gen_rtx_SET (VOIDmode, pc_rtx,
10239 gen_rtx_LABEL_REF (VOIDmode, code_label)),
10240 insn);
10241 emit_label_after (new_label, uncond_jump);
10242
10243 tmp = XEXP (SET_SRC (set), 1);
10244 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
10245 XEXP (SET_SRC (set), 2) = tmp;
10246 INSN_CODE (insn) = -1;
10247
10248 XEXP (label_ref, 0) = new_label;
10249 JUMP_LABEL (insn) = new_label;
10250 JUMP_LABEL (uncond_jump) = code_label;
10251
10252 return true;
10253 }
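
/* Editor's note (illustrative only, not part of the original source):
   the rewrite performed above turns a distant backward loop branch
   such as

       clr  %r1,%r2
       jne  .Lloop            # conditional backward branch, far target

   into

       clr  %r1,%r2
       je   .Lskip            # inverted condition, short forward branch
       j    .Lloop            # unconditional backward branch
   .Lskip:

   so the backward branch becomes unconditional and is therefore always
   predicted correctly by the static branch prediction.  */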
10254
10255 /* Returns 1 if INSN reads the value of REG for purposes not related
10256 to addressing of memory, and 0 otherwise. */
10257 static int
10258 s390_non_addr_reg_read_p (rtx reg, rtx insn)
10259 {
10260 return reg_referenced_p (reg, PATTERN (insn))
10261 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
10262 }
10263
10264 /* Starting from INSN, find_cond_jump looks downwards in the insn
10265 stream for a single jump insn which is the last user of the
10266 condition code set in INSN. */
10267 static rtx
10268 find_cond_jump (rtx insn)
10269 {
10270 for (; insn; insn = NEXT_INSN (insn))
10271 {
10272 rtx ite, cc;
10273
10274 if (LABEL_P (insn))
10275 break;
10276
10277 if (!JUMP_P (insn))
10278 {
10279 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
10280 break;
10281 continue;
10282 }
10283
10284 /* This will be triggered by a return. */
10285 if (GET_CODE (PATTERN (insn)) != SET)
10286 break;
10287
10288 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
10289 ite = SET_SRC (PATTERN (insn));
10290
10291 if (GET_CODE (ite) != IF_THEN_ELSE)
10292 break;
10293
10294 cc = XEXP (XEXP (ite, 0), 0);
10295 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
10296 break;
10297
10298 if (find_reg_note (insn, REG_DEAD, cc))
10299 return insn;
10300 break;
10301 }
10302
10303 return NULL_RTX;
10304 }
10305
10306 /* Swap the condition in COND and the operands in OP0 and OP1 so that
10307 the semantics do not change. If NULL_RTX is passed as COND, the
10308 function tries to find the conditional jump starting with INSN. */
10309 static void
10310 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
10311 {
10312 rtx tmp = *op0;
10313
10314 if (cond == NULL_RTX)
10315 {
10316 rtx jump = find_cond_jump (NEXT_INSN (insn));
10317 jump = jump ? single_set (jump) : NULL_RTX;
10318
10319 if (jump == NULL_RTX)
10320 return;
10321
10322 cond = XEXP (XEXP (jump, 1), 0);
10323 }
10324
10325 *op0 = *op1;
10326 *op1 = tmp;
10327 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
10328 }
10329
10330 /* On z10, instructions of the compare-and-branch family have the
10331 property of accessing the register occurring as the second operand
10332 with its bits complemented. If such a compare is grouped with a second
10333 instruction that accesses the same register non-complemented, and
10334 if that register's value is delivered via a bypass, then the
10335 pipeline recycles, thereby causing significant performance decline.
10336 This function locates such situations and exchanges the two
10337 operands of the compare. The function returns true whenever it
10338 added an insn. */
10339 static bool
10340 s390_z10_optimize_cmp (rtx insn)
10341 {
10342 rtx prev_insn, next_insn;
10343 bool insn_added_p = false;
10344 rtx cond, *op0, *op1;
10345
10346 if (GET_CODE (PATTERN (insn)) == PARALLEL)
10347 {
10348 /* Handle compare and branch and branch on count
10349 instructions. */
10350 rtx pattern = single_set (insn);
10351
10352 if (!pattern
10353 || SET_DEST (pattern) != pc_rtx
10354 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
10355 return false;
10356
10357 cond = XEXP (SET_SRC (pattern), 0);
10358 op0 = &XEXP (cond, 0);
10359 op1 = &XEXP (cond, 1);
10360 }
10361 else if (GET_CODE (PATTERN (insn)) == SET)
10362 {
10363 rtx src, dest;
10364
10365 /* Handle normal compare instructions. */
10366 src = SET_SRC (PATTERN (insn));
10367 dest = SET_DEST (PATTERN (insn));
10368
10369 if (!REG_P (dest)
10370 || !CC_REGNO_P (REGNO (dest))
10371 || GET_CODE (src) != COMPARE)
10372 return false;
10373
10374 /* s390_swap_cmp will try to find the conditional
10375 jump when passing NULL_RTX as condition. */
10376 cond = NULL_RTX;
10377 op0 = &XEXP (src, 0);
10378 op1 = &XEXP (src, 1);
10379 }
10380 else
10381 return false;
10382
10383 if (!REG_P (*op0) || !REG_P (*op1))
10384 return false;
10385
10386 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
10387 return false;
10388
10389 /* Swap the COMPARE arguments and its mask if there is a
10390 conflicting access in the previous insn. */
10391 prev_insn = prev_active_insn (insn);
10392 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
10393 && reg_referenced_p (*op1, PATTERN (prev_insn)))
10394 s390_swap_cmp (cond, op0, op1, insn);
10395
10396 /* Check if there is a conflict with the next insn. If there
10397 was no conflict with the previous insn, then swap the
10398 COMPARE arguments and its mask. If we already swapped
10399 the operands, or if swapping them would cause a conflict
10400 with the previous insn, issue a NOP after the COMPARE in
10401 order to separate the two instructions. */
10402 next_insn = next_active_insn (insn);
10403 if (next_insn != NULL_RTX && INSN_P (next_insn)
10404 && s390_non_addr_reg_read_p (*op1, next_insn))
10405 {
10406 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
10407 && s390_non_addr_reg_read_p (*op0, prev_insn))
10408 {
10409 if (REGNO (*op1) == 0)
10410 emit_insn_after (gen_nop1 (), insn);
10411 else
10412 emit_insn_after (gen_nop (), insn);
10413 insn_added_p = true;
10414 }
10415 else
10416 s390_swap_cmp (cond, op0, op1, insn);
10417 }
10418 return insn_added_p;
10419 }
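
/* Editor's note (illustrative only, not part of the original source):
   as an example of the swap performed above, a compare-and-branch like

       cgrj  %r3,%r1,4,.Llabel     # branch if %r3 < %r1; %r1 is the
                                   # bit-complemented second operand

   whose neighbouring instruction also reads %r1 would be rewritten as

       cgrj  %r1,%r3,2,.Llabel     # branch if %r1 > %r3 -- same test,
                                   # %r1 no longer the second operand

   i.e. the COMPARE operands and the branch condition are exchanged
   without changing the semantics.  */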
10420
10421 /* Perform machine-dependent processing. */
10422
10423 static void
10424 s390_reorg (void)
10425 {
10426 bool pool_overflow = false;
10427
10428 /* Make sure all splits have been performed; splits after
10429 machine_dependent_reorg might confuse insn length counts. */
10430 split_all_insns_noflow ();
10431
10432 /* Install the main literal pool and the associated base
10433 register load insns.
10434
10435 In addition, there are two problematic situations we need
10436 to correct:
10437
10438 - the literal pool might be > 4096 bytes in size, so that
10439 some of its elements cannot be directly accessed
10440
10441 - a branch target might be > 64K away from the branch, so that
10442 it is not possible to use a PC-relative instruction.
10443
10444 To fix those, we split the single literal pool into multiple
10445 pool chunks, reloading the pool base register at various
10446 points throughout the function to ensure it always points to
10447 the pool chunk the following code expects, and / or replace
10448 PC-relative branches by absolute branches.
10449
10450 However, the two problems are interdependent: splitting the
10451 literal pool can move a branch further away from its target,
10452 causing the 64K limit to overflow, and on the other hand,
10453 replacing a PC-relative branch by an absolute branch means
10454 we need to put the branch target address into the literal
10455 pool, possibly causing it to overflow.
10456
10457 So, we loop trying to fix up both problems until we manage
10458 to satisfy both conditions at the same time. Note that the
10459 loop is guaranteed to terminate as every pass of the loop
10460 strictly decreases the total number of PC-relative branches
10461 in the function. (This is not completely true as there
10462 might be branch-over-pool insns introduced by chunkify_start.
10463 Those never need to be split, however.) */
10464
10465 for (;;)
10466 {
10467 struct constant_pool *pool = NULL;
10468
10469 /* Collect the literal pool. */
10470 if (!pool_overflow)
10471 {
10472 pool = s390_mainpool_start ();
10473 if (!pool)
10474 pool_overflow = true;
10475 }
10476
10477 /* If literal pool overflowed, start to chunkify it. */
10478 if (pool_overflow)
10479 pool = s390_chunkify_start ();
10480
10481 /* Split out-of-range branches. If this has created new
10482 literal pool entries, cancel current chunk list and
10483 recompute it. zSeries machines have large branch
10484 instructions, so we never need to split a branch. */
10485 if (!TARGET_CPU_ZARCH && s390_split_branches ())
10486 {
10487 if (pool_overflow)
10488 s390_chunkify_cancel (pool);
10489 else
10490 s390_mainpool_cancel (pool);
10491
10492 continue;
10493 }
10494
10495 /* If we made it up to here, both conditions are satisfied.
10496 Finish up literal pool related changes. */
10497 if (pool_overflow)
10498 s390_chunkify_finish (pool);
10499 else
10500 s390_mainpool_finish (pool);
10501
10502 /* We're done splitting branches. */
10503 cfun->machine->split_branches_pending_p = false;
10504 break;
10505 }
10506
10507 /* Generate out-of-pool execute target insns. */
10508 if (TARGET_CPU_ZARCH)
10509 {
10510 rtx insn, label, target;
10511
10512 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10513 {
10514 label = s390_execute_label (insn);
10515 if (!label)
10516 continue;
10517
10518 gcc_assert (label != const0_rtx);
10519
10520 target = emit_label (XEXP (label, 0));
10521 INSN_ADDRESSES_NEW (target, -1);
10522
10523 target = emit_insn (s390_execute_target (insn));
10524 INSN_ADDRESSES_NEW (target, -1);
10525 }
10526 }
10527
10528 /* Try to optimize prologue and epilogue further. */
10529 s390_optimize_prologue ();
10530
10531 /* Walk over the insns and do some >=z10 specific changes. */
10532 if (s390_tune == PROCESSOR_2097_Z10
10533 || s390_tune == PROCESSOR_2817_Z196
10534 || s390_tune == PROCESSOR_2827_ZEC12)
10535 {
10536 rtx insn;
10537 bool insn_added_p = false;
10538
10539 /* The insn lengths and addresses have to be up to date for the
10540 following manipulations. */
10541 shorten_branches (get_insns ());
10542
10543 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10544 {
10545 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10546 continue;
10547
10548 if (JUMP_P (insn))
10549 insn_added_p |= s390_fix_long_loop_prediction (insn);
10550
10551 if ((GET_CODE (PATTERN (insn)) == PARALLEL
10552 || GET_CODE (PATTERN (insn)) == SET)
10553 && s390_tune == PROCESSOR_2097_Z10)
10554 insn_added_p |= s390_z10_optimize_cmp (insn);
10555 }
10556
10557 /* Adjust branches if we added new instructions. */
10558 if (insn_added_p)
10559 shorten_branches (get_insns ());
10560 }
10561 }
10562
10563 /* Return true if INSN is an fp load insn writing register REGNO. */
10564 static inline bool
10565 s390_fpload_toreg (rtx insn, unsigned int regno)
10566 {
10567 rtx set;
10568 enum attr_type flag = s390_safe_attr_type (insn);
10569
10570 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
10571 return false;
10572
10573 set = single_set (insn);
10574
10575 if (set == NULL_RTX)
10576 return false;
10577
10578 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
10579 return false;
10580
10581 if (REGNO (SET_DEST (set)) != regno)
10582 return false;
10583
10584 return true;
10585 }
10586
10587 /* This value describes the distance to be avoided between an
10588 arithmetic fp instruction and an fp load writing the same register.
10589 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
10590 fine, but the exact value has to be avoided. Otherwise the FP
10591 pipeline will throw an exception causing a major penalty. */
10592 #define Z10_EARLYLOAD_DISTANCE 7
10593
10594 /* Rearrange the ready list in order to avoid the situation described
10595 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
10596 moved to the very end of the ready list. */
10597 static void
10598 s390_z10_prevent_earlyload_conflicts (rtx *ready, int *nready_p)
10599 {
10600 unsigned int regno;
10601 int nready = *nready_p;
10602 rtx tmp;
10603 int i;
10604 rtx insn;
10605 rtx set;
10606 enum attr_type flag;
10607 int distance;
10608
10609 /* Skip DISTANCE - 1 active insns. */
10610 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
10611 distance > 0 && insn != NULL_RTX;
10612 distance--, insn = prev_active_insn (insn))
10613 if (CALL_P (insn) || JUMP_P (insn))
10614 return;
10615
10616 if (insn == NULL_RTX)
10617 return;
10618
10619 set = single_set (insn);
10620
10621 if (set == NULL_RTX || !REG_P (SET_DEST (set))
10622 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
10623 return;
10624
10625 flag = s390_safe_attr_type (insn);
10626
10627 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
10628 return;
10629
10630 regno = REGNO (SET_DEST (set));
10631 i = nready - 1;
10632
10633 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
10634 i--;
10635
10636 if (!i)
10637 return;
10638
10639 tmp = ready[i];
10640 memmove (&ready[1], &ready[0], sizeof (rtx) * i);
10641 ready[0] = tmp;
10642 }
10643
10644
10645 /* The s390_sched_state variable tracks the state of the current or
10646 the last instruction group.
10647
10648 0,1,2 number of instructions scheduled in the current group
10649 3 the last group is complete - normal insns
10650 4 the last group was a cracked/expanded insn */
10651
10652 static int s390_sched_state;
10653
10654 #define S390_OOO_SCHED_STATE_NORMAL 3
10655 #define S390_OOO_SCHED_STATE_CRACKED 4
10656
10657 #define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1
10658 #define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2
10659 #define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4
10660 #define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
10661
10662 static unsigned int
10663 s390_get_sched_attrmask (rtx insn)
10664 {
10665 unsigned int mask = 0;
10666
10667 if (get_attr_ooo_cracked (insn))
10668 mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
10669 if (get_attr_ooo_expanded (insn))
10670 mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
10671 if (get_attr_ooo_endgroup (insn))
10672 mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
10673 if (get_attr_ooo_groupalone (insn))
10674 mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
10675 return mask;
10676 }
10677
10678 /* Return the scheduling score for INSN. The higher the score the
10679 better. The score is calculated from the OOO scheduling attributes
10680 of INSN and the scheduling state s390_sched_state. */
10681 static int
10682 s390_sched_score (rtx insn)
10683 {
10684 unsigned int mask = s390_get_sched_attrmask (insn);
10685 int score = 0;
10686
10687 switch (s390_sched_state)
10688 {
10689 case 0:
10690 /* Try to put insns into the first slot which would otherwise
10691 break a group. */
10692 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
10693 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
10694 score += 5;
10695 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
10696 score += 10;
10697 case 1:
10698 /* Prefer not cracked insns while trying to put together a
10699 group. */
10700 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
10701 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
10702 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
10703 score += 10;
10704 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
10705 score += 5;
10706 break;
10707 case 2:
10708 /* Prefer not cracked insns while trying to put together a
10709 group. */
10710 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
10711 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
10712 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
10713 score += 10;
10714 /* Prefer endgroup insns in the last slot. */
10715 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
10716 score += 10;
10717 break;
10718 case S390_OOO_SCHED_STATE_NORMAL:
10719 /* Prefer not cracked insns if the last was not cracked. */
10720 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
10721 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
10722 score += 5;
10723 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
10724 score += 10;
10725 break;
10726 case S390_OOO_SCHED_STATE_CRACKED:
10727 /* Try to keep cracked insns together to prevent them from
10728 interrupting groups. */
10729 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
10730 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
10731 score += 5;
10732 break;
10733 }
10734 return score;
10735 }
10736
10737 /* This function is called via hook TARGET_SCHED_REORDER before
10738 issuing one insn from list READY which contains *NREADYP entries.
10739 For target z10 it reorders load instructions to avoid early load
10740 conflicts in the floating point pipeline. */
10741 static int
10742 s390_sched_reorder (FILE *file, int verbose,
10743 rtx *ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
10744 {
10745 if (s390_tune == PROCESSOR_2097_Z10)
10746 if (reload_completed && *nreadyp > 1)
10747 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
10748
10749 if (s390_tune == PROCESSOR_2827_ZEC12
10750 && reload_completed
10751 && *nreadyp > 1)
10752 {
10753 int i;
10754 int last_index = *nreadyp - 1;
10755 int max_index = -1;
10756 int max_score = -1;
10757 rtx tmp;
10758
10759 /* Just move the insn with the highest score to the top (the
10760 end) of the list. A full sort is not needed since a conflict
10761 in the hazard recognition cannot happen. So the top insn in
10762 the ready list will always be taken. */
10763 for (i = last_index; i >= 0; i--)
10764 {
10765 int score;
10766
10767 if (recog_memoized (ready[i]) < 0)
10768 continue;
10769
10770 score = s390_sched_score (ready[i]);
10771 if (score > max_score)
10772 {
10773 max_score = score;
10774 max_index = i;
10775 }
10776 }
10777
10778 if (max_index != -1)
10779 {
10780 if (max_index != last_index)
10781 {
10782 tmp = ready[max_index];
10783 ready[max_index] = ready[last_index];
10784 ready[last_index] = tmp;
10785
10786 if (verbose > 5)
10787 fprintf (file,
10788 "move insn %d to the top of list\n",
10789 INSN_UID (ready[last_index]));
10790 }
10791 else if (verbose > 5)
10792 fprintf (file,
10793 "best insn %d already on top\n",
10794 INSN_UID (ready[last_index]));
10795 }
10796
10797 if (verbose > 5)
10798 {
10799 fprintf (file, "ready list ooo attributes - sched state: %d\n",
10800 s390_sched_state);
10801
10802 for (i = last_index; i >= 0; i--)
10803 {
10804 if (recog_memoized (ready[i]) < 0)
10805 continue;
10806 fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
10807 s390_sched_score (ready[i]));
10808 #define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
10809 PRINT_OOO_ATTR (ooo_cracked);
10810 PRINT_OOO_ATTR (ooo_expanded);
10811 PRINT_OOO_ATTR (ooo_endgroup);
10812 PRINT_OOO_ATTR (ooo_groupalone);
10813 #undef PRINT_OOO_ATTR
10814 fprintf (file, "\n");
10815 }
10816 }
10817 }
10818
10819 return s390_issue_rate ();
10820 }
10821
10822
10823 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
10824 the scheduler has issued INSN. It stores the last issued insn into
10825 last_scheduled_insn in order to make it available for
10826 s390_sched_reorder. */
10827 static int
10828 s390_sched_variable_issue (FILE *file, int verbose, rtx insn, int more)
10829 {
10830 last_scheduled_insn = insn;
10831
10832 if (s390_tune == PROCESSOR_2827_ZEC12
10833 && reload_completed
10834 && recog_memoized (insn) >= 0)
10835 {
10836 unsigned int mask = s390_get_sched_attrmask (insn);
10837
10838 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
10839 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
10840 s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
10841 else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
10842 || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
10843 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
10844 else
10845 {
10846 /* Only normal insns are left (mask == 0). */
10847 switch (s390_sched_state)
10848 {
10849 case 0:
10850 case 1:
10851 case 2:
10852 case S390_OOO_SCHED_STATE_NORMAL:
10853 if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
10854 s390_sched_state = 1;
10855 else
10856 s390_sched_state++;
10857
10858 break;
10859 case S390_OOO_SCHED_STATE_CRACKED:
10860 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
10861 break;
10862 }
10863 }
10864 if (verbose > 5)
10865 {
10866 fprintf (file, "insn %d: ", INSN_UID (insn));
10867 #define PRINT_OOO_ATTR(ATTR) \
10868 fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
10869 PRINT_OOO_ATTR (ooo_cracked);
10870 PRINT_OOO_ATTR (ooo_expanded);
10871 PRINT_OOO_ATTR (ooo_endgroup);
10872 PRINT_OOO_ATTR (ooo_groupalone);
10873 #undef PRINT_OOO_ATTR
10874 fprintf (file, "\n");
10875 fprintf (file, "sched state: %d\n", s390_sched_state);
10876 }
10877 }
10878
10879 if (GET_CODE (PATTERN (insn)) != USE
10880 && GET_CODE (PATTERN (insn)) != CLOBBER)
10881 return more - 1;
10882 else
10883 return more;
10884 }
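
/* Editor's note (illustrative only, not part of the original source):
   as a worked example of the state machine above, issuing the sequence
   "normal, normal, cracked, normal" starting from s390_sched_state == 0
   moves the state through 1, 2, S390_OOO_SCHED_STATE_CRACKED and
   finally S390_OOO_SCHED_STATE_NORMAL.  */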
10885
10886 static void
10887 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
10888 int verbose ATTRIBUTE_UNUSED,
10889 int max_ready ATTRIBUTE_UNUSED)
10890 {
10891 last_scheduled_insn = NULL_RTX;
10892 s390_sched_state = 0;
10893 }
10894
10895 /* This function checks the whole of insn X for memory references. The
10896 function always returns zero because the framework it is called
10897 from would stop recursively analyzing the insn upon a return value
10898 other than zero. The real result of this function is updating
10899 counter variable MEM_COUNT. */
10900 static int
10901 check_dpu (rtx *x, unsigned *mem_count)
10902 {
10903 if (*x != NULL_RTX && MEM_P (*x))
10904 (*mem_count)++;
10905 return 0;
10906 }
10907
10908 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
10909 a new number of times struct loop *loop should be unrolled when tuning
10910 for cpus with a built-in stride prefetcher.
10911 The loop is analyzed for memory accesses by calling check_dpu for
10912 each rtx of the loop. Depending on the loop_depth and the number of
10913 memory accesses a new number <= nunroll is returned to improve the
10914 behaviour of the hardware prefetch unit. */
10915 static unsigned
10916 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
10917 {
10918 basic_block *bbs;
10919 rtx insn;
10920 unsigned i;
10921 unsigned mem_count = 0;
10922
10923 if (s390_tune != PROCESSOR_2097_Z10
10924 && s390_tune != PROCESSOR_2817_Z196
10925 && s390_tune != PROCESSOR_2827_ZEC12)
10926 return nunroll;
10927
10928 /* Count the number of memory references within the loop body. */
10929 bbs = get_loop_body (loop);
10930 for (i = 0; i < loop->num_nodes; i++)
10931 {
10932 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
10933 if (INSN_P (insn) && INSN_CODE (insn) != -1)
10934 for_each_rtx (&insn, (rtx_function) check_dpu, &mem_count);
10935 }
10936 free (bbs);
10937
10938 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
10939 if (mem_count == 0)
10940 return nunroll;
10941
10942 switch (loop_depth(loop))
10943 {
10944 case 1:
10945 return MIN (nunroll, 28 / mem_count);
10946 case 2:
10947 return MIN (nunroll, 22 / mem_count);
10948 default:
10949 return MIN (nunroll, 16 / mem_count);
10950 }
10951 }
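
/* Editor's note (illustrative only, not part of the original source):
   as a worked example of the heuristic above, a depth-1 loop containing
   four memory references is limited to MIN (nunroll, 28 / 4), i.e. at
   most 7 unrolled copies, while the same loop at nesting depth 3 or
   more would be limited to MIN (nunroll, 16 / 4) = at most 4 copies.  */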
10952
10953 /* Initialize GCC target structure. */
10954
10955 #undef TARGET_ASM_ALIGNED_HI_OP
10956 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
10957 #undef TARGET_ASM_ALIGNED_DI_OP
10958 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
10959 #undef TARGET_ASM_INTEGER
10960 #define TARGET_ASM_INTEGER s390_assemble_integer
10961
10962 #undef TARGET_ASM_OPEN_PAREN
10963 #define TARGET_ASM_OPEN_PAREN ""
10964
10965 #undef TARGET_ASM_CLOSE_PAREN
10966 #define TARGET_ASM_CLOSE_PAREN ""
10967
10968 #undef TARGET_OPTION_OVERRIDE
10969 #define TARGET_OPTION_OVERRIDE s390_option_override
10970
10971 #undef TARGET_ENCODE_SECTION_INFO
10972 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
10973
10974 #undef TARGET_SCALAR_MODE_SUPPORTED_P
10975 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
10976
10977 #ifdef HAVE_AS_TLS
10978 #undef TARGET_HAVE_TLS
10979 #define TARGET_HAVE_TLS true
10980 #endif
10981 #undef TARGET_CANNOT_FORCE_CONST_MEM
10982 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
10983
10984 #undef TARGET_DELEGITIMIZE_ADDRESS
10985 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
10986
10987 #undef TARGET_LEGITIMIZE_ADDRESS
10988 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
10989
10990 #undef TARGET_RETURN_IN_MEMORY
10991 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
10992
10993 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
10994 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
10995
10996 #undef TARGET_ASM_OUTPUT_MI_THUNK
10997 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
10998 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10999 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
11000
11001 #undef TARGET_SCHED_ADJUST_PRIORITY
11002 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
11003 #undef TARGET_SCHED_ISSUE_RATE
11004 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
11005 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
11006 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
11007
11008 #undef TARGET_SCHED_VARIABLE_ISSUE
11009 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
11010 #undef TARGET_SCHED_REORDER
11011 #define TARGET_SCHED_REORDER s390_sched_reorder
11012 #undef TARGET_SCHED_INIT
11013 #define TARGET_SCHED_INIT s390_sched_init
11014
11015 #undef TARGET_CANNOT_COPY_INSN_P
11016 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
11017 #undef TARGET_RTX_COSTS
11018 #define TARGET_RTX_COSTS s390_rtx_costs
11019 #undef TARGET_ADDRESS_COST
11020 #define TARGET_ADDRESS_COST s390_address_cost
11021 #undef TARGET_REGISTER_MOVE_COST
11022 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
11023 #undef TARGET_MEMORY_MOVE_COST
11024 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
11025
11026 #undef TARGET_MACHINE_DEPENDENT_REORG
11027 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
11028
11029 #undef TARGET_VALID_POINTER_MODE
11030 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
11031
11032 #undef TARGET_BUILD_BUILTIN_VA_LIST
11033 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
11034 #undef TARGET_EXPAND_BUILTIN_VA_START
11035 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
11036 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
11037 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
11038
11039 #undef TARGET_PROMOTE_FUNCTION_MODE
11040 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
11041 #undef TARGET_PASS_BY_REFERENCE
11042 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
11043
11044 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
11045 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
11046 #undef TARGET_FUNCTION_ARG
11047 #define TARGET_FUNCTION_ARG s390_function_arg
11048 #undef TARGET_FUNCTION_ARG_ADVANCE
11049 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
11050 #undef TARGET_FUNCTION_VALUE
11051 #define TARGET_FUNCTION_VALUE s390_function_value
11052 #undef TARGET_LIBCALL_VALUE
11053 #define TARGET_LIBCALL_VALUE s390_libcall_value
11054
11055 #undef TARGET_FIXED_CONDITION_CODE_REGS
11056 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
11057
11058 #undef TARGET_CC_MODES_COMPATIBLE
11059 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
11060
11061 #undef TARGET_INVALID_WITHIN_DOLOOP
11062 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
11063
11064 #ifdef HAVE_AS_TLS
11065 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
11066 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
11067 #endif
11068
11069 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11070 #undef TARGET_MANGLE_TYPE
11071 #define TARGET_MANGLE_TYPE s390_mangle_type
11072 #endif
11073
11074 #undef TARGET_SCALAR_MODE_SUPPORTED_P
11075 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
11076
11077 #undef TARGET_PREFERRED_RELOAD_CLASS
11078 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
11079
11080 #undef TARGET_SECONDARY_RELOAD
11081 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
11082
11083 #undef TARGET_LIBGCC_CMP_RETURN_MODE
11084 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
11085
11086 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
11087 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
11088
11089 #undef TARGET_LEGITIMATE_ADDRESS_P
11090 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
11091
11092 #undef TARGET_LEGITIMATE_CONSTANT_P
11093 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
11094
11095 #undef TARGET_CAN_ELIMINATE
11096 #define TARGET_CAN_ELIMINATE s390_can_eliminate
11097
11098 #undef TARGET_CONDITIONAL_REGISTER_USAGE
11099 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
11100
11101 #undef TARGET_LOOP_UNROLL_ADJUST
11102 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
11103
11104 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11105 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
11106 #undef TARGET_TRAMPOLINE_INIT
11107 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
11108
11109 #undef TARGET_UNWIND_WORD_MODE
11110 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
11111
11112 #undef TARGET_CANONICALIZE_COMPARISON
11113 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
11114
11115 struct gcc_target targetm = TARGET_INITIALIZER;
11116
11117 #include "gt-s390.h"