1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2013 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "reload.h"
42 #include "diagnostic-core.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "debug.h"
48 #include "langhooks.h"
49 #include "optabs.h"
50 #include "gimple.h"
51 #include "df.h"
52 #include "params.h"
53 #include "cfgloop.h"
54 #include "opts.h"
55
 56 /* Define the specific costs for a given CPU.  */
57
58 struct processor_costs
59 {
60 /* multiplication */
61 const int m; /* cost of an M instruction. */
62 const int mghi; /* cost of an MGHI instruction. */
63 const int mh; /* cost of an MH instruction. */
64 const int mhi; /* cost of an MHI instruction. */
65 const int ml; /* cost of an ML instruction. */
66 const int mr; /* cost of an MR instruction. */
67 const int ms; /* cost of an MS instruction. */
68 const int msg; /* cost of an MSG instruction. */
69 const int msgf; /* cost of an MSGF instruction. */
70 const int msgfr; /* cost of an MSGFR instruction. */
71 const int msgr; /* cost of an MSGR instruction. */
72 const int msr; /* cost of an MSR instruction. */
73 const int mult_df; /* cost of multiplication in DFmode. */
 74   const int mxbr;    /* cost of multiplication in TFmode.  */
75 /* square root */
76 const int sqxbr; /* cost of square root in TFmode. */
77 const int sqdbr; /* cost of square root in DFmode. */
78 const int sqebr; /* cost of square root in SFmode. */
79 /* multiply and add */
80 const int madbr; /* cost of multiply and add in DFmode. */
81 const int maebr; /* cost of multiply and add in SFmode. */
82 /* division */
 83   const int dxbr;    /* cost of division in TFmode.  */
 84   const int ddbr;    /* cost of division in DFmode.  */
 85   const int debr;    /* cost of division in SFmode.  */
 86   const int dlgr;    /* cost of a DLGR instruction.  */
 87   const int dlr;     /* cost of a DLR instruction.  */
 88   const int dr;      /* cost of a DR instruction.  */
 89   const int dsgfr;   /* cost of a DSGFR instruction.  */
 90   const int dsgr;    /* cost of a DSGR instruction.  */
91 };
92
93 const struct processor_costs *s390_cost;
94
95 static const
96 struct processor_costs z900_cost =
97 {
98 COSTS_N_INSNS (5), /* M */
99 COSTS_N_INSNS (10), /* MGHI */
100 COSTS_N_INSNS (5), /* MH */
101 COSTS_N_INSNS (4), /* MHI */
102 COSTS_N_INSNS (5), /* ML */
103 COSTS_N_INSNS (5), /* MR */
104 COSTS_N_INSNS (4), /* MS */
105 COSTS_N_INSNS (15), /* MSG */
106 COSTS_N_INSNS (7), /* MSGF */
107 COSTS_N_INSNS (7), /* MSGFR */
108 COSTS_N_INSNS (10), /* MSGR */
109 COSTS_N_INSNS (4), /* MSR */
110 COSTS_N_INSNS (7), /* multiplication in DFmode */
111 COSTS_N_INSNS (13), /* MXBR */
112 COSTS_N_INSNS (136), /* SQXBR */
113 COSTS_N_INSNS (44), /* SQDBR */
114 COSTS_N_INSNS (35), /* SQEBR */
115 COSTS_N_INSNS (18), /* MADBR */
116 COSTS_N_INSNS (13), /* MAEBR */
117 COSTS_N_INSNS (134), /* DXBR */
118 COSTS_N_INSNS (30), /* DDBR */
119 COSTS_N_INSNS (27), /* DEBR */
120 COSTS_N_INSNS (220), /* DLGR */
121 COSTS_N_INSNS (34), /* DLR */
122 COSTS_N_INSNS (34), /* DR */
123 COSTS_N_INSNS (32), /* DSGFR */
124 COSTS_N_INSNS (32), /* DSGR */
125 };
126
127 static const
128 struct processor_costs z990_cost =
129 {
130 COSTS_N_INSNS (4), /* M */
131 COSTS_N_INSNS (2), /* MGHI */
132 COSTS_N_INSNS (2), /* MH */
133 COSTS_N_INSNS (2), /* MHI */
134 COSTS_N_INSNS (4), /* ML */
135 COSTS_N_INSNS (4), /* MR */
136 COSTS_N_INSNS (5), /* MS */
137 COSTS_N_INSNS (6), /* MSG */
138 COSTS_N_INSNS (4), /* MSGF */
139 COSTS_N_INSNS (4), /* MSGFR */
140 COSTS_N_INSNS (4), /* MSGR */
141 COSTS_N_INSNS (4), /* MSR */
142 COSTS_N_INSNS (1), /* multiplication in DFmode */
143 COSTS_N_INSNS (28), /* MXBR */
144 COSTS_N_INSNS (130), /* SQXBR */
145 COSTS_N_INSNS (66), /* SQDBR */
146 COSTS_N_INSNS (38), /* SQEBR */
147 COSTS_N_INSNS (1), /* MADBR */
148 COSTS_N_INSNS (1), /* MAEBR */
149 COSTS_N_INSNS (60), /* DXBR */
150 COSTS_N_INSNS (40), /* DDBR */
151 COSTS_N_INSNS (26), /* DEBR */
152 COSTS_N_INSNS (176), /* DLGR */
153 COSTS_N_INSNS (31), /* DLR */
154 COSTS_N_INSNS (31), /* DR */
155 COSTS_N_INSNS (31), /* DSGFR */
156 COSTS_N_INSNS (31), /* DSGR */
157 };
158
159 static const
160 struct processor_costs z9_109_cost =
161 {
162 COSTS_N_INSNS (4), /* M */
163 COSTS_N_INSNS (2), /* MGHI */
164 COSTS_N_INSNS (2), /* MH */
165 COSTS_N_INSNS (2), /* MHI */
166 COSTS_N_INSNS (4), /* ML */
167 COSTS_N_INSNS (4), /* MR */
168 COSTS_N_INSNS (5), /* MS */
169 COSTS_N_INSNS (6), /* MSG */
170 COSTS_N_INSNS (4), /* MSGF */
171 COSTS_N_INSNS (4), /* MSGFR */
172 COSTS_N_INSNS (4), /* MSGR */
173 COSTS_N_INSNS (4), /* MSR */
174 COSTS_N_INSNS (1), /* multiplication in DFmode */
175 COSTS_N_INSNS (28), /* MXBR */
176 COSTS_N_INSNS (130), /* SQXBR */
177 COSTS_N_INSNS (66), /* SQDBR */
178 COSTS_N_INSNS (38), /* SQEBR */
179 COSTS_N_INSNS (1), /* MADBR */
180 COSTS_N_INSNS (1), /* MAEBR */
181 COSTS_N_INSNS (60), /* DXBR */
182 COSTS_N_INSNS (40), /* DDBR */
183 COSTS_N_INSNS (26), /* DEBR */
184 COSTS_N_INSNS (30), /* DLGR */
185 COSTS_N_INSNS (23), /* DLR */
186 COSTS_N_INSNS (23), /* DR */
187 COSTS_N_INSNS (24), /* DSGFR */
188 COSTS_N_INSNS (24), /* DSGR */
189 };
190
191 static const
192 struct processor_costs z10_cost =
193 {
194 COSTS_N_INSNS (10), /* M */
195 COSTS_N_INSNS (10), /* MGHI */
196 COSTS_N_INSNS (10), /* MH */
197 COSTS_N_INSNS (10), /* MHI */
198 COSTS_N_INSNS (10), /* ML */
199 COSTS_N_INSNS (10), /* MR */
200 COSTS_N_INSNS (10), /* MS */
201 COSTS_N_INSNS (10), /* MSG */
202 COSTS_N_INSNS (10), /* MSGF */
203 COSTS_N_INSNS (10), /* MSGFR */
204 COSTS_N_INSNS (10), /* MSGR */
205 COSTS_N_INSNS (10), /* MSR */
206 COSTS_N_INSNS (1) , /* multiplication in DFmode */
207 COSTS_N_INSNS (50), /* MXBR */
208 COSTS_N_INSNS (120), /* SQXBR */
209 COSTS_N_INSNS (52), /* SQDBR */
210 COSTS_N_INSNS (38), /* SQEBR */
211 COSTS_N_INSNS (1), /* MADBR */
212 COSTS_N_INSNS (1), /* MAEBR */
213 COSTS_N_INSNS (111), /* DXBR */
214 COSTS_N_INSNS (39), /* DDBR */
215 COSTS_N_INSNS (32), /* DEBR */
216 COSTS_N_INSNS (160), /* DLGR */
217 COSTS_N_INSNS (71), /* DLR */
218 COSTS_N_INSNS (71), /* DR */
219 COSTS_N_INSNS (71), /* DSGFR */
220 COSTS_N_INSNS (71), /* DSGR */
221 };
222
223 static const
224 struct processor_costs z196_cost =
225 {
226 COSTS_N_INSNS (7), /* M */
227 COSTS_N_INSNS (5), /* MGHI */
228 COSTS_N_INSNS (5), /* MH */
229 COSTS_N_INSNS (5), /* MHI */
230 COSTS_N_INSNS (7), /* ML */
231 COSTS_N_INSNS (7), /* MR */
232 COSTS_N_INSNS (6), /* MS */
233 COSTS_N_INSNS (8), /* MSG */
234 COSTS_N_INSNS (6), /* MSGF */
235 COSTS_N_INSNS (6), /* MSGFR */
236 COSTS_N_INSNS (8), /* MSGR */
237 COSTS_N_INSNS (6), /* MSR */
238 COSTS_N_INSNS (1) , /* multiplication in DFmode */
239 COSTS_N_INSNS (40), /* MXBR B+40 */
240 COSTS_N_INSNS (100), /* SQXBR B+100 */
241 COSTS_N_INSNS (42), /* SQDBR B+42 */
242 COSTS_N_INSNS (28), /* SQEBR B+28 */
243 COSTS_N_INSNS (1), /* MADBR B */
244 COSTS_N_INSNS (1), /* MAEBR B */
245 COSTS_N_INSNS (101), /* DXBR B+101 */
246 COSTS_N_INSNS (29), /* DDBR */
247 COSTS_N_INSNS (22), /* DEBR */
248 COSTS_N_INSNS (160), /* DLGR cracked */
249 COSTS_N_INSNS (160), /* DLR cracked */
250 COSTS_N_INSNS (160), /* DR expanded */
251 COSTS_N_INSNS (160), /* DSGFR cracked */
252 COSTS_N_INSNS (160), /* DSGR cracked */
253 };
254
255 static const
256 struct processor_costs zEC12_cost =
257 {
258 COSTS_N_INSNS (7), /* M */
259 COSTS_N_INSNS (5), /* MGHI */
260 COSTS_N_INSNS (5), /* MH */
261 COSTS_N_INSNS (5), /* MHI */
262 COSTS_N_INSNS (7), /* ML */
263 COSTS_N_INSNS (7), /* MR */
264 COSTS_N_INSNS (6), /* MS */
265 COSTS_N_INSNS (8), /* MSG */
266 COSTS_N_INSNS (6), /* MSGF */
267 COSTS_N_INSNS (6), /* MSGFR */
268 COSTS_N_INSNS (8), /* MSGR */
269 COSTS_N_INSNS (6), /* MSR */
270 COSTS_N_INSNS (1) , /* multiplication in DFmode */
271 COSTS_N_INSNS (40), /* MXBR B+40 */
272 COSTS_N_INSNS (100), /* SQXBR B+100 */
273 COSTS_N_INSNS (42), /* SQDBR B+42 */
274 COSTS_N_INSNS (28), /* SQEBR B+28 */
275 COSTS_N_INSNS (1), /* MADBR B */
276 COSTS_N_INSNS (1), /* MAEBR B */
277 COSTS_N_INSNS (131), /* DXBR B+131 */
278 COSTS_N_INSNS (29), /* DDBR */
279 COSTS_N_INSNS (22), /* DEBR */
280 COSTS_N_INSNS (160), /* DLGR cracked */
281 COSTS_N_INSNS (160), /* DLR cracked */
282 COSTS_N_INSNS (160), /* DR expanded */
283 COSTS_N_INSNS (160), /* DSGFR cracked */
284 COSTS_N_INSNS (160), /* DSGR cracked */
285 };
286
287 extern int reload_completed;
288
289 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
290 static rtx last_scheduled_insn;
291
 292 /* Structure used to hold the components of an S/390 memory
293 address. A legitimate address on S/390 is of the general
294 form
295 base + index + displacement
296 where any of the components is optional.
297
298 base and index are registers of the class ADDR_REGS,
299 displacement is an unsigned 12-bit immediate constant. */
300
301 struct s390_address
302 {
303 rtx base;
304 rtx indx;
305 rtx disp;
306 bool pointer;
307 bool literal_pool;
308 };
309
310 /* The following structure is embedded in the machine
311 specific part of struct function. */
312
313 struct GTY (()) s390_frame_layout
314 {
315 /* Offset within stack frame. */
316 HOST_WIDE_INT gprs_offset;
317 HOST_WIDE_INT f0_offset;
318 HOST_WIDE_INT f4_offset;
319 HOST_WIDE_INT f8_offset;
320 HOST_WIDE_INT backchain_offset;
321
 322   /* Number of the first and last gpr for which slots in the
 323      register save area are reserved.  */
324 int first_save_gpr_slot;
325 int last_save_gpr_slot;
326
 327   /* Number of the first and last gpr to be saved and restored.  */
328 int first_save_gpr;
329 int first_restore_gpr;
330 int last_save_gpr;
331 int last_restore_gpr;
332
 333   /* Bits standing for floating point registers.  Set if the
334 respective register has to be saved. Starting with reg 16 (f0)
335 at the rightmost bit.
336 Bit 15 - 8 7 6 5 4 3 2 1 0
337 fpr 15 - 8 7 5 3 1 6 4 2 0
338 reg 31 - 24 23 22 21 20 19 18 17 16 */
339 unsigned int fpr_bitmap;
340
341 /* Number of floating point registers f8-f15 which must be saved. */
342 int high_fprs;
343
344 /* Set if return address needs to be saved.
345 This flag is set by s390_return_addr_rtx if it could not use
 346      the initial value of r14 and therefore depends on r14 being
 347      saved to the stack.  */
348 bool save_return_addr_p;
349
350 /* Size of stack frame. */
351 HOST_WIDE_INT frame_size;
352 };
353
354 /* Define the structure for the machine field in struct function. */
355
356 struct GTY(()) machine_function
357 {
358 struct s390_frame_layout frame_layout;
359
360 /* Literal pool base register. */
361 rtx base_reg;
362
363 /* True if we may need to perform branch splitting. */
364 bool split_branches_pending_p;
365
366 /* Some local-dynamic TLS symbol name. */
367 const char *some_ld_name;
368
369 bool has_landing_pad_p;
370 };
371
 372 /* A few accessor macros for struct cfun->machine->s390_frame_layout.  */
373
374 #define cfun_frame_layout (cfun->machine->frame_layout)
375 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
376 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
377 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
378 #define cfun_set_fpr_bit(BITNUM) (cfun->machine->frame_layout.fpr_bitmap |= \
379 (1 << (BITNUM)))
380 #define cfun_fpr_bit_p(BITNUM) (!!(cfun->machine->frame_layout.fpr_bitmap & \
381 (1 << (BITNUM))))
382
383 /* Number of GPRs and FPRs used for argument passing. */
384 #define GP_ARG_NUM_REG 5
 385 #define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
386
387 /* A couple of shortcuts. */
388 #define CONST_OK_FOR_J(x) \
389 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
390 #define CONST_OK_FOR_K(x) \
391 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
392 #define CONST_OK_FOR_Os(x) \
393 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
394 #define CONST_OK_FOR_Op(x) \
395 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
396 #define CONST_OK_FOR_On(x) \
397 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
398
399 #define REGNO_PAIR_OK(REGNO, MODE) \
400 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
401
 402 /* That's the read-ahead of the dynamic branch prediction unit in
403 bytes on a z10 (or higher) CPU. */
404 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
405
406 /* Return the alignment for LABEL. We default to the -falign-labels
407 value except for the literal pool base label. */
408 int
409 s390_label_align (rtx label)
410 {
411 rtx prev_insn = prev_active_insn (label);
412
413 if (prev_insn == NULL_RTX)
414 goto old;
415
416 prev_insn = single_set (prev_insn);
417
418 if (prev_insn == NULL_RTX)
419 goto old;
420
421 prev_insn = SET_SRC (prev_insn);
422
423 /* Don't align literal pool base labels. */
424 if (GET_CODE (prev_insn) == UNSPEC
425 && XINT (prev_insn, 1) == UNSPEC_MAIN_BASE)
426 return 0;
427
428 old:
429 return align_labels_log;
430 }
431
432 static enum machine_mode
433 s390_libgcc_cmp_return_mode (void)
434 {
435 return TARGET_64BIT ? DImode : SImode;
436 }
437
438 static enum machine_mode
439 s390_libgcc_shift_count_mode (void)
440 {
441 return TARGET_64BIT ? DImode : SImode;
442 }
443
444 static enum machine_mode
445 s390_unwind_word_mode (void)
446 {
447 return TARGET_64BIT ? DImode : SImode;
448 }
449
450 /* Return true if the back end supports mode MODE. */
451 static bool
452 s390_scalar_mode_supported_p (enum machine_mode mode)
453 {
 454   /* In contrast to the default implementation, reject TImode constants on
 455      31-bit TARGET_ZARCH for ABI compliance.  */
456 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
457 return false;
458
459 if (DECIMAL_FLOAT_MODE_P (mode))
460 return default_decimal_float_supported_p ();
461
462 return default_scalar_mode_supported_p (mode);
463 }
464
465 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
466
467 void
468 s390_set_has_landing_pad_p (bool value)
469 {
470 cfun->machine->has_landing_pad_p = value;
471 }
472
473 /* If two condition code modes are compatible, return a condition code
474 mode which is compatible with both. Otherwise, return
475 VOIDmode. */
476
477 static enum machine_mode
478 s390_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
479 {
480 if (m1 == m2)
481 return m1;
482
483 switch (m1)
484 {
485 case CCZmode:
486 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
487 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
488 return m2;
489 return VOIDmode;
490
491 case CCSmode:
492 case CCUmode:
493 case CCTmode:
494 case CCSRmode:
495 case CCURmode:
496 case CCZ1mode:
497 if (m2 == CCZmode)
498 return m1;
499
500 return VOIDmode;
501
502 default:
503 return VOIDmode;
504 }
505 return VOIDmode;
506 }
507
508 /* Return true if SET either doesn't set the CC register, or else
509 the source and destination have matching CC modes and that
510 CC mode is at least as constrained as REQ_MODE. */
511
512 static bool
513 s390_match_ccmode_set (rtx set, enum machine_mode req_mode)
514 {
515 enum machine_mode set_mode;
516
517 gcc_assert (GET_CODE (set) == SET);
518
519 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
520 return 1;
521
522 set_mode = GET_MODE (SET_DEST (set));
523 switch (set_mode)
524 {
525 case CCSmode:
526 case CCSRmode:
527 case CCUmode:
528 case CCURmode:
529 case CCLmode:
530 case CCL1mode:
531 case CCL2mode:
532 case CCL3mode:
533 case CCT1mode:
534 case CCT2mode:
535 case CCT3mode:
536 if (req_mode != set_mode)
537 return 0;
538 break;
539
540 case CCZmode:
541 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
542 && req_mode != CCSRmode && req_mode != CCURmode)
543 return 0;
544 break;
545
546 case CCAPmode:
547 case CCANmode:
548 if (req_mode != CCAmode)
549 return 0;
550 break;
551
552 default:
553 gcc_unreachable ();
554 }
555
556 return (GET_MODE (SET_SRC (set)) == set_mode);
557 }
558
559 /* Return true if every SET in INSN that sets the CC register
560 has source and destination with matching CC modes and that
561 CC mode is at least as constrained as REQ_MODE.
562 If REQ_MODE is VOIDmode, always return false. */
563
564 bool
565 s390_match_ccmode (rtx insn, enum machine_mode req_mode)
566 {
567 int i;
568
569 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
570 if (req_mode == VOIDmode)
571 return false;
572
573 if (GET_CODE (PATTERN (insn)) == SET)
574 return s390_match_ccmode_set (PATTERN (insn), req_mode);
575
576 if (GET_CODE (PATTERN (insn)) == PARALLEL)
577 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
578 {
579 rtx set = XVECEXP (PATTERN (insn), 0, i);
580 if (GET_CODE (set) == SET)
581 if (!s390_match_ccmode_set (set, req_mode))
582 return false;
583 }
584
585 return true;
586 }
587
588 /* If a test-under-mask instruction can be used to implement
589 (compare (and ... OP1) OP2), return the CC mode required
590 to do that. Otherwise, return VOIDmode.
591 MIXED is true if the instruction can distinguish between
592 CC1 and CC2 for mixed selected bits (TMxx), it is false
593 if the instruction cannot (TM). */
594
595 enum machine_mode
596 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
597 {
598 int bit0, bit1;
599
600 /* ??? Fixme: should work on CONST_DOUBLE as well. */
601 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
602 return VOIDmode;
603
604 /* Selected bits all zero: CC0.
605 e.g.: int a; if ((a & (16 + 128)) == 0) */
606 if (INTVAL (op2) == 0)
607 return CCTmode;
608
609 /* Selected bits all one: CC3.
610 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
611 if (INTVAL (op2) == INTVAL (op1))
612 return CCT3mode;
613
614 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
615 int a;
616 if ((a & (16 + 128)) == 16) -> CCT1
617 if ((a & (16 + 128)) == 128) -> CCT2 */
618 if (mixed)
619 {
620 bit1 = exact_log2 (INTVAL (op2));
621 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
622 if (bit0 != -1 && bit1 != -1)
623 return bit0 > bit1 ? CCT1mode : CCT2mode;
624 }
625
626 return VOIDmode;
627 }
628
629 /* Given a comparison code OP (EQ, NE, etc.) and the operands
630 OP0 and OP1 of a COMPARE, return the mode to be used for the
631 comparison. */
632
633 enum machine_mode
634 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
635 {
636 switch (code)
637 {
638 case EQ:
639 case NE:
640 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
641 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
642 return CCAPmode;
643 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
644 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
645 return CCAPmode;
646 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
647 || GET_CODE (op1) == NEG)
648 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
649 return CCLmode;
650
651 if (GET_CODE (op0) == AND)
652 {
653 /* Check whether we can potentially do it via TM. */
654 enum machine_mode ccmode;
655 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
656 if (ccmode != VOIDmode)
657 {
658 /* Relax CCTmode to CCZmode to allow fall-back to AND
659 if that turns out to be beneficial. */
660 return ccmode == CCTmode ? CCZmode : ccmode;
661 }
662 }
663
664 if (register_operand (op0, HImode)
665 && GET_CODE (op1) == CONST_INT
666 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
667 return CCT3mode;
668 if (register_operand (op0, QImode)
669 && GET_CODE (op1) == CONST_INT
670 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
671 return CCT3mode;
672
673 return CCZmode;
674
675 case LE:
676 case LT:
677 case GE:
678 case GT:
 679       /* The only overflow condition of NEG and ABS happens when the
 680 	 minimum representable integer (INT_MIN) is used as operand; its
 681 	 negation cannot be represented, so the result stays negative.
 682 	 Using CCAPmode the resulting cc can be used for comparisons.  */
683 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
684 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
685 return CCAPmode;
686
 687       /* If constants are involved in an add instruction, it is possible to use
 688 	 the resulting cc for comparisons with zero.  Knowing the sign of the
 689 	 constant, the overflow behavior becomes predictable.  E.g.:
690 int a, b; if ((b = a + c) > 0)
691 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
692 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
693 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
694 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
695 /* Avoid INT32_MIN on 32 bit. */
696 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
697 {
698 if (INTVAL (XEXP((op0), 1)) < 0)
699 return CCANmode;
700 else
701 return CCAPmode;
702 }
703 /* Fall through. */
704 case UNORDERED:
705 case ORDERED:
706 case UNEQ:
707 case UNLE:
708 case UNLT:
709 case UNGE:
710 case UNGT:
711 case LTGT:
712 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
713 && GET_CODE (op1) != CONST_INT)
714 return CCSRmode;
715 return CCSmode;
716
717 case LTU:
718 case GEU:
719 if (GET_CODE (op0) == PLUS
720 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
721 return CCL1mode;
722
723 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
724 && GET_CODE (op1) != CONST_INT)
725 return CCURmode;
726 return CCUmode;
727
728 case LEU:
729 case GTU:
730 if (GET_CODE (op0) == MINUS
731 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
732 return CCL2mode;
733
734 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
735 && GET_CODE (op1) != CONST_INT)
736 return CCURmode;
737 return CCUmode;
738
739 default:
740 gcc_unreachable ();
741 }
742 }
743
744 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
745 that we can implement more efficiently. */
746
747 static void
748 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
749 bool op0_preserve_value)
750 {
751 if (op0_preserve_value)
752 return;
753
754 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
755 if ((*code == EQ || *code == NE)
756 && *op1 == const0_rtx
757 && GET_CODE (*op0) == ZERO_EXTRACT
758 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
759 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
760 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
761 {
762 rtx inner = XEXP (*op0, 0);
763 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
764 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
765 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
766
767 if (len > 0 && len < modesize
768 && pos >= 0 && pos + len <= modesize
769 && modesize <= HOST_BITS_PER_WIDE_INT)
770 {
771 unsigned HOST_WIDE_INT block;
772 block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
773 block <<= modesize - pos - len;
774
775 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
776 gen_int_mode (block, GET_MODE (inner)));
777 }
778 }
779
780 /* Narrow AND of memory against immediate to enable TM. */
781 if ((*code == EQ || *code == NE)
782 && *op1 == const0_rtx
783 && GET_CODE (*op0) == AND
784 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
785 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
786 {
787 rtx inner = XEXP (*op0, 0);
788 rtx mask = XEXP (*op0, 1);
789
790 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
791 if (GET_CODE (inner) == SUBREG
792 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
793 && (GET_MODE_SIZE (GET_MODE (inner))
794 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
795 && ((INTVAL (mask)
796 & GET_MODE_MASK (GET_MODE (inner))
797 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
798 == 0))
799 inner = SUBREG_REG (inner);
800
801 /* Do not change volatile MEMs. */
802 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
803 {
804 int part = s390_single_part (XEXP (*op0, 1),
805 GET_MODE (inner), QImode, 0);
806 if (part >= 0)
807 {
808 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
809 inner = adjust_address_nv (inner, QImode, part);
810 *op0 = gen_rtx_AND (QImode, inner, mask);
811 }
812 }
813 }
814
815 /* Narrow comparisons against 0xffff to HImode if possible. */
816 if ((*code == EQ || *code == NE)
817 && GET_CODE (*op1) == CONST_INT
818 && INTVAL (*op1) == 0xffff
819 && SCALAR_INT_MODE_P (GET_MODE (*op0))
820 && (nonzero_bits (*op0, GET_MODE (*op0))
821 & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
822 {
823 *op0 = gen_lowpart (HImode, *op0);
824 *op1 = constm1_rtx;
825 }
826
827 /* Remove redundant UNSPEC_CCU_TO_INT conversions if possible. */
828 if (GET_CODE (*op0) == UNSPEC
829 && XINT (*op0, 1) == UNSPEC_CCU_TO_INT
830 && XVECLEN (*op0, 0) == 1
831 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
832 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
833 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
834 && *op1 == const0_rtx)
835 {
836 enum rtx_code new_code = UNKNOWN;
837 switch (*code)
838 {
839 case EQ: new_code = EQ; break;
840 case NE: new_code = NE; break;
841 case LT: new_code = GTU; break;
842 case GT: new_code = LTU; break;
843 case LE: new_code = GEU; break;
844 case GE: new_code = LEU; break;
845 default: break;
846 }
847
848 if (new_code != UNKNOWN)
849 {
850 *op0 = XVECEXP (*op0, 0, 0);
851 *code = new_code;
852 }
853 }
854
855 /* Remove redundant UNSPEC_CCZ_TO_INT conversions if possible. */
856 if (GET_CODE (*op0) == UNSPEC
857 && XINT (*op0, 1) == UNSPEC_CCZ_TO_INT
858 && XVECLEN (*op0, 0) == 1
859 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCZmode
860 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
861 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
862 && *op1 == const0_rtx)
863 {
864 enum rtx_code new_code = UNKNOWN;
865 switch (*code)
866 {
867 case EQ: new_code = EQ; break;
868 case NE: new_code = NE; break;
869 default: break;
870 }
871
872 if (new_code != UNKNOWN)
873 {
874 *op0 = XVECEXP (*op0, 0, 0);
875 *code = new_code;
876 }
877 }
878
879 /* Simplify cascaded EQ, NE with const0_rtx. */
880 if ((*code == NE || *code == EQ)
881 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
882 && GET_MODE (*op0) == SImode
883 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
884 && REG_P (XEXP (*op0, 0))
885 && XEXP (*op0, 1) == const0_rtx
886 && *op1 == const0_rtx)
887 {
888 if ((*code == EQ && GET_CODE (*op0) == NE)
889 || (*code == NE && GET_CODE (*op0) == EQ))
890 *code = EQ;
891 else
892 *code = NE;
893 *op0 = XEXP (*op0, 0);
894 }
895
896 /* Prefer register over memory as first operand. */
897 if (MEM_P (*op0) && REG_P (*op1))
898 {
899 rtx tem = *op0; *op0 = *op1; *op1 = tem;
900 *code = (int)swap_condition ((enum rtx_code)*code);
901 }
902 }
903
904 /* Emit a compare instruction suitable to implement the comparison
905 OP0 CODE OP1. Return the correct condition RTL to be placed in
906 the IF_THEN_ELSE of the conditional branch testing the result. */
907
908 rtx
909 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
910 {
911 enum machine_mode mode = s390_select_ccmode (code, op0, op1);
912 rtx cc;
913
914 /* Do not output a redundant compare instruction if a compare_and_swap
915 pattern already computed the result and the machine modes are compatible. */
916 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
917 {
918 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
919 == GET_MODE (op0));
920 cc = op0;
921 }
922 else
923 {
924 cc = gen_rtx_REG (mode, CC_REGNUM);
925 emit_insn (gen_rtx_SET (VOIDmode, cc, gen_rtx_COMPARE (mode, op0, op1)));
926 }
927
928 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
929 }
930
931 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
932 matches CMP.
933 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
934 conditional branch testing the result. */
935
936 static rtx
937 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
938 rtx cmp, rtx new_rtx)
939 {
940 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
941 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
942 const0_rtx);
943 }
944
945 /* Emit a jump instruction to TARGET. If COND is NULL_RTX, emit an
946 unconditional jump, else a conditional jump under condition COND. */
947
948 void
949 s390_emit_jump (rtx target, rtx cond)
950 {
951 rtx insn;
952
953 target = gen_rtx_LABEL_REF (VOIDmode, target);
954 if (cond)
955 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
956
957 insn = gen_rtx_SET (VOIDmode, pc_rtx, target);
958 emit_jump_insn (insn);
959 }
960
961 /* Return branch condition mask to implement a branch
962 specified by CODE. Return -1 for invalid comparisons. */
963
964 int
965 s390_branch_condition_mask (rtx code)
966 {
967 const int CC0 = 1 << 3;
968 const int CC1 = 1 << 2;
969 const int CC2 = 1 << 1;
970 const int CC3 = 1 << 0;
971
972 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
973 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
974 gcc_assert (XEXP (code, 1) == const0_rtx);
975
976 switch (GET_MODE (XEXP (code, 0)))
977 {
978 case CCZmode:
979 case CCZ1mode:
980 switch (GET_CODE (code))
981 {
982 case EQ: return CC0;
983 case NE: return CC1 | CC2 | CC3;
984 default: return -1;
985 }
986 break;
987
988 case CCT1mode:
989 switch (GET_CODE (code))
990 {
991 case EQ: return CC1;
992 case NE: return CC0 | CC2 | CC3;
993 default: return -1;
994 }
995 break;
996
997 case CCT2mode:
998 switch (GET_CODE (code))
999 {
1000 case EQ: return CC2;
1001 case NE: return CC0 | CC1 | CC3;
1002 default: return -1;
1003 }
1004 break;
1005
1006 case CCT3mode:
1007 switch (GET_CODE (code))
1008 {
1009 case EQ: return CC3;
1010 case NE: return CC0 | CC1 | CC2;
1011 default: return -1;
1012 }
1013 break;
1014
1015 case CCLmode:
1016 switch (GET_CODE (code))
1017 {
1018 case EQ: return CC0 | CC2;
1019 case NE: return CC1 | CC3;
1020 default: return -1;
1021 }
1022 break;
1023
1024 case CCL1mode:
1025 switch (GET_CODE (code))
1026 {
1027 case LTU: return CC2 | CC3; /* carry */
1028 case GEU: return CC0 | CC1; /* no carry */
1029 default: return -1;
1030 }
1031 break;
1032
1033 case CCL2mode:
1034 switch (GET_CODE (code))
1035 {
1036 case GTU: return CC0 | CC1; /* borrow */
1037 case LEU: return CC2 | CC3; /* no borrow */
1038 default: return -1;
1039 }
1040 break;
1041
1042 case CCL3mode:
1043 switch (GET_CODE (code))
1044 {
1045 case EQ: return CC0 | CC2;
1046 case NE: return CC1 | CC3;
1047 case LTU: return CC1;
1048 case GTU: return CC3;
1049 case LEU: return CC1 | CC2;
1050 case GEU: return CC2 | CC3;
1051 default: return -1;
1052 }
1053
1054 case CCUmode:
1055 switch (GET_CODE (code))
1056 {
1057 case EQ: return CC0;
1058 case NE: return CC1 | CC2 | CC3;
1059 case LTU: return CC1;
1060 case GTU: return CC2;
1061 case LEU: return CC0 | CC1;
1062 case GEU: return CC0 | CC2;
1063 default: return -1;
1064 }
1065 break;
1066
1067 case CCURmode:
1068 switch (GET_CODE (code))
1069 {
1070 case EQ: return CC0;
1071 case NE: return CC2 | CC1 | CC3;
1072 case LTU: return CC2;
1073 case GTU: return CC1;
1074 case LEU: return CC0 | CC2;
1075 case GEU: return CC0 | CC1;
1076 default: return -1;
1077 }
1078 break;
1079
1080 case CCAPmode:
1081 switch (GET_CODE (code))
1082 {
1083 case EQ: return CC0;
1084 case NE: return CC1 | CC2 | CC3;
1085 case LT: return CC1 | CC3;
1086 case GT: return CC2;
1087 case LE: return CC0 | CC1 | CC3;
1088 case GE: return CC0 | CC2;
1089 default: return -1;
1090 }
1091 break;
1092
1093 case CCANmode:
1094 switch (GET_CODE (code))
1095 {
1096 case EQ: return CC0;
1097 case NE: return CC1 | CC2 | CC3;
1098 case LT: return CC1;
1099 case GT: return CC2 | CC3;
1100 case LE: return CC0 | CC1;
1101 case GE: return CC0 | CC2 | CC3;
1102 default: return -1;
1103 }
1104 break;
1105
1106 case CCSmode:
1107 switch (GET_CODE (code))
1108 {
1109 case EQ: return CC0;
1110 case NE: return CC1 | CC2 | CC3;
1111 case LT: return CC1;
1112 case GT: return CC2;
1113 case LE: return CC0 | CC1;
1114 case GE: return CC0 | CC2;
1115 case UNORDERED: return CC3;
1116 case ORDERED: return CC0 | CC1 | CC2;
1117 case UNEQ: return CC0 | CC3;
1118 case UNLT: return CC1 | CC3;
1119 case UNGT: return CC2 | CC3;
1120 case UNLE: return CC0 | CC1 | CC3;
1121 case UNGE: return CC0 | CC2 | CC3;
1122 case LTGT: return CC1 | CC2;
1123 default: return -1;
1124 }
1125 break;
1126
1127 case CCSRmode:
1128 switch (GET_CODE (code))
1129 {
1130 case EQ: return CC0;
1131 case NE: return CC2 | CC1 | CC3;
1132 case LT: return CC2;
1133 case GT: return CC1;
1134 case LE: return CC0 | CC2;
1135 case GE: return CC0 | CC1;
1136 case UNORDERED: return CC3;
1137 case ORDERED: return CC0 | CC2 | CC1;
1138 case UNEQ: return CC0 | CC3;
1139 case UNLT: return CC2 | CC3;
1140 case UNGT: return CC1 | CC3;
1141 case UNLE: return CC0 | CC2 | CC3;
1142 case UNGE: return CC0 | CC1 | CC3;
1143 case LTGT: return CC2 | CC1;
1144 default: return -1;
1145 }
1146 break;
1147
1148 default:
1149 return -1;
1150 }
1151 }
1152
1153
1154 /* Return branch condition mask to implement a compare and branch
1155 specified by CODE. Return -1 for invalid comparisons. */
1156
1157 int
1158 s390_compare_and_branch_condition_mask (rtx code)
1159 {
1160 const int CC0 = 1 << 3;
1161 const int CC1 = 1 << 2;
1162 const int CC2 = 1 << 1;
1163
1164 switch (GET_CODE (code))
1165 {
1166 case EQ:
1167 return CC0;
1168 case NE:
1169 return CC1 | CC2;
1170 case LT:
1171 case LTU:
1172 return CC1;
1173 case GT:
1174 case GTU:
1175 return CC2;
1176 case LE:
1177 case LEU:
1178 return CC0 | CC1;
1179 case GE:
1180 case GEU:
1181 return CC0 | CC2;
1182 default:
1183 gcc_unreachable ();
1184 }
1185 return -1;
1186 }
1187
1188 /* If INV is false, return assembler mnemonic string to implement
1189 a branch specified by CODE. If INV is true, return mnemonic
1190 for the corresponding inverted branch. */
1191
1192 static const char *
1193 s390_branch_condition_mnemonic (rtx code, int inv)
1194 {
1195 int mask;
1196
1197 static const char *const mnemonic[16] =
1198 {
1199 NULL, "o", "h", "nle",
1200 "l", "nhe", "lh", "ne",
1201 "e", "nlh", "he", "nl",
1202 "le", "nh", "no", NULL
1203 };
1204
1205 if (GET_CODE (XEXP (code, 0)) == REG
1206 && REGNO (XEXP (code, 0)) == CC_REGNUM
1207 && XEXP (code, 1) == const0_rtx)
1208 mask = s390_branch_condition_mask (code);
1209 else
1210 mask = s390_compare_and_branch_condition_mask (code);
1211
1212 gcc_assert (mask >= 0);
1213
1214 if (inv)
1215 mask ^= 15;
1216
1217 gcc_assert (mask >= 1 && mask <= 14);
1218
1219 return mnemonic[mask];
1220 }
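/* As a worked example (values chosen only for illustration): for a CCSmode
   LE comparison, s390_branch_condition_mask returns CC0 | CC1, i.e.
   (1 << 3) | (1 << 2) = 12, and mnemonic[12] is "le".  With INV set the
   mask is inverted to 3, which selects "nle", the inverted branch.  */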
1221
 1222 /* Return the part of OP which has a value different from DEF.
 1223    The size of the part is determined by MODE.
 1224    Use this function only if you already know that OP really
 1225    contains such a part.  */
1226
1227 unsigned HOST_WIDE_INT
1228 s390_extract_part (rtx op, enum machine_mode mode, int def)
1229 {
1230 unsigned HOST_WIDE_INT value = 0;
1231 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
1232 int part_bits = GET_MODE_BITSIZE (mode);
1233 unsigned HOST_WIDE_INT part_mask
1234 = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
1235 int i;
1236
1237 for (i = 0; i < max_parts; i++)
1238 {
1239 if (i == 0)
1240 value = (unsigned HOST_WIDE_INT) INTVAL (op);
1241 else
1242 value >>= part_bits;
1243
1244 if ((value & part_mask) != (def & part_mask))
1245 return value & part_mask;
1246 }
1247
1248 gcc_unreachable ();
1249 }
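/* As a worked example (values chosen only for illustration): with
   OP = (const_int 0x00ff0000), MODE = QImode and DEF = 0, the QImode parts
   are inspected starting from the least significant byte; parts 0 and 1
   equal DEF, part 2 is 0xff and differs, so 0xff is returned.  */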
1250
1251 /* If OP is an integer constant of mode MODE with exactly one
1252 part of mode PART_MODE unequal to DEF, return the number of that
1253 part. Otherwise, return -1. */
1254
1255 int
1256 s390_single_part (rtx op,
1257 enum machine_mode mode,
1258 enum machine_mode part_mode,
1259 int def)
1260 {
1261 unsigned HOST_WIDE_INT value = 0;
1262 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
1263 unsigned HOST_WIDE_INT part_mask
1264 = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
1265 int i, part = -1;
1266
1267 if (GET_CODE (op) != CONST_INT)
1268 return -1;
1269
1270 for (i = 0; i < n_parts; i++)
1271 {
1272 if (i == 0)
1273 value = (unsigned HOST_WIDE_INT) INTVAL (op);
1274 else
1275 value >>= GET_MODE_BITSIZE (part_mode);
1276
1277 if ((value & part_mask) != (def & part_mask))
1278 {
1279 if (part != -1)
1280 return -1;
1281 else
1282 part = i;
1283 }
1284 }
1285 return part == -1 ? -1 : n_parts - 1 - part;
1286 }
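/* As a worked example (values chosen only for illustration): for
   OP = (const_int 0x00ff0000) with MODE = SImode, PART_MODE = QImode and
   DEF = 0 there are four QImode parts; only the part at i == 2 (counted
   from the least significant end) differs from DEF, so the function
   returns n_parts - 1 - i = 1, i.e. the part number counted from the most
   significant end.  */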
1287
1288 /* Return true if IN contains a contiguous bitfield in the lower SIZE
1289 bits and no other bits are set in IN. POS and LENGTH can be used
1290 to obtain the start position and the length of the bitfield.
1291
1292 POS gives the position of the first bit of the bitfield counting
1293 from the lowest order bit starting with zero. In order to use this
 1294    value for S/390 instructions, this has to be converted to "bits big
1295 endian" style. */
1296
1297 bool
1298 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
1299 int *pos, int *length)
1300 {
1301 int tmp_pos = 0;
1302 int tmp_length = 0;
1303 int i;
1304 unsigned HOST_WIDE_INT mask = 1ULL;
1305 bool contiguous = false;
1306
1307 for (i = 0; i < size; mask <<= 1, i++)
1308 {
1309 if (contiguous)
1310 {
1311 if (mask & in)
1312 tmp_length++;
1313 else
1314 break;
1315 }
1316 else
1317 {
1318 if (mask & in)
1319 {
1320 contiguous = true;
1321 tmp_length++;
1322 }
1323 else
1324 tmp_pos++;
1325 }
1326 }
1327
1328 if (!tmp_length)
1329 return false;
1330
1331 /* Calculate a mask for all bits beyond the contiguous bits. */
1332 mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
1333
1334 if (mask & in)
1335 return false;
1336
1337 if (tmp_length + tmp_pos - 1 > size)
1338 return false;
1339
1340 if (length)
1341 *length = tmp_length;
1342
1343 if (pos)
1344 *pos = tmp_pos;
1345
1346 return true;
1347 }
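/* As a worked example (values chosen only for illustration): IN = 0x0ff0
   with SIZE = 16 holds a single contiguous run of set bits, so true is
   returned with *POS = 4 and *LENGTH = 8.  IN = 0x0ff1 fails because bit 0
   is set outside that run.  */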
1348
1349 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
1350 equivalent to a shift followed by the AND. In particular, CONTIG
1351 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
1352 for ROTL indicate a rotate to the right. */
1353
1354 bool
1355 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
1356 {
1357 int pos, len;
1358 bool ok;
1359
1360 ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
1361 gcc_assert (ok);
1362
1363 return ((rotl >= 0 && rotl <= pos)
1364 || (rotl < 0 && -rotl <= bitsize - len - pos));
1365 }
1366
1367 /* Check whether we can (and want to) split a double-word
1368 move in mode MODE from SRC to DST into two single-word
1369 moves, moving the subword FIRST_SUBWORD first. */
1370
1371 bool
1372 s390_split_ok_p (rtx dst, rtx src, enum machine_mode mode, int first_subword)
1373 {
1374 /* Floating point registers cannot be split. */
1375 if (FP_REG_P (src) || FP_REG_P (dst))
1376 return false;
1377
1378 /* We don't need to split if operands are directly accessible. */
1379 if (s_operand (src, mode) || s_operand (dst, mode))
1380 return false;
1381
1382 /* Non-offsettable memory references cannot be split. */
1383 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
1384 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
1385 return false;
1386
1387 /* Moving the first subword must not clobber a register
1388 needed to move the second subword. */
1389 if (register_operand (dst, mode))
1390 {
1391 rtx subreg = operand_subword (dst, first_subword, 0, mode);
1392 if (reg_overlap_mentioned_p (subreg, src))
1393 return false;
1394 }
1395
1396 return true;
1397 }
1398
1399 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
 1400    and [MEM2, MEM2 + SIZE] overlap, and false
1401 otherwise. */
1402
1403 bool
1404 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
1405 {
1406 rtx addr1, addr2, addr_delta;
1407 HOST_WIDE_INT delta;
1408
1409 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
1410 return true;
1411
1412 if (size == 0)
1413 return false;
1414
1415 addr1 = XEXP (mem1, 0);
1416 addr2 = XEXP (mem2, 0);
1417
1418 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
1419
1420 /* This overlapping check is used by peepholes merging memory block operations.
1421 Overlapping operations would otherwise be recognized by the S/390 hardware
1422 and would fall back to a slower implementation. Allowing overlapping
1423 operations would lead to slow code but not to wrong code. Therefore we are
1424 somewhat optimistic if we cannot prove that the memory blocks are
1425 overlapping.
1426 That's why we return false here although this may accept operations on
1427 overlapping memory areas. */
1428 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
1429 return false;
1430
1431 delta = INTVAL (addr_delta);
1432
1433 if (delta == 0
1434 || (delta > 0 && delta < size)
1435 || (delta < 0 && -delta < size))
1436 return true;
1437
1438 return false;
1439 }
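/* As a worked example (values chosen only for illustration): if MEM2
   addresses MEM1 + 4 and SIZE is 8, the delta of 4 lies strictly between
   0 and SIZE, so the blocks provably overlap and true is returned; with
   MEM2 at MEM1 + 8 the delta equals SIZE and false is returned.  */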
1440
1441 /* Check whether the address of memory reference MEM2 equals exactly
1442 the address of memory reference MEM1 plus DELTA. Return true if
1443 we can prove this to be the case, false otherwise. */
1444
1445 bool
1446 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
1447 {
1448 rtx addr1, addr2, addr_delta;
1449
1450 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
1451 return false;
1452
1453 addr1 = XEXP (mem1, 0);
1454 addr2 = XEXP (mem2, 0);
1455
1456 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
1457 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
1458 return false;
1459
1460 return true;
1461 }
1462
1463 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
1464
1465 void
1466 s390_expand_logical_operator (enum rtx_code code, enum machine_mode mode,
1467 rtx *operands)
1468 {
1469 enum machine_mode wmode = mode;
1470 rtx dst = operands[0];
1471 rtx src1 = operands[1];
1472 rtx src2 = operands[2];
1473 rtx op, clob, tem;
1474
1475 /* If we cannot handle the operation directly, use a temp register. */
1476 if (!s390_logical_operator_ok_p (operands))
1477 dst = gen_reg_rtx (mode);
1478
1479 /* QImode and HImode patterns make sense only if we have a destination
1480 in memory. Otherwise perform the operation in SImode. */
1481 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
1482 wmode = SImode;
1483
1484 /* Widen operands if required. */
1485 if (mode != wmode)
1486 {
1487 if (GET_CODE (dst) == SUBREG
1488 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
1489 dst = tem;
1490 else if (REG_P (dst))
1491 dst = gen_rtx_SUBREG (wmode, dst, 0);
1492 else
1493 dst = gen_reg_rtx (wmode);
1494
1495 if (GET_CODE (src1) == SUBREG
1496 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
1497 src1 = tem;
1498 else if (GET_MODE (src1) != VOIDmode)
1499 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
1500
1501 if (GET_CODE (src2) == SUBREG
1502 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
1503 src2 = tem;
1504 else if (GET_MODE (src2) != VOIDmode)
1505 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
1506 }
1507
1508 /* Emit the instruction. */
1509 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
1510 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
1511 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
1512
1513 /* Fix up the destination if needed. */
1514 if (dst != operands[0])
1515 emit_move_insn (operands[0], gen_lowpart (mode, dst));
1516 }
1517
1518 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
1519
1520 bool
1521 s390_logical_operator_ok_p (rtx *operands)
1522 {
1523 /* If the destination operand is in memory, it needs to coincide
1524 with one of the source operands. After reload, it has to be
1525 the first source operand. */
1526 if (GET_CODE (operands[0]) == MEM)
1527 return rtx_equal_p (operands[0], operands[1])
1528 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
1529
1530 return true;
1531 }
1532
1533 /* Narrow logical operation CODE of memory operand MEMOP with immediate
1534 operand IMMOP to switch from SS to SI type instructions. */
1535
1536 void
1537 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
1538 {
1539 int def = code == AND ? -1 : 0;
1540 HOST_WIDE_INT mask;
1541 int part;
1542
1543 gcc_assert (GET_CODE (*memop) == MEM);
1544 gcc_assert (!MEM_VOLATILE_P (*memop));
1545
1546 mask = s390_extract_part (*immop, QImode, def);
1547 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
1548 gcc_assert (part >= 0);
1549
1550 *memop = adjust_address (*memop, QImode, part);
1551 *immop = gen_int_mode (mask, QImode);
1552 }
1553
1554
1555 /* How to allocate a 'struct machine_function'. */
1556
1557 static struct machine_function *
1558 s390_init_machine_status (void)
1559 {
1560 return ggc_alloc_cleared_machine_function ();
1561 }
1562
1563 static void
1564 s390_option_override (void)
1565 {
1566 /* Set up function hooks. */
1567 init_machine_status = s390_init_machine_status;
1568
1569 /* Architecture mode defaults according to ABI. */
1570 if (!(target_flags_explicit & MASK_ZARCH))
1571 {
1572 if (TARGET_64BIT)
1573 target_flags |= MASK_ZARCH;
1574 else
1575 target_flags &= ~MASK_ZARCH;
1576 }
1577
1578 /* Set the march default in case it hasn't been specified on
 1579      the command line.  */
1580 if (s390_arch == PROCESSOR_max)
1581 {
 1582       s390_arch_string = TARGET_ZARCH ? "z900" : "g5";
1583 s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
1584 s390_arch_flags = processor_flags_table[(int)s390_arch];
1585 }
1586
1587 /* Determine processor to tune for. */
1588 if (s390_tune == PROCESSOR_max)
1589 {
1590 s390_tune = s390_arch;
1591 s390_tune_flags = s390_arch_flags;
1592 }
1593
1594 /* Sanity checks. */
1595 if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
1596 error ("z/Architecture mode not supported on %s", s390_arch_string);
1597 if (TARGET_64BIT && !TARGET_ZARCH)
1598 error ("64-bit ABI not supported in ESA/390 mode");
1599
1600 /* Use hardware DFP if available and not explicitly disabled by
 1601      the user, e.g. with -m31 -march=z10 -mzarch.  */
1602 if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
1603 target_flags |= MASK_HARD_DFP;
1604
1605 if (TARGET_HARD_DFP && !TARGET_DFP)
1606 {
1607 if (target_flags_explicit & MASK_HARD_DFP)
1608 {
1609 if (!TARGET_CPU_DFP)
1610 error ("hardware decimal floating point instructions"
1611 " not available on %s", s390_arch_string);
1612 if (!TARGET_ZARCH)
1613 error ("hardware decimal floating point instructions"
1614 " not available in ESA/390 mode");
1615 }
1616 else
1617 target_flags &= ~MASK_HARD_DFP;
1618 }
1619
1620 if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
1621 {
1622 if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
1623 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
1624
1625 target_flags &= ~MASK_HARD_DFP;
1626 }
1627
1628 /* Set processor cost function. */
1629 switch (s390_tune)
1630 {
1631 case PROCESSOR_2084_Z990:
1632 s390_cost = &z990_cost;
1633 break;
1634 case PROCESSOR_2094_Z9_109:
1635 s390_cost = &z9_109_cost;
1636 break;
1637 case PROCESSOR_2097_Z10:
1638 s390_cost = &z10_cost;
1639 break;
1640 case PROCESSOR_2817_Z196:
1641 s390_cost = &z196_cost;
1642 break;
1643 case PROCESSOR_2827_ZEC12:
1644 s390_cost = &zEC12_cost;
1645 break;
1646 default:
1647 s390_cost = &z900_cost;
1648 }
1649
1650 if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
1651 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
1652 "in combination");
1653
1654 if (s390_stack_size)
1655 {
1656 if (s390_stack_guard >= s390_stack_size)
1657 error ("stack size must be greater than the stack guard value");
1658 else if (s390_stack_size > 1 << 16)
1659 error ("stack size must not be greater than 64k");
1660 }
1661 else if (s390_stack_guard)
1662 error ("-mstack-guard implies use of -mstack-size");
1663
1664 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1665 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1666 target_flags |= MASK_LONG_DOUBLE_128;
1667 #endif
1668
1669 if (s390_tune == PROCESSOR_2097_Z10
1670 || s390_tune == PROCESSOR_2817_Z196
1671 || s390_tune == PROCESSOR_2827_ZEC12)
1672 {
1673 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
1674 global_options.x_param_values,
1675 global_options_set.x_param_values);
1676 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
1677 global_options.x_param_values,
1678 global_options_set.x_param_values);
1679 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
1680 global_options.x_param_values,
1681 global_options_set.x_param_values);
1682 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
1683 global_options.x_param_values,
1684 global_options_set.x_param_values);
1685 }
1686
1687 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
1688 global_options.x_param_values,
1689 global_options_set.x_param_values);
 1690   /* Values for loop prefetching.  */
1691 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
1692 global_options.x_param_values,
1693 global_options_set.x_param_values);
1694 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
1695 global_options.x_param_values,
1696 global_options_set.x_param_values);
 1697   /* s390 has more than 2 cache levels and their size is much larger.
 1698      Since we are always running virtualized, assume that we only get a
 1699      small part of the caches above L1.  */
1700 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
1701 global_options.x_param_values,
1702 global_options_set.x_param_values);
1703 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
1704 global_options.x_param_values,
1705 global_options_set.x_param_values);
1706 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
1707 global_options.x_param_values,
1708 global_options_set.x_param_values);
1709
1710 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
1711 requires the arch flags to be evaluated already. Since prefetching
1712 is beneficial on s390, we enable it if available. */
1713 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
1714 flag_prefetch_loop_arrays = 1;
1715
1716 /* Use the alternative scheduling-pressure algorithm by default. */
1717 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
1718 global_options.x_param_values,
1719 global_options_set.x_param_values);
1720
1721 if (TARGET_TPF)
1722 {
1723 /* Don't emit DWARF3/4 unless specifically selected. The TPF
1724 debuggers do not yet support DWARF 3/4. */
1725 if (!global_options_set.x_dwarf_strict)
1726 dwarf_strict = 1;
1727 if (!global_options_set.x_dwarf_version)
1728 dwarf_version = 2;
1729 }
1730 }
1731
1732 /* Map for smallest class containing reg regno. */
1733
1734 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
1735 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1736 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1737 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1738 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1739 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1740 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1741 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1742 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1743 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS,
1744 ACCESS_REGS, ACCESS_REGS
1745 };
1746
1747 /* Return attribute type of insn. */
1748
1749 static enum attr_type
1750 s390_safe_attr_type (rtx insn)
1751 {
1752 if (recog_memoized (insn) >= 0)
1753 return get_attr_type (insn);
1754 else
1755 return TYPE_NONE;
1756 }
1757
1758 /* Return true if DISP is a valid short displacement. */
1759
1760 static bool
1761 s390_short_displacement (rtx disp)
1762 {
1763 /* No displacement is OK. */
1764 if (!disp)
1765 return true;
1766
1767 /* Without the long displacement facility we don't need to
 1768      distinguish between long and short displacements.  */
1769 if (!TARGET_LONG_DISPLACEMENT)
1770 return true;
1771
1772 /* Integer displacement in range. */
1773 if (GET_CODE (disp) == CONST_INT)
1774 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
1775
1776 /* GOT offset is not OK, the GOT can be large. */
1777 if (GET_CODE (disp) == CONST
1778 && GET_CODE (XEXP (disp, 0)) == UNSPEC
1779 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
1780 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
1781 return false;
1782
1783 /* All other symbolic constants are literal pool references,
1784 which are OK as the literal pool must be small. */
1785 if (GET_CODE (disp) == CONST)
1786 return true;
1787
1788 return false;
1789 }
1790
1791 /* Decompose a RTL expression ADDR for a memory address into
1792 its components, returned in OUT.
1793
1794 Returns false if ADDR is not a valid memory address, true
1795 otherwise. If OUT is NULL, don't return the components,
1796 but check for validity only.
1797
1798 Note: Only addresses in canonical form are recognized.
1799 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
1800 canonical form so that they will be recognized. */
1801
1802 static int
1803 s390_decompose_address (rtx addr, struct s390_address *out)
1804 {
1805 HOST_WIDE_INT offset = 0;
1806 rtx base = NULL_RTX;
1807 rtx indx = NULL_RTX;
1808 rtx disp = NULL_RTX;
1809 rtx orig_disp;
1810 bool pointer = false;
1811 bool base_ptr = false;
1812 bool indx_ptr = false;
1813 bool literal_pool = false;
1814
1815 /* We may need to substitute the literal pool base register into the address
1816 below. However, at this point we do not know which register is going to
1817 be used as base, so we substitute the arg pointer register. This is going
1818 to be treated as holding a pointer below -- it shouldn't be used for any
1819 other purpose. */
1820 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
1821
1822 /* Decompose address into base + index + displacement. */
1823
1824 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
1825 base = addr;
1826
1827 else if (GET_CODE (addr) == PLUS)
1828 {
1829 rtx op0 = XEXP (addr, 0);
1830 rtx op1 = XEXP (addr, 1);
1831 enum rtx_code code0 = GET_CODE (op0);
1832 enum rtx_code code1 = GET_CODE (op1);
1833
1834 if (code0 == REG || code0 == UNSPEC)
1835 {
1836 if (code1 == REG || code1 == UNSPEC)
1837 {
1838 indx = op0; /* index + base */
1839 base = op1;
1840 }
1841
1842 else
1843 {
1844 base = op0; /* base + displacement */
1845 disp = op1;
1846 }
1847 }
1848
1849 else if (code0 == PLUS)
1850 {
1851 indx = XEXP (op0, 0); /* index + base + disp */
1852 base = XEXP (op0, 1);
1853 disp = op1;
1854 }
1855
1856 else
1857 {
1858 return false;
1859 }
1860 }
1861
1862 else
1863 disp = addr; /* displacement */
1864
1865 /* Extract integer part of displacement. */
1866 orig_disp = disp;
1867 if (disp)
1868 {
1869 if (GET_CODE (disp) == CONST_INT)
1870 {
1871 offset = INTVAL (disp);
1872 disp = NULL_RTX;
1873 }
1874 else if (GET_CODE (disp) == CONST
1875 && GET_CODE (XEXP (disp, 0)) == PLUS
1876 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
1877 {
1878 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
1879 disp = XEXP (XEXP (disp, 0), 0);
1880 }
1881 }
1882
1883 /* Strip off CONST here to avoid special case tests later. */
1884 if (disp && GET_CODE (disp) == CONST)
1885 disp = XEXP (disp, 0);
1886
1887 /* We can convert literal pool addresses to
1888 displacements by basing them off the base register. */
1889 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
1890 {
1891 /* Either base or index must be free to hold the base register. */
1892 if (!base)
1893 base = fake_pool_base, literal_pool = true;
1894 else if (!indx)
1895 indx = fake_pool_base, literal_pool = true;
1896 else
1897 return false;
1898
1899 /* Mark up the displacement. */
1900 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
1901 UNSPEC_LTREL_OFFSET);
1902 }
1903
1904 /* Validate base register. */
1905 if (base)
1906 {
1907 if (GET_CODE (base) == UNSPEC)
1908 switch (XINT (base, 1))
1909 {
1910 case UNSPEC_LTREF:
1911 if (!disp)
1912 disp = gen_rtx_UNSPEC (Pmode,
1913 gen_rtvec (1, XVECEXP (base, 0, 0)),
1914 UNSPEC_LTREL_OFFSET);
1915 else
1916 return false;
1917
1918 base = XVECEXP (base, 0, 1);
1919 break;
1920
1921 case UNSPEC_LTREL_BASE:
1922 if (XVECLEN (base, 0) == 1)
1923 base = fake_pool_base, literal_pool = true;
1924 else
1925 base = XVECEXP (base, 0, 1);
1926 break;
1927
1928 default:
1929 return false;
1930 }
1931
1932 if (!REG_P (base)
1933 || (GET_MODE (base) != SImode
1934 && GET_MODE (base) != Pmode))
1935 return false;
1936
1937 if (REGNO (base) == STACK_POINTER_REGNUM
1938 || REGNO (base) == FRAME_POINTER_REGNUM
1939 || ((reload_completed || reload_in_progress)
1940 && frame_pointer_needed
1941 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
1942 || REGNO (base) == ARG_POINTER_REGNUM
1943 || (flag_pic
1944 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
1945 pointer = base_ptr = true;
1946
1947 if ((reload_completed || reload_in_progress)
1948 && base == cfun->machine->base_reg)
1949 pointer = base_ptr = literal_pool = true;
1950 }
1951
1952 /* Validate index register. */
1953 if (indx)
1954 {
1955 if (GET_CODE (indx) == UNSPEC)
1956 switch (XINT (indx, 1))
1957 {
1958 case UNSPEC_LTREF:
1959 if (!disp)
1960 disp = gen_rtx_UNSPEC (Pmode,
1961 gen_rtvec (1, XVECEXP (indx, 0, 0)),
1962 UNSPEC_LTREL_OFFSET);
1963 else
1964 return false;
1965
1966 indx = XVECEXP (indx, 0, 1);
1967 break;
1968
1969 case UNSPEC_LTREL_BASE:
1970 if (XVECLEN (indx, 0) == 1)
1971 indx = fake_pool_base, literal_pool = true;
1972 else
1973 indx = XVECEXP (indx, 0, 1);
1974 break;
1975
1976 default:
1977 return false;
1978 }
1979
1980 if (!REG_P (indx)
1981 || (GET_MODE (indx) != SImode
1982 && GET_MODE (indx) != Pmode))
1983 return false;
1984
1985 if (REGNO (indx) == STACK_POINTER_REGNUM
1986 || REGNO (indx) == FRAME_POINTER_REGNUM
1987 || ((reload_completed || reload_in_progress)
1988 && frame_pointer_needed
1989 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
1990 || REGNO (indx) == ARG_POINTER_REGNUM
1991 || (flag_pic
1992 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
1993 pointer = indx_ptr = true;
1994
1995 if ((reload_completed || reload_in_progress)
1996 && indx == cfun->machine->base_reg)
1997 pointer = indx_ptr = literal_pool = true;
1998 }
1999
2000 /* Prefer to use pointer as base, not index. */
2001 if (base && indx && !base_ptr
2002 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2003 {
2004 rtx tmp = base;
2005 base = indx;
2006 indx = tmp;
2007 }
2008
2009 /* Validate displacement. */
2010 if (!disp)
2011 {
2012 /* If virtual registers are involved, the displacement will change later
2013 anyway as the virtual registers get eliminated. This could make a
2014 valid displacement invalid, but it is more likely to make an invalid
2015 displacement valid, because we sometimes access the register save area
2016 via negative offsets to one of those registers.
2017 Thus we don't check the displacement for validity here. If after
2018 elimination the displacement turns out to be invalid after all,
2019 this is fixed up by reload in any case. */
2020 if (base != arg_pointer_rtx
2021 && indx != arg_pointer_rtx
2022 && base != return_address_pointer_rtx
2023 && indx != return_address_pointer_rtx
2024 && base != frame_pointer_rtx
2025 && indx != frame_pointer_rtx
2026 && base != virtual_stack_vars_rtx
2027 && indx != virtual_stack_vars_rtx)
2028 if (!DISP_IN_RANGE (offset))
2029 return false;
2030 }
2031 else
2032 {
2033 /* All the special cases are pointers. */
2034 pointer = true;
2035
2036 /* In the small-PIC case, the linker converts @GOT
2037 and @GOTNTPOFF offsets to possible displacements. */
2038 if (GET_CODE (disp) == UNSPEC
2039 && (XINT (disp, 1) == UNSPEC_GOT
2040 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2041 && flag_pic == 1)
2042 {
2043 ;
2044 }
2045
2046 /* Accept pool label offsets. */
2047 else if (GET_CODE (disp) == UNSPEC
2048 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2049 ;
2050
2051 /* Accept literal pool references. */
2052 else if (GET_CODE (disp) == UNSPEC
2053 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2054 {
2055 /* In case CSE pulled a non-literal-pool reference out of
2056 the pool, we have to reject the address. This is
2057 especially important when loading the GOT pointer on
2058 non-zarch CPUs. In this case the literal pool contains an
2059 lt-relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2060 will most likely exceed the displacement range. */
2061 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2062 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2063 return false;
2064
2065 orig_disp = gen_rtx_CONST (Pmode, disp);
2066 if (offset)
2067 {
2068 /* If we have an offset, make sure it does not
2069 exceed the size of the constant pool entry. */
2070 rtx sym = XVECEXP (disp, 0, 0);
2071 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2072 return false;
2073
2074 orig_disp = plus_constant (Pmode, orig_disp, offset);
2075 }
2076 }
2077
2078 else
2079 return false;
2080 }
2081
2082 if (!base && !indx)
2083 pointer = true;
2084
2085 if (out)
2086 {
2087 out->base = base;
2088 out->indx = indx;
2089 out->disp = orig_disp;
2090 out->pointer = pointer;
2091 out->literal_pool = literal_pool;
2092 }
2093
2094 return true;
2095 }
2096
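/* Illustrative sketch (not part of the original sources): how a typical
   base + index + displacement address decomposes.  The helper name, the
   register numbers and the constant below are made-up example values.  */
#if 0
static void
s390_decompose_address_example (void)
{
  /* Build (plus (plus (reg 2) (reg 3)) (const_int 100)).  */
  rtx addr = gen_rtx_PLUS (Pmode,
                           gen_rtx_PLUS (Pmode,
                                         gen_rtx_REG (Pmode, 2),
                                         gen_rtx_REG (Pmode, 3)),
                           GEN_INT (100));
  struct s390_address ad;

  /* On success: ad.indx = (reg 2), ad.base = (reg 3),
     ad.disp = (const_int 100).  */
  if (s390_decompose_address (addr, &ad))
    gcc_assert (ad.base && ad.indx && ad.disp);
}
#endif
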
2097 /* Decompose an RTL expression OP for a shift count into its components,
2098 and return the base register in BASE and the offset in OFFSET.
2099
2100 Return true if OP is a valid shift count, false if not. */
2101
2102 bool
2103 s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
2104 {
2105 HOST_WIDE_INT off = 0;
2106
2107 /* We can have an integer constant, an address register,
2108 or a sum of the two. */
2109 if (GET_CODE (op) == CONST_INT)
2110 {
2111 off = INTVAL (op);
2112 op = NULL_RTX;
2113 }
2114 if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
2115 {
2116 off = INTVAL (XEXP (op, 1));
2117 op = XEXP (op, 0);
2118 }
2119 while (op && GET_CODE (op) == SUBREG)
2120 op = SUBREG_REG (op);
2121
2122 if (op && GET_CODE (op) != REG)
2123 return false;
2124
2125 if (offset)
2126 *offset = off;
2127 if (base)
2128 *base = op;
2129
2130 return true;
2131 }
2132
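/* Illustrative sketch (not part of the original sources): a shift count of
   the form (plus (reg 1) (const_int 5)) decomposes into base = (reg 1) and
   offset = 5, while a plain (const_int 5) yields base = NULL_RTX and
   offset = 5.  The helper name and the operands below are made up.  */
#if 0
static void
s390_decompose_shift_count_example (void)
{
  rtx op = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 1), GEN_INT (5));
  rtx base;
  HOST_WIDE_INT off;

  if (s390_decompose_shift_count (op, &base, &off))
    gcc_assert (REG_P (base) && off == 5);
}
#endif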
2133
2134 /* Return true if OP is a memory operand whose address is valid and contains no index register. */
2135
2136 bool
2137 s390_legitimate_address_without_index_p (rtx op)
2138 {
2139 struct s390_address addr;
2140
2141 if (!s390_decompose_address (XEXP (op, 0), &addr))
2142 return false;
2143 if (addr.indx)
2144 return false;
2145
2146 return true;
2147 }
2148
2149
2150 /* Return TRUE if ADDR is an operand valid for a load/store relative
2151 instruction. Be aware that the alignment of the operand needs to
2152 be checked separately.
2153 Valid addresses are single references or a sum of a reference and a
2154 constant integer. Return these parts in SYMREF and ADDEND. You can
2155 pass NULL in SYMREF and/or ADDEND if you are not interested in these
2156 values. Literal pool references are *not* considered symbol
2157 references. */
2158
2159 static bool
2160 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
2161 {
2162 HOST_WIDE_INT tmpaddend = 0;
2163
2164 if (GET_CODE (addr) == CONST)
2165 addr = XEXP (addr, 0);
2166
2167 if (GET_CODE (addr) == PLUS)
2168 {
2169 if (!CONST_INT_P (XEXP (addr, 1)))
2170 return false;
2171
2172 tmpaddend = INTVAL (XEXP (addr, 1));
2173 addr = XEXP (addr, 0);
2174 }
2175
2176 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
2177 || (GET_CODE (addr) == UNSPEC
2178 && (XINT (addr, 1) == UNSPEC_GOTENT
2179 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
2180 {
2181 if (symref)
2182 *symref = addr;
2183 if (addend)
2184 *addend = tmpaddend;
2185
2186 return true;
2187 }
2188 return false;
2189 }
2190
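/* For illustration (a sketch, not from the original sources): an address of
   the form (const (plus (symbol_ref "sym") (const_int 8))) is a valid
   load-relative operand with *SYMREF = (symbol_ref "sym") and *ADDEND = 8,
   provided "sym" is not a constant pool entry; a literal pool SYMBOL_REF by
   itself is rejected.  */
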
2191 /* Return true if the address in OP is valid for constraint letter C
2192 if wrapped in a MEM rtx. LIT_POOL_OK specifies whether literal
2193 pool MEMs should be accepted. Only the Q, R, S, T constraint
2194 letters are allowed for C. */
2195
2196 static int
2197 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
2198 {
2199 struct s390_address addr;
2200 bool decomposed = false;
2201
2202 /* This check makes sure that no symbolic addresses (except literal
2203 pool references) are accepted by the R or T constraints. */
2204 if (s390_loadrelative_operand_p (op, NULL, NULL))
2205 return 0;
2206
2207 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
2208 if (!lit_pool_ok)
2209 {
2210 if (!s390_decompose_address (op, &addr))
2211 return 0;
2212 if (addr.literal_pool)
2213 return 0;
2214 decomposed = true;
2215 }
2216
2217 switch (c)
2218 {
2219 case 'Q': /* no index short displacement */
2220 if (!decomposed && !s390_decompose_address (op, &addr))
2221 return 0;
2222 if (addr.indx)
2223 return 0;
2224 if (!s390_short_displacement (addr.disp))
2225 return 0;
2226 break;
2227
2228 case 'R': /* with index short displacement */
2229 if (TARGET_LONG_DISPLACEMENT)
2230 {
2231 if (!decomposed && !s390_decompose_address (op, &addr))
2232 return 0;
2233 if (!s390_short_displacement (addr.disp))
2234 return 0;
2235 }
2236 /* Any invalid address here will be fixed up by reload,
2237 so accept it for the most generic constraint. */
2238 break;
2239
2240 case 'S': /* no index long displacement */
2241 if (!TARGET_LONG_DISPLACEMENT)
2242 return 0;
2243 if (!decomposed && !s390_decompose_address (op, &addr))
2244 return 0;
2245 if (addr.indx)
2246 return 0;
2247 if (s390_short_displacement (addr.disp))
2248 return 0;
2249 break;
2250
2251 case 'T': /* with index long displacement */
2252 if (!TARGET_LONG_DISPLACEMENT)
2253 return 0;
2254 /* Any invalid address here will be fixed up by reload,
2255 so accept it for the most generic constraint. */
2256 if ((decomposed || s390_decompose_address (op, &addr))
2257 && s390_short_displacement (addr.disp))
2258 return 0;
2259 break;
2260 default:
2261 return 0;
2262 }
2263 return 1;
2264 }
2265
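/* Rough summary of the checks above (for illustration only): 'Q' and 'S'
   reject addresses with an index register while 'R' and 'T' allow one;
   'Q' and 'R' want a short (12-bit) displacement, whereas 'S' and 'T' cover
   displacements that need the long (20-bit) form and therefore also require
   TARGET_LONG_DISPLACEMENT.  The most generic constraint of each group also
   accepts some invalid addresses and leaves them for reload to fix up.  */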
2266
2267 /* Evaluates constraint strings described by the regular expression
2268 ([ABZ](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
2269 the constraint given in STR, and 0 otherwise. */
2270
2271 int
2272 s390_mem_constraint (const char *str, rtx op)
2273 {
2274 char c = str[0];
2275
2276 switch (c)
2277 {
2278 case 'A':
2279 /* Check for offsettable variants of memory constraints. */
2280 if (!MEM_P (op) || MEM_VOLATILE_P (op))
2281 return 0;
2282 if ((reload_completed || reload_in_progress)
2283 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
2284 return 0;
2285 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
2286 case 'B':
2287 /* Check for non-literal-pool variants of memory constraints. */
2288 if (!MEM_P (op))
2289 return 0;
2290 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
2291 case 'Q':
2292 case 'R':
2293 case 'S':
2294 case 'T':
2295 if (GET_CODE (op) != MEM)
2296 return 0;
2297 return s390_check_qrst_address (c, XEXP (op, 0), true);
2298 case 'U':
2299 return (s390_check_qrst_address ('Q', op, true)
2300 || s390_check_qrst_address ('R', op, true));
2301 case 'W':
2302 return (s390_check_qrst_address ('S', op, true)
2303 || s390_check_qrst_address ('T', op, true));
2304 case 'Y':
2305 /* Simply check for the basic form of a shift count. Reload will
2306 take care of making sure we have a proper base register. */
2307 if (!s390_decompose_shift_count (op, NULL, NULL))
2308 return 0;
2309 break;
2310 case 'Z':
2311 return s390_check_qrst_address (str[1], op, true);
2312 default:
2313 return 0;
2314 }
2315 return 1;
2316 }
2317
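/* For illustration: the two-letter constraint "AQ" accepts an offsettable,
   non-volatile MEM whose address has no index register and a short
   displacement, while "BQ" applies the same 'Q' address check but rejects
   literal pool references (and does not require the MEM to be
   offsettable).  */
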
2318
2319 /* Evaluates constraint strings starting with letter O. Input
2320 parameter C is the letter following the "O" in the constraint
2321 string. Returns 1 if VALUE meets the respective constraint and 0
2322 otherwise. */
2323
2324 int
2325 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
2326 {
2327 if (!TARGET_EXTIMM)
2328 return 0;
2329
2330 switch (c)
2331 {
2332 case 's':
2333 return trunc_int_for_mode (value, SImode) == value;
2334
2335 case 'p':
2336 return value == 0
2337 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
2338
2339 case 'n':
2340 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
2341
2342 default:
2343 gcc_unreachable ();
2344 }
2345 }
2346
2347
2348 /* Evaluates constraint strings starting with letter N. Parameter STR
2349 contains the letters following letter "N" in the constraint string.
2350 Returns true if VALUE matches the constraint. */
2351
2352 int
2353 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
2354 {
2355 enum machine_mode mode, part_mode;
2356 int def;
2357 int part, part_goal;
2358
2359
2360 if (str[0] == 'x')
2361 part_goal = -1;
2362 else
2363 part_goal = str[0] - '0';
2364
2365 switch (str[1])
2366 {
2367 case 'Q':
2368 part_mode = QImode;
2369 break;
2370 case 'H':
2371 part_mode = HImode;
2372 break;
2373 case 'S':
2374 part_mode = SImode;
2375 break;
2376 default:
2377 return 0;
2378 }
2379
2380 switch (str[2])
2381 {
2382 case 'H':
2383 mode = HImode;
2384 break;
2385 case 'S':
2386 mode = SImode;
2387 break;
2388 case 'D':
2389 mode = DImode;
2390 break;
2391 default:
2392 return 0;
2393 }
2394
2395 switch (str[3])
2396 {
2397 case '0':
2398 def = 0;
2399 break;
2400 case 'F':
2401 def = -1;
2402 break;
2403 default:
2404 return 0;
2405 }
2406
2407 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
2408 return 0;
2409
2410 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
2411 if (part < 0)
2412 return 0;
2413 if (part_goal != -1 && part_goal != part)
2414 return 0;
2415
2416 return 1;
2417 }
2418
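/* Worked example (hypothetical constraint letters, for illustration only):
   for the string "xQD0" following the 'N', PART_GOAL is -1 ('x' = any part),
   PART_MODE is QImode, MODE is DImode and DEF is 0.  A VALUE such as
   0x0000ff0000000000 then matches, because exactly one QImode chunk of the
   DImode value differs from the default byte 0.  */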
2419
2420 /* Returns true if the input parameter VALUE is a float zero. */
2421
2422 int
2423 s390_float_const_zero_p (rtx value)
2424 {
2425 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
2426 && value == CONST0_RTX (GET_MODE (value)));
2427 }
2428
2429 /* Implement TARGET_REGISTER_MOVE_COST. */
2430
2431 static int
2432 s390_register_move_cost (enum machine_mode mode,
2433 reg_class_t from, reg_class_t to)
2434 {
2435 /* On s390, copy between fprs and gprs is expensive as long as no
2436 ldgr/lgdr can be used. */
2437 if ((!TARGET_Z10 || GET_MODE_SIZE (mode) != 8)
2438 && ((reg_classes_intersect_p (from, GENERAL_REGS)
2439 && reg_classes_intersect_p (to, FP_REGS))
2440 || (reg_classes_intersect_p (from, FP_REGS)
2441 && reg_classes_intersect_p (to, GENERAL_REGS))))
2442 return 10;
2443
2444 return 1;
2445 }
2446
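/* Example (for illustration): moving a DImode value between a GPR and an
   FPR costs 1 on z10 and later, where the 8-byte ldgr/lgdr transfer is
   available, but 10 otherwise; the same move in SImode is always charged
   10, and moves that stay within the GPRs or within the FPRs cost 1.  */
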
2447 /* Implement TARGET_MEMORY_MOVE_COST. */
2448
2449 static int
2450 s390_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
2451 reg_class_t rclass ATTRIBUTE_UNUSED,
2452 bool in ATTRIBUTE_UNUSED)
2453 {
2454 return 1;
2455 }
2456
2457 /* Compute a (partial) cost for rtx X. Return true if the complete
2458 cost has been computed, and false if subexpressions should be
2459 scanned. In either case, *TOTAL contains the cost result.
2460 CODE contains GET_CODE (x), OUTER_CODE contains the code
2461 of the superexpression of x. */
2462
2463 static bool
2464 s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
2465 int *total, bool speed)
2466 {
2467 switch (code)
2468 {
2469 case CONST:
2470 case CONST_INT:
2471 case LABEL_REF:
2472 case SYMBOL_REF:
2473 case CONST_DOUBLE:
2474 case MEM:
2475 *total = 0;
2476 return true;
2477
2478 case ASHIFT:
2479 case ASHIFTRT:
2480 case LSHIFTRT:
2481 case ROTATE:
2482 case ROTATERT:
2483 case AND:
2484 case IOR:
2485 case XOR:
2486 case NEG:
2487 case NOT:
2488 *total = COSTS_N_INSNS (1);
2489 return false;
2490
2491 case PLUS:
2492 case MINUS:
2493 *total = COSTS_N_INSNS (1);
2494 return false;
2495
2496 case MULT:
2497 switch (GET_MODE (x))
2498 {
2499 case SImode:
2500 {
2501 rtx left = XEXP (x, 0);
2502 rtx right = XEXP (x, 1);
2503 if (GET_CODE (right) == CONST_INT
2504 && CONST_OK_FOR_K (INTVAL (right)))
2505 *total = s390_cost->mhi;
2506 else if (GET_CODE (left) == SIGN_EXTEND)
2507 *total = s390_cost->mh;
2508 else
2509 *total = s390_cost->ms; /* msr, ms, msy */
2510 break;
2511 }
2512 case DImode:
2513 {
2514 rtx left = XEXP (x, 0);
2515 rtx right = XEXP (x, 1);
2516 if (TARGET_ZARCH)
2517 {
2518 if (GET_CODE (right) == CONST_INT
2519 && CONST_OK_FOR_K (INTVAL (right)))
2520 *total = s390_cost->mghi;
2521 else if (GET_CODE (left) == SIGN_EXTEND)
2522 *total = s390_cost->msgf;
2523 else
2524 *total = s390_cost->msg; /* msgr, msg */
2525 }
2526 else /* TARGET_31BIT */
2527 {
2528 if (GET_CODE (left) == SIGN_EXTEND
2529 && GET_CODE (right) == SIGN_EXTEND)
2530 /* mulsidi case: mr, m */
2531 *total = s390_cost->m;
2532 else if (GET_CODE (left) == ZERO_EXTEND
2533 && GET_CODE (right) == ZERO_EXTEND
2534 && TARGET_CPU_ZARCH)
2535 /* umulsidi case: ml, mlr */
2536 *total = s390_cost->ml;
2537 else
2538 /* Complex calculation is required. */
2539 *total = COSTS_N_INSNS (40);
2540 }
2541 break;
2542 }
2543 case SFmode:
2544 case DFmode:
2545 *total = s390_cost->mult_df;
2546 break;
2547 case TFmode:
2548 *total = s390_cost->mxbr;
2549 break;
2550 default:
2551 return false;
2552 }
2553 return false;
2554
2555 case FMA:
2556 switch (GET_MODE (x))
2557 {
2558 case DFmode:
2559 *total = s390_cost->madbr;
2560 break;
2561 case SFmode:
2562 *total = s390_cost->maebr;
2563 break;
2564 default:
2565 return false;
2566 }
2567 /* Negation of the third argument is free: FMSUB. */
2568 if (GET_CODE (XEXP (x, 2)) == NEG)
2569 {
2570 *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
2571 + rtx_cost (XEXP (x, 1), FMA, 1, speed)
2572 + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
2573 return true;
2574 }
2575 return false;
2576
2577 case UDIV:
2578 case UMOD:
2579 if (GET_MODE (x) == TImode) /* 128 bit division */
2580 *total = s390_cost->dlgr;
2581 else if (GET_MODE (x) == DImode)
2582 {
2583 rtx right = XEXP (x, 1);
2584 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2585 *total = s390_cost->dlr;
2586 else /* 64 by 64 bit division */
2587 *total = s390_cost->dlgr;
2588 }
2589 else if (GET_MODE (x) == SImode) /* 32 bit division */
2590 *total = s390_cost->dlr;
2591 return false;
2592
2593 case DIV:
2594 case MOD:
2595 if (GET_MODE (x) == DImode)
2596 {
2597 rtx right = XEXP (x, 1);
2598 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2599 if (TARGET_ZARCH)
2600 *total = s390_cost->dsgfr;
2601 else
2602 *total = s390_cost->dr;
2603 else /* 64 by 64 bit division */
2604 *total = s390_cost->dsgr;
2605 }
2606 else if (GET_MODE (x) == SImode) /* 32 bit division */
2607 *total = s390_cost->dlr;
2608 else if (GET_MODE (x) == SFmode)
2609 {
2610 *total = s390_cost->debr;
2611 }
2612 else if (GET_MODE (x) == DFmode)
2613 {
2614 *total = s390_cost->ddbr;
2615 }
2616 else if (GET_MODE (x) == TFmode)
2617 {
2618 *total = s390_cost->dxbr;
2619 }
2620 return false;
2621
2622 case SQRT:
2623 if (GET_MODE (x) == SFmode)
2624 *total = s390_cost->sqebr;
2625 else if (GET_MODE (x) == DFmode)
2626 *total = s390_cost->sqdbr;
2627 else /* TFmode */
2628 *total = s390_cost->sqxbr;
2629 return false;
2630
2631 case SIGN_EXTEND:
2632 case ZERO_EXTEND:
2633 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
2634 || outer_code == PLUS || outer_code == MINUS
2635 || outer_code == COMPARE)
2636 *total = 0;
2637 return false;
2638
2639 case COMPARE:
2640 *total = COSTS_N_INSNS (1);
2641 if (GET_CODE (XEXP (x, 0)) == AND
2642 && GET_CODE (XEXP (x, 1)) == CONST_INT
2643 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2644 {
2645 rtx op0 = XEXP (XEXP (x, 0), 0);
2646 rtx op1 = XEXP (XEXP (x, 0), 1);
2647 rtx op2 = XEXP (x, 1);
2648
2649 if (memory_operand (op0, GET_MODE (op0))
2650 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
2651 return true;
2652 if (register_operand (op0, GET_MODE (op0))
2653 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
2654 return true;
2655 }
2656 return false;
2657
2658 default:
2659 return false;
2660 }
2661 }
2662
2663 /* Return the cost of an address rtx ADDR. */
2664
2665 static int
2666 s390_address_cost (rtx addr, enum machine_mode mode ATTRIBUTE_UNUSED,
2667 addr_space_t as ATTRIBUTE_UNUSED,
2668 bool speed ATTRIBUTE_UNUSED)
2669 {
2670 struct s390_address ad;
2671 if (!s390_decompose_address (addr, &ad))
2672 return 1000;
2673
2674 return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
2675 }
2676
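/* For illustration: a base + displacement address costs COSTS_N_INSNS (1),
   a base + index + displacement address costs COSTS_N_INSNS (1) + 1, and
   anything s390_decompose_address rejects is charged a prohibitive 1000,
   so the optimizers mildly prefer index-free addresses.  */
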
2677 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
2678 otherwise return 0. */
2679
2680 int
2681 tls_symbolic_operand (rtx op)
2682 {
2683 if (GET_CODE (op) != SYMBOL_REF)
2684 return 0;
2685 return SYMBOL_REF_TLS_MODEL (op);
2686 }
2687 \f
2688 /* Split DImode access register reference REG (on 64-bit) into its constituent
2689 low and high parts, and store them into LO and HI. Note that gen_lowpart/
2690 gen_highpart cannot be used as they assume all registers are word-sized,
2691 while our access registers have only half that size. */
2692
2693 void
2694 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
2695 {
2696 gcc_assert (TARGET_64BIT);
2697 gcc_assert (ACCESS_REG_P (reg));
2698 gcc_assert (GET_MODE (reg) == DImode);
2699 gcc_assert (!(REGNO (reg) & 1));
2700
2701 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
2702 *hi = gen_rtx_REG (SImode, REGNO (reg));
2703 }
2704
2705 /* Return true if OP contains a symbol reference. */
2706
2707 bool
2708 symbolic_reference_mentioned_p (rtx op)
2709 {
2710 const char *fmt;
2711 int i;
2712
2713 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2714 return 1;
2715
2716 fmt = GET_RTX_FORMAT (GET_CODE (op));
2717 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2718 {
2719 if (fmt[i] == 'E')
2720 {
2721 int j;
2722
2723 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2724 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2725 return 1;
2726 }
2727
2728 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2729 return 1;
2730 }
2731
2732 return 0;
2733 }
2734
2735 /* Return true if OP contains a reference to a thread-local symbol. */
2736
2737 bool
2738 tls_symbolic_reference_mentioned_p (rtx op)
2739 {
2740 const char *fmt;
2741 int i;
2742
2743 if (GET_CODE (op) == SYMBOL_REF)
2744 return tls_symbolic_operand (op);
2745
2746 fmt = GET_RTX_FORMAT (GET_CODE (op));
2747 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2748 {
2749 if (fmt[i] == 'E')
2750 {
2751 int j;
2752
2753 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2754 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2755 return true;
2756 }
2757
2758 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
2759 return true;
2760 }
2761
2762 return false;
2763 }
2764
2765
2766 /* Return true if OP is a legitimate general operand when
2767 generating PIC code. It is given that flag_pic is on
2768 and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2769
2770 int
2771 legitimate_pic_operand_p (rtx op)
2772 {
2773 /* Accept all non-symbolic constants. */
2774 if (!SYMBOLIC_CONST (op))
2775 return 1;
2776
2777 /* Reject everything else; must be handled
2778 via emit_symbolic_move. */
2779 return 0;
2780 }
2781
2782 /* Returns true if the constant value OP is a legitimate general operand.
2783 It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2784
2785 static bool
2786 s390_legitimate_constant_p (enum machine_mode mode, rtx op)
2787 {
2788 /* Accept all non-symbolic constants. */
2789 if (!SYMBOLIC_CONST (op))
2790 return 1;
2791
2792 /* Accept immediate LARL operands. */
2793 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
2794 return 1;
2795
2796 /* Thread-local symbols are never legal constants. This is
2797 so that emit_call knows that computing such addresses
2798 might require a function call. */
2799 if (TLS_SYMBOLIC_CONST (op))
2800 return 0;
2801
2802 /* In the PIC case, symbolic constants must *not* be
2803 forced into the literal pool. We accept them here,
2804 so that they will be handled by emit_symbolic_move. */
2805 if (flag_pic)
2806 return 1;
2807
2808 /* All remaining non-PIC symbolic constants are
2809 forced into the literal pool. */
2810 return 0;
2811 }
2812
2813 /* Determine if it's legal to put X into the constant pool. This
2814 is not possible if X contains the address of a symbol that is
2815 not constant (TLS) or not known at final link time (PIC). */
2816
2817 static bool
2818 s390_cannot_force_const_mem (enum machine_mode mode, rtx x)
2819 {
2820 switch (GET_CODE (x))
2821 {
2822 case CONST_INT:
2823 case CONST_DOUBLE:
2824 /* Accept all non-symbolic constants. */
2825 return false;
2826
2827 case LABEL_REF:
2828 /* Labels are OK iff we are non-PIC. */
2829 return flag_pic != 0;
2830
2831 case SYMBOL_REF:
2832 /* 'Naked' TLS symbol references are never OK,
2833 non-TLS symbols are OK iff we are non-PIC. */
2834 if (tls_symbolic_operand (x))
2835 return true;
2836 else
2837 return flag_pic != 0;
2838
2839 case CONST:
2840 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
2841 case PLUS:
2842 case MINUS:
2843 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
2844 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
2845
2846 case UNSPEC:
2847 switch (XINT (x, 1))
2848 {
2849 /* Only lt-relative or GOT-relative UNSPECs are OK. */
2850 case UNSPEC_LTREL_OFFSET:
2851 case UNSPEC_GOT:
2852 case UNSPEC_GOTOFF:
2853 case UNSPEC_PLTOFF:
2854 case UNSPEC_TLSGD:
2855 case UNSPEC_TLSLDM:
2856 case UNSPEC_NTPOFF:
2857 case UNSPEC_DTPOFF:
2858 case UNSPEC_GOTNTPOFF:
2859 case UNSPEC_INDNTPOFF:
2860 return false;
2861
2862 /* If the literal pool shares the code section, execute template
2863 placeholders may be put into the pool as well. */
2864 case UNSPEC_INSN:
2865 return TARGET_CPU_ZARCH;
2866
2867 default:
2868 return true;
2869 }
2870 break;
2871
2872 default:
2873 gcc_unreachable ();
2874 }
2875 }
2876
2877 /* Returns true if the constant value OP is a legitimate general
2878 operand during and after reload. The difference from
2879 legitimate_constant_p is that this function will not accept
2880 a constant that would need to be forced to the literal pool
2881 before it can be used as an operand.
2882 This function accepts all constants which can be loaded directly
2883 into a GPR. */
2884
2885 bool
2886 legitimate_reload_constant_p (rtx op)
2887 {
2888 /* Accept la(y) operands. */
2889 if (GET_CODE (op) == CONST_INT
2890 && DISP_IN_RANGE (INTVAL (op)))
2891 return true;
2892
2893 /* Accept l(g)hi/l(g)fi operands. */
2894 if (GET_CODE (op) == CONST_INT
2895 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
2896 return true;
2897
2898 /* Accept lliXX operands. */
2899 if (TARGET_ZARCH
2900 && GET_CODE (op) == CONST_INT
2901 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2902 && s390_single_part (op, word_mode, HImode, 0) >= 0)
2903 return true;
2904
2905 if (TARGET_EXTIMM
2906 && GET_CODE (op) == CONST_INT
2907 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2908 && s390_single_part (op, word_mode, SImode, 0) >= 0)
2909 return true;
2910
2911 /* Accept larl operands. */
2912 if (TARGET_CPU_ZARCH
2913 && larl_operand (op, VOIDmode))
2914 return true;
2915
2916 /* Accept floating-point zero operands that fit into a single GPR. */
2917 if (GET_CODE (op) == CONST_DOUBLE
2918 && s390_float_const_zero_p (op)
2919 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
2920 return true;
2921
2922 /* Accept double-word operands that can be split. */
2923 if (GET_CODE (op) == CONST_INT
2924 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
2925 {
2926 enum machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
2927 rtx hi = operand_subword (op, 0, 0, dword_mode);
2928 rtx lo = operand_subword (op, 1, 0, dword_mode);
2929 return legitimate_reload_constant_p (hi)
2930 && legitimate_reload_constant_p (lo);
2931 }
2932
2933 /* Everything else cannot be handled without reload. */
2934 return false;
2935 }
2936
2937 /* Returns true if the constant value OP is a legitimate fp operand
2938 during and after reload.
2939 This function accepts all constants which can be loaded directly
2940 into an FPR. */
2941
2942 static bool
2943 legitimate_reload_fp_constant_p (rtx op)
2944 {
2945 /* Accept floating-point zero operands if the load zero instruction
2946 can be used. Prior to z196 the load fp zero instruction caused a
2947 performance penalty if the result is used as a BFP number. */
2948 if (TARGET_Z196
2949 && GET_CODE (op) == CONST_DOUBLE
2950 && s390_float_const_zero_p (op))
2951 return true;
2952
2953 return false;
2954 }
2955
2956 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
2957 return the class of reg to actually use. */
2958
2959 static reg_class_t
2960 s390_preferred_reload_class (rtx op, reg_class_t rclass)
2961 {
2962 switch (GET_CODE (op))
2963 {
2964 /* Constants we cannot reload into general registers
2965 must be forced into the literal pool. */
2966 case CONST_DOUBLE:
2967 case CONST_INT:
2968 if (reg_class_subset_p (GENERAL_REGS, rclass)
2969 && legitimate_reload_constant_p (op))
2970 return GENERAL_REGS;
2971 else if (reg_class_subset_p (ADDR_REGS, rclass)
2972 && legitimate_reload_constant_p (op))
2973 return ADDR_REGS;
2974 else if (reg_class_subset_p (FP_REGS, rclass)
2975 && legitimate_reload_fp_constant_p (op))
2976 return FP_REGS;
2977 return NO_REGS;
2978
2979 /* If a symbolic constant or a PLUS is reloaded,
2980 it is most likely being used as an address, so
2981 prefer ADDR_REGS. If RCLASS is not a superset
2982 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
2983 case CONST:
2984 /* A larl operand with an odd addend will get fixed via secondary
2985 reload. So don't request that it be pushed into the literal
2986 pool. */
2987 if (TARGET_CPU_ZARCH
2988 && GET_CODE (XEXP (op, 0)) == PLUS
2989 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
2990 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
2991 {
2992 if (reg_class_subset_p (ADDR_REGS, rclass))
2993 return ADDR_REGS;
2994 else
2995 return NO_REGS;
2996 }
2997 /* fallthrough */
2998 case LABEL_REF:
2999 case SYMBOL_REF:
3000 if (!legitimate_reload_constant_p (op))
3001 return NO_REGS;
3002 /* fallthrough */
3003 case PLUS:
3004 /* load address will be used. */
3005 if (reg_class_subset_p (ADDR_REGS, rclass))
3006 return ADDR_REGS;
3007 else
3008 return NO_REGS;
3009
3010 default:
3011 break;
3012 }
3013
3014 return rclass;
3015 }
3016
3017 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
3018 multiple of ALIGNMENT and the SYMBOL_REF being naturally
3019 aligned. */
3020
3021 bool
3022 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
3023 {
3024 HOST_WIDE_INT addend;
3025 rtx symref;
3026
3027 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3028 return false;
3029
3030 if (addend & (alignment - 1))
3031 return false;
3032
3033 if (GET_CODE (symref) == SYMBOL_REF
3034 && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref))
3035 return true;
3036
3037 if (GET_CODE (symref) == UNSPEC
3038 && alignment <= UNITS_PER_LONG)
3039 return true;
3040
3041 return false;
3042 }
3043
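/* Example (for illustration): with ALIGNMENT == 4, an address of the form
   (const (plus (symbol_ref "sym") (const_int 6))) is rejected because the
   addend 6 is not a multiple of 4; with an addend of 8 it is accepted as
   long as "sym" itself is known to be naturally aligned.  */
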
3044 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
3045 operand, SCRATCH is used to load the even part of the address
3046 first and one is then added. */
3047
3048 void
3049 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
3050 {
3051 HOST_WIDE_INT addend;
3052 rtx symref;
3053
3054 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3055 gcc_unreachable ();
3056
3057 if (!(addend & 1))
3058 /* Easy case. The addend is even so larl will do fine. */
3059 emit_move_insn (reg, addr);
3060 else
3061 {
3062 /* We can leave the scratch register untouched if the target
3063 register is a valid base register. */
3064 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
3065 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
3066 scratch = reg;
3067
3068 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
3069 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
3070
3071 if (addend != 1)
3072 emit_move_insn (scratch,
3073 gen_rtx_CONST (Pmode,
3074 gen_rtx_PLUS (Pmode, symref,
3075 GEN_INT (addend - 1))));
3076 else
3077 emit_move_insn (scratch, symref);
3078
3079 /* Increment the address using la in order to avoid clobbering cc. */
3080 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
3081 }
3082 }
3083
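/* Illustrative sketch of the sequence emitted for an odd addend N
   (hypothetical symbol and registers, not from the original sources):

       larl    %r1, sym+(N-1)   # even part of the address into SCRATCH
       la      %r2, 1(%r1)      # add the remaining 1 without clobbering cc

   For an even addend a single larl into REG is sufficient.  */
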
3084 /* Generate what is necessary to move between REG and MEM using
3085 SCRATCH. The direction is given by TOMEM. */
3086
3087 void
3088 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
3089 {
3090 /* Reload might have pulled a constant out of the literal pool.
3091 Force it back in. */
3092 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
3093 || GET_CODE (mem) == CONST)
3094 mem = force_const_mem (GET_MODE (reg), mem);
3095
3096 gcc_assert (MEM_P (mem));
3097
3098 /* For a load from memory we can leave the scratch register
3099 untouched if the target register is a valid base register. */
3100 if (!tomem
3101 && REGNO (reg) < FIRST_PSEUDO_REGISTER
3102 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
3103 && GET_MODE (reg) == GET_MODE (scratch))
3104 scratch = reg;
3105
3106 /* Load address into scratch register. Since we can't have a
3107 secondary reload for a secondary reload we have to cover the case
3108 where larl would need a secondary reload here as well. */
3109 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
3110
3111 /* Now we can use a standard load/store to do the move. */
3112 if (tomem)
3113 emit_move_insn (replace_equiv_address (mem, scratch), reg);
3114 else
3115 emit_move_insn (reg, replace_equiv_address (mem, scratch));
3116 }
3117
3118 /* Inform reload about cases where moving X with a mode MODE to a register in
3119 RCLASS requires an extra scratch or immediate register. Return the class
3120 needed for the immediate register. */
3121
3122 static reg_class_t
3123 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
3124 enum machine_mode mode, secondary_reload_info *sri)
3125 {
3126 enum reg_class rclass = (enum reg_class) rclass_i;
3127
3128 /* Intermediate register needed. */
3129 if (reg_classes_intersect_p (CC_REGS, rclass))
3130 return GENERAL_REGS;
3131
3132 if (TARGET_Z10)
3133 {
3134 HOST_WIDE_INT offset;
3135 rtx symref;
3136
3137 /* On z10 several optimizer steps may generate larl operands with
3138 an odd addend. */
3139 if (in_p
3140 && s390_loadrelative_operand_p (x, &symref, &offset)
3141 && mode == Pmode
3142 && !SYMBOL_REF_ALIGN1_P (symref)
3143 && (offset & 1) == 1)
3144 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
3145 : CODE_FOR_reloadsi_larl_odd_addend_z10);
3146
3147 /* On z10 we need a scratch register when moving QI, TI or floating
3148 point mode values from or to a memory location with a SYMBOL_REF,
3149 when moving DImode values on 31-bit, or if the symref addend of an
3150 HI, SI or DI move is not aligned to the width of the access. */
3151 if (MEM_P (x)
3152 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
3153 && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode)
3154 || (!TARGET_ZARCH && mode == DImode)
3155 || ((mode == HImode || mode == SImode || mode == DImode)
3156 && (!s390_check_symref_alignment (XEXP (x, 0),
3157 GET_MODE_SIZE (mode))))))
3158 {
3159 #define __SECONDARY_RELOAD_CASE(M,m) \
3160 case M##mode: \
3161 if (TARGET_64BIT) \
3162 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
3163 CODE_FOR_reload##m##di_tomem_z10; \
3164 else \
3165 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
3166 CODE_FOR_reload##m##si_tomem_z10; \
3167 break;
3168
3169 switch (GET_MODE (x))
3170 {
3171 __SECONDARY_RELOAD_CASE (QI, qi);
3172 __SECONDARY_RELOAD_CASE (HI, hi);
3173 __SECONDARY_RELOAD_CASE (SI, si);
3174 __SECONDARY_RELOAD_CASE (DI, di);
3175 __SECONDARY_RELOAD_CASE (TI, ti);
3176 __SECONDARY_RELOAD_CASE (SF, sf);
3177 __SECONDARY_RELOAD_CASE (DF, df);
3178 __SECONDARY_RELOAD_CASE (TF, tf);
3179 __SECONDARY_RELOAD_CASE (SD, sd);
3180 __SECONDARY_RELOAD_CASE (DD, dd);
3181 __SECONDARY_RELOAD_CASE (TD, td);
3182
3183 default:
3184 gcc_unreachable ();
3185 }
3186 #undef __SECONDARY_RELOAD_CASE
3187 }
3188 }
3189
3190 /* We need a scratch register when loading a PLUS expression which
3191 is not a legitimate operand of the LOAD ADDRESS instruction. */
3192 if (in_p && s390_plus_operand (x, mode))
3193 sri->icode = (TARGET_64BIT ?
3194 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
3195
3196 /* When performing a multiword move from or to memory, we have to make sure the
3197 second chunk in memory is addressable without causing a displacement
3198 overflow. If that would be the case we calculate the address in
3199 a scratch register. */
3200 if (MEM_P (x)
3201 && GET_CODE (XEXP (x, 0)) == PLUS
3202 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3203 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
3204 + GET_MODE_SIZE (mode) - 1))
3205 {
3206 /* For GENERAL_REGS a displacement overflow is no problem if occurring
3207 in an s_operand address since we may fall back to lm/stm. So we only
3208 have to care about overflows in the b+i+d case. */
3209 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
3210 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
3211 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
3212 /* For FP_REGS no lm/stm is available so this check is triggered
3213 for displacement overflows in b+i+d and b+d like addresses. */
3214 || (reg_classes_intersect_p (FP_REGS, rclass)
3215 && s390_class_max_nregs (FP_REGS, mode) > 1))
3216 {
3217 if (in_p)
3218 sri->icode = (TARGET_64BIT ?
3219 CODE_FOR_reloaddi_nonoffmem_in :
3220 CODE_FOR_reloadsi_nonoffmem_in);
3221 else
3222 sri->icode = (TARGET_64BIT ?
3223 CODE_FOR_reloaddi_nonoffmem_out :
3224 CODE_FOR_reloadsi_nonoffmem_out);
3225 }
3226 }
3227
3228 /* A scratch address register is needed when a symbolic constant is
3229 copied to r0 when compiling with -fPIC. In other cases the target
3230 register might be used as a temporary (see legitimize_pic_address).
3231 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
3232 sri->icode = (TARGET_64BIT ?
3233 CODE_FOR_reloaddi_PIC_addr :
3234 CODE_FOR_reloadsi_PIC_addr);
3235
3236 /* Either scratch or no register needed. */
3237 return NO_REGS;
3238 }
3239
3240 /* Generate code to load SRC, which is PLUS that is not a
3241 legitimate operand for the LA instruction, into TARGET.
3242 SCRATCH may be used as scratch register. */
3243
3244 void
3245 s390_expand_plus_operand (rtx target, rtx src,
3246 rtx scratch)
3247 {
3248 rtx sum1, sum2;
3249 struct s390_address ad;
3250
3251 /* src must be a PLUS; get its two operands. */
3252 gcc_assert (GET_CODE (src) == PLUS);
3253 gcc_assert (GET_MODE (src) == Pmode);
3254
3255 /* Check if any of the two operands is already scheduled
3256 for replacement by reload. This can happen e.g. when
3257 float registers occur in an address. */
3258 sum1 = find_replacement (&XEXP (src, 0));
3259 sum2 = find_replacement (&XEXP (src, 1));
3260 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3261
3262 /* If the address is already strictly valid, there's nothing to do. */
3263 if (!s390_decompose_address (src, &ad)
3264 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3265 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
3266 {
3267 /* Otherwise, one of the operands cannot be an address register;
3268 we reload its value into the scratch register. */
3269 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
3270 {
3271 emit_move_insn (scratch, sum1);
3272 sum1 = scratch;
3273 }
3274 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
3275 {
3276 emit_move_insn (scratch, sum2);
3277 sum2 = scratch;
3278 }
3279
3280 /* According to the way these invalid addresses are generated
3281 in reload.c, it should never happen (at least on s390) that
3282 *neither* of the PLUS components, after find_replacement
3283 was applied, is an address register. */
3284 if (sum1 == scratch && sum2 == scratch)
3285 {
3286 debug_rtx (src);
3287 gcc_unreachable ();
3288 }
3289
3290 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3291 }
3292
3293 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
3294 is only ever performed on addresses, so we can mark the
3295 sum as legitimate for LA in any case. */
3296 s390_load_address (target, src);
3297 }
3298
3299
3300 /* Return true if ADDR is a valid memory address.
3301 STRICT specifies whether strict register checking applies. */
3302
3303 static bool
3304 s390_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3305 {
3306 struct s390_address ad;
3307
3308 if (TARGET_Z10
3309 && larl_operand (addr, VOIDmode)
3310 && (mode == VOIDmode
3311 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
3312 return true;
3313
3314 if (!s390_decompose_address (addr, &ad))
3315 return false;
3316
3317 if (strict)
3318 {
3319 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3320 return false;
3321
3322 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
3323 return false;
3324 }
3325 else
3326 {
3327 if (ad.base
3328 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
3329 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
3330 return false;
3331
3332 if (ad.indx
3333 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
3334 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
3335 return false;
3336 }
3337 return true;
3338 }
3339
3340 /* Return true if OP is a valid operand for the LA instruction.
3341 In 31-bit, we need to prove that the result is used as an
3342 address, as LA performs only a 31-bit addition. */
3343
3344 bool
3345 legitimate_la_operand_p (rtx op)
3346 {
3347 struct s390_address addr;
3348 if (!s390_decompose_address (op, &addr))
3349 return false;
3350
3351 return (TARGET_64BIT || addr.pointer);
3352 }
3353
3354 /* Return true if it is valid *and* preferable to use LA to
3355 compute the sum of OP1 and OP2. */
3356
3357 bool
3358 preferred_la_operand_p (rtx op1, rtx op2)
3359 {
3360 struct s390_address addr;
3361
3362 if (op2 != const0_rtx)
3363 op1 = gen_rtx_PLUS (Pmode, op1, op2);
3364
3365 if (!s390_decompose_address (op1, &addr))
3366 return false;
3367 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
3368 return false;
3369 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
3370 return false;
3371
3372 /* Avoid LA instructions with index register on z196; it is
3373 preferable to use regular add instructions when possible.
3374 Starting with zEC12 the la with index register is "uncracked"
3375 again. */
3376 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
3377 return false;
3378
3379 if (!TARGET_64BIT && !addr.pointer)
3380 return false;
3381
3382 if (addr.pointer)
3383 return true;
3384
3385 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
3386 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
3387 return true;
3388
3389 return false;
3390 }
3391
3392 /* Emit a forced load-address operation to load SRC into DST.
3393 This will use the LOAD ADDRESS instruction even in situations
3394 where legitimate_la_operand_p (SRC) returns false. */
3395
3396 void
3397 s390_load_address (rtx dst, rtx src)
3398 {
3399 if (TARGET_64BIT)
3400 emit_move_insn (dst, src);
3401 else
3402 emit_insn (gen_force_la_31 (dst, src));
3403 }
3404
3405 /* Return a legitimate reference for ORIG (an address) using the
3406 register REG. If REG is 0, a new pseudo is generated.
3407
3408 There are two types of references that must be handled:
3409
3410 1. Global data references must load the address from the GOT, via
3411 the PIC reg. An insn is emitted to do this load, and the reg is
3412 returned.
3413
3414 2. Static data references, constant pool addresses, and code labels
3415 compute the address as an offset from the GOT, whose base is in
3416 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
3417 differentiate them from global data objects. The returned
3418 address is the PIC reg + an unspec constant.
3419
3420 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
3421 reg also appears in the address. */
3422
3423 rtx
3424 legitimize_pic_address (rtx orig, rtx reg)
3425 {
3426 rtx addr = orig;
3427 rtx addend = const0_rtx;
3428 rtx new_rtx = orig;
3429
3430 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
3431
3432 if (GET_CODE (addr) == CONST)
3433 addr = XEXP (addr, 0);
3434
3435 if (GET_CODE (addr) == PLUS)
3436 {
3437 addend = XEXP (addr, 1);
3438 addr = XEXP (addr, 0);
3439 }
3440
3441 if ((GET_CODE (addr) == LABEL_REF
3442 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
3443 || (GET_CODE (addr) == UNSPEC &&
3444 (XINT (addr, 1) == UNSPEC_GOTENT
3445 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3446 && GET_CODE (addend) == CONST_INT)
3447 {
3448 /* This can be locally addressed. */
3449
3450 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
3451 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
3452 gen_rtx_CONST (Pmode, addr) : addr);
3453
3454 if (TARGET_CPU_ZARCH
3455 && larl_operand (const_addr, VOIDmode)
3456 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
3457 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
3458 {
3459 if (INTVAL (addend) & 1)
3460 {
3461 /* LARL can't handle odd offsets, so emit a pair of LARL
3462 and LA. */
3463 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3464
3465 if (!DISP_IN_RANGE (INTVAL (addend)))
3466 {
3467 HOST_WIDE_INT even = INTVAL (addend) - 1;
3468 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
3469 addr = gen_rtx_CONST (Pmode, addr);
3470 addend = const1_rtx;
3471 }
3472
3473 emit_move_insn (temp, addr);
3474 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
3475
3476 if (reg != 0)
3477 {
3478 s390_load_address (reg, new_rtx);
3479 new_rtx = reg;
3480 }
3481 }
3482 else
3483 {
3484 /* If the offset is even, we can just use LARL. This
3485 will happen automatically. */
3486 }
3487 }
3488 else
3489 {
3490 /* No larl - Access local symbols relative to the GOT. */
3491
3492 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3493
3494 if (reload_in_progress || reload_completed)
3495 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3496
3497 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
3498 if (addend != const0_rtx)
3499 addr = gen_rtx_PLUS (Pmode, addr, addend);
3500 addr = gen_rtx_CONST (Pmode, addr);
3501 addr = force_const_mem (Pmode, addr);
3502 emit_move_insn (temp, addr);
3503
3504 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3505 if (reg != 0)
3506 {
3507 s390_load_address (reg, new_rtx);
3508 new_rtx = reg;
3509 }
3510 }
3511 }
3512 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
3513 {
3514 /* A non-local symbol reference without addend.
3515
3516 The symbol ref is wrapped into an UNSPEC to make sure the
3517 proper operand modifier (@GOT or @GOTENT) will be emitted.
3518 This will tell the linker to put the symbol into the GOT.
3519
3520 Additionally the code dereferencing the GOT slot is emitted here.
3521
3522 An addend to the symref needs to be added afterwards.
3523 legitimize_pic_address calls itself recursively to handle
3524 that case. So no need to do it here. */
3525
3526 if (reg == 0)
3527 reg = gen_reg_rtx (Pmode);
3528
3529 if (TARGET_Z10)
3530 {
3531 /* Use load relative if possible.
3532 lgrl <target>, sym@GOTENT */
3533 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3534 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3535 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
3536
3537 emit_move_insn (reg, new_rtx);
3538 new_rtx = reg;
3539 }
3540 else if (flag_pic == 1)
3541 {
3542 /* Assume GOT offset is a valid displacement operand (< 4k
3543 or < 512k with z990). This is handled the same way in
3544 both 31- and 64-bit code (@GOT).
3545 lg <target>, sym@GOT(r12) */
3546
3547 if (reload_in_progress || reload_completed)
3548 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3549
3550 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3551 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3552 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3553 new_rtx = gen_const_mem (Pmode, new_rtx);
3554 emit_move_insn (reg, new_rtx);
3555 new_rtx = reg;
3556 }
3557 else if (TARGET_CPU_ZARCH)
3558 {
3559 /* If the GOT offset might be >= 4k, we determine the position
3560 of the GOT entry via a PC-relative LARL (@GOTENT).
3561 larl temp, sym@GOTENT
3562 lg <target>, 0(temp) */
3563
3564 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3565
3566 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3567 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3568
3569 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3570 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3571 emit_move_insn (temp, new_rtx);
3572
3573 new_rtx = gen_const_mem (Pmode, temp);
3574 emit_move_insn (reg, new_rtx);
3575
3576 new_rtx = reg;
3577 }
3578 else
3579 {
3580 /* If the GOT offset might be >= 4k, we have to load it
3581 from the literal pool (@GOT).
3582
3583 lg temp, lit-litbase(r13)
3584 lg <target>, 0(temp)
3585 lit: .long sym@GOT */
3586
3587 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3588
3589 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3590 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3591
3592 if (reload_in_progress || reload_completed)
3593 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3594
3595 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3596 addr = gen_rtx_CONST (Pmode, addr);
3597 addr = force_const_mem (Pmode, addr);
3598 emit_move_insn (temp, addr);
3599
3600 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3601 new_rtx = gen_const_mem (Pmode, new_rtx);
3602 emit_move_insn (reg, new_rtx);
3603 new_rtx = reg;
3604 }
3605 }
3606 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
3607 {
3608 gcc_assert (XVECLEN (addr, 0) == 1);
3609 switch (XINT (addr, 1))
3610 {
3611 /* These UNSPECs address symbols (or PLT slots) relative to the GOT
3612 (not GOT slots!). In general this will exceed the
3613 displacement range, so these values belong in the literal
3614 pool. */
3615 case UNSPEC_GOTOFF:
3616 case UNSPEC_PLTOFF:
3617 new_rtx = force_const_mem (Pmode, orig);
3618 break;
3619
3620 /* For -fPIC the GOT size might exceed the displacement
3621 range so make sure the value is in the literal pool. */
3622 case UNSPEC_GOT:
3623 if (flag_pic == 2)
3624 new_rtx = force_const_mem (Pmode, orig);
3625 break;
3626
3627 /* For @GOTENT larl is used. This is handled like local
3628 symbol refs. */
3629 case UNSPEC_GOTENT:
3630 gcc_unreachable ();
3631 break;
3632
3633 /* @PLT is OK as-is on 64-bit, but must be converted to
3634 GOT-relative @PLTOFF on 31-bit. */
3635 case UNSPEC_PLT:
3636 if (!TARGET_CPU_ZARCH)
3637 {
3638 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3639
3640 if (reload_in_progress || reload_completed)
3641 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3642
3643 addr = XVECEXP (addr, 0, 0);
3644 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
3645 UNSPEC_PLTOFF);
3646 if (addend != const0_rtx)
3647 addr = gen_rtx_PLUS (Pmode, addr, addend);
3648 addr = gen_rtx_CONST (Pmode, addr);
3649 addr = force_const_mem (Pmode, addr);
3650 emit_move_insn (temp, addr);
3651
3652 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3653 if (reg != 0)
3654 {
3655 s390_load_address (reg, new_rtx);
3656 new_rtx = reg;
3657 }
3658 }
3659 else
3660 /* On 64 bit larl can be used. This case is handled like
3661 local symbol refs. */
3662 gcc_unreachable ();
3663 break;
3664
3665 /* Everything else cannot happen. */
3666 default:
3667 gcc_unreachable ();
3668 }
3669 }
3670 else if (addend != const0_rtx)
3671 {
3672 /* Otherwise, compute the sum. */
3673
3674 rtx base = legitimize_pic_address (addr, reg);
3675 new_rtx = legitimize_pic_address (addend,
3676 base == reg ? NULL_RTX : reg);
3677 if (GET_CODE (new_rtx) == CONST_INT)
3678 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
3679 else
3680 {
3681 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
3682 {
3683 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
3684 new_rtx = XEXP (new_rtx, 1);
3685 }
3686 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
3687 }
3688
3689 if (GET_CODE (new_rtx) == CONST)
3690 new_rtx = XEXP (new_rtx, 0);
3691 new_rtx = force_operand (new_rtx, 0);
3692 }
3693
3694 return new_rtx;
3695 }
3696
3697 /* Load the thread pointer into a register. */
3698
3699 rtx
3700 s390_get_thread_pointer (void)
3701 {
3702 rtx tp = gen_reg_rtx (Pmode);
3703
3704 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
3705 mark_reg_pointer (tp, BITS_PER_WORD);
3706
3707 return tp;
3708 }
3709
3710 /* Emit a TLS call insn. The call target is the SYMBOL_REF stored
3711 in s390_tls_symbol, which always refers to __tls_get_offset.
3712 The returned offset is written to RESULT_REG and a USE rtx is
3713 generated for TLS_CALL. */
3714
3715 static GTY(()) rtx s390_tls_symbol;
3716
3717 static void
3718 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
3719 {
3720 rtx insn;
3721
3722 if (!flag_pic)
3723 emit_insn (s390_load_got ());
3724
3725 if (!s390_tls_symbol)
3726 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
3727
3728 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
3729 gen_rtx_REG (Pmode, RETURN_REGNUM));
3730
3731 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
3732 RTL_CONST_CALL_P (insn) = 1;
3733 }
3734
3735 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3736 this (thread-local) address. REG may be used as temporary. */
3737
3738 static rtx
3739 legitimize_tls_address (rtx addr, rtx reg)
3740 {
3741 rtx new_rtx, tls_call, temp, base, r2, insn;
3742
3743 if (GET_CODE (addr) == SYMBOL_REF)
3744 switch (tls_symbolic_operand (addr))
3745 {
3746 case TLS_MODEL_GLOBAL_DYNAMIC:
3747 start_sequence ();
3748 r2 = gen_rtx_REG (Pmode, 2);
3749 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
3750 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3751 new_rtx = force_const_mem (Pmode, new_rtx);
3752 emit_move_insn (r2, new_rtx);
3753 s390_emit_tls_call_insn (r2, tls_call);
3754 insn = get_insns ();
3755 end_sequence ();
3756
3757 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3758 temp = gen_reg_rtx (Pmode);
3759 emit_libcall_block (insn, temp, r2, new_rtx);
3760
3761 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3762 if (reg != 0)
3763 {
3764 s390_load_address (reg, new_rtx);
3765 new_rtx = reg;
3766 }
3767 break;
3768
3769 case TLS_MODEL_LOCAL_DYNAMIC:
3770 start_sequence ();
3771 r2 = gen_rtx_REG (Pmode, 2);
3772 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
3773 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3774 new_rtx = force_const_mem (Pmode, new_rtx);
3775 emit_move_insn (r2, new_rtx);
3776 s390_emit_tls_call_insn (r2, tls_call);
3777 insn = get_insns ();
3778 end_sequence ();
3779
3780 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
3781 temp = gen_reg_rtx (Pmode);
3782 emit_libcall_block (insn, temp, r2, new_rtx);
3783
3784 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3785 base = gen_reg_rtx (Pmode);
3786 s390_load_address (base, new_rtx);
3787
3788 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
3789 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3790 new_rtx = force_const_mem (Pmode, new_rtx);
3791 temp = gen_reg_rtx (Pmode);
3792 emit_move_insn (temp, new_rtx);
3793
3794 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
3795 if (reg != 0)
3796 {
3797 s390_load_address (reg, new_rtx);
3798 new_rtx = reg;
3799 }
3800 break;
3801
3802 case TLS_MODEL_INITIAL_EXEC:
3803 if (flag_pic == 1)
3804 {
3805 /* Assume GOT offset < 4k. This is handled the same way
3806 in both 31- and 64-bit code. */
3807
3808 if (reload_in_progress || reload_completed)
3809 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3810
3811 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3812 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3813 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3814 new_rtx = gen_const_mem (Pmode, new_rtx);
3815 temp = gen_reg_rtx (Pmode);
3816 emit_move_insn (temp, new_rtx);
3817 }
3818 else if (TARGET_CPU_ZARCH)
3819 {
3820 /* If the GOT offset might be >= 4k, we determine the position
3821 of the GOT entry via a PC-relative LARL. */
3822
3823 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3824 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3825 temp = gen_reg_rtx (Pmode);
3826 emit_move_insn (temp, new_rtx);
3827
3828 new_rtx = gen_const_mem (Pmode, temp);
3829 temp = gen_reg_rtx (Pmode);
3830 emit_move_insn (temp, new_rtx);
3831 }
3832 else if (flag_pic)
3833 {
3834 /* If the GOT offset might be >= 4k, we have to load it
3835 from the literal pool. */
3836
3837 if (reload_in_progress || reload_completed)
3838 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3839
3840 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3841 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3842 new_rtx = force_const_mem (Pmode, new_rtx);
3843 temp = gen_reg_rtx (Pmode);
3844 emit_move_insn (temp, new_rtx);
3845
3846 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3847 new_rtx = gen_const_mem (Pmode, new_rtx);
3848
3849 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3850 temp = gen_reg_rtx (Pmode);
3851 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3852 }
3853 else
3854 {
3855 /* In position-dependent code, load the absolute address of
3856 the GOT entry from the literal pool. */
3857
3858 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3859 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3860 new_rtx = force_const_mem (Pmode, new_rtx);
3861 temp = gen_reg_rtx (Pmode);
3862 emit_move_insn (temp, new_rtx);
3863
3864 new_rtx = temp;
3865 new_rtx = gen_const_mem (Pmode, new_rtx);
3866 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3867 temp = gen_reg_rtx (Pmode);
3868 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3869 }
3870
3871 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3872 if (reg != 0)
3873 {
3874 s390_load_address (reg, new_rtx);
3875 new_rtx = reg;
3876 }
3877 break;
3878
3879 case TLS_MODEL_LOCAL_EXEC:
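/* The thread-pointer-relative offset of the symbol is known at link time; load it from the literal pool and add the thread pointer. */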
3880 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3881 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3882 new_rtx = force_const_mem (Pmode, new_rtx);
3883 temp = gen_reg_rtx (Pmode);
3884 emit_move_insn (temp, new_rtx);
3885
3886 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3887 if (reg != 0)
3888 {
3889 s390_load_address (reg, new_rtx);
3890 new_rtx = reg;
3891 }
3892 break;
3893
3894 default:
3895 gcc_unreachable ();
3896 }
3897
3898 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
3899 {
3900 switch (XINT (XEXP (addr, 0), 1))
3901 {
3902 case UNSPEC_INDNTPOFF:
3903 gcc_assert (TARGET_CPU_ZARCH);
3904 new_rtx = addr;
3905 break;
3906
3907 default:
3908 gcc_unreachable ();
3909 }
3910 }
3911
3912 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
3913 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3914 {
3915 new_rtx = XEXP (XEXP (addr, 0), 0);
3916 if (GET_CODE (new_rtx) != SYMBOL_REF)
3917 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3918
3919 new_rtx = legitimize_tls_address (new_rtx, reg);
3920 new_rtx = plus_constant (Pmode, new_rtx,
3921 INTVAL (XEXP (XEXP (addr, 0), 1)));
3922 new_rtx = force_operand (new_rtx, 0);
3923 }
3924
3925 else
3926 gcc_unreachable (); /* for now ... */
3927
3928 return new_rtx;
3929 }
3930
3931 /* Emit insns making the address in operands[1] valid for a standard
3932 move to operands[0]. operands[1] is replaced by an address which
3933 should be used instead of the former RTX to emit the move
3934 pattern. */
3935
3936 void
3937 emit_symbolic_move (rtx *operands)
3938 {
3939 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
3940
3941 if (GET_CODE (operands[0]) == MEM)
3942 operands[1] = force_reg (Pmode, operands[1]);
3943 else if (TLS_SYMBOLIC_CONST (operands[1]))
3944 operands[1] = legitimize_tls_address (operands[1], temp);
3945 else if (flag_pic)
3946 operands[1] = legitimize_pic_address (operands[1], temp);
3947 }
3948
3949 /* Try machine-dependent ways of modifying an illegitimate address X
3950 to be legitimate. If we find one, return the new, valid address.
3951
3952 OLDX is the address as it was before break_out_memory_refs was called.
3953 In some cases it is useful to look at this to decide what needs to be done.
3954
3955 MODE is the mode of the operand pointed to by X.
3956
3957 When -fpic is used, special handling is needed for symbolic references.
3958 See comments by legitimize_pic_address for details. */
3959
3960 static rtx
3961 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3962 enum machine_mode mode ATTRIBUTE_UNUSED)
3963 {
3964 rtx constant_term = const0_rtx;
3965
3966 if (TLS_SYMBOLIC_CONST (x))
3967 {
3968 x = legitimize_tls_address (x, 0);
3969
3970 if (s390_legitimate_address_p (mode, x, FALSE))
3971 return x;
3972 }
3973 else if (GET_CODE (x) == PLUS
3974 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
3975 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
3976 {
3977 return x;
3978 }
3979 else if (flag_pic)
3980 {
3981 if (SYMBOLIC_CONST (x)
3982 || (GET_CODE (x) == PLUS
3983 && (SYMBOLIC_CONST (XEXP (x, 0))
3984 || SYMBOLIC_CONST (XEXP (x, 1)))))
3985 x = legitimize_pic_address (x, 0);
3986
3987 if (s390_legitimate_address_p (mode, x, FALSE))
3988 return x;
3989 }
3990
3991 x = eliminate_constant_term (x, &constant_term);
3992
3993 /* Optimize loading of large displacements by splitting them
3994 into the multiple of 4K and the rest; this allows the
3995 former to be CSE'd if possible.
3996
3997 Don't do this if the displacement is added to a register
3998 pointing into the stack frame, as the offsets will
3999 change later anyway. */
4000
4001 if (GET_CODE (constant_term) == CONST_INT
4002 && !TARGET_LONG_DISPLACEMENT
4003 && !DISP_IN_RANGE (INTVAL (constant_term))
4004 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
4005 {
4006 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
4007 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
4008
4009 rtx temp = gen_reg_rtx (Pmode);
4010 rtx val = force_operand (GEN_INT (upper), temp);
4011 if (val != temp)
4012 emit_move_insn (temp, val);
4013
4014 x = gen_rtx_PLUS (Pmode, x, temp);
4015 constant_term = GEN_INT (lower);
4016 }
4017
4018 if (GET_CODE (x) == PLUS)
4019 {
4020 if (GET_CODE (XEXP (x, 0)) == REG)
4021 {
4022 rtx temp = gen_reg_rtx (Pmode);
4023 rtx val = force_operand (XEXP (x, 1), temp);
4024 if (val != temp)
4025 emit_move_insn (temp, val);
4026
4027 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
4028 }
4029
4030 else if (GET_CODE (XEXP (x, 1)) == REG)
4031 {
4032 rtx temp = gen_reg_rtx (Pmode);
4033 rtx val = force_operand (XEXP (x, 0), temp);
4034 if (val != temp)
4035 emit_move_insn (temp, val);
4036
4037 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
4038 }
4039 }
4040
4041 if (constant_term != const0_rtx)
4042 x = gen_rtx_PLUS (Pmode, x, constant_term);
4043
4044 return x;
4045 }
4046
4047 /* Try a machine-dependent way of reloading an illegitimate address
4048 operand AD. If we find one, push the reload and return the new address.
4049
4050 MODE is the mode of the enclosing MEM. OPNUM is the operand number
4051 and TYPE is the reload type of the current reload. */
4052
4053 rtx
4054 legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
4055 int opnum, int type)
4056 {
4057 if (!optimize || TARGET_LONG_DISPLACEMENT)
4058 return NULL_RTX;
4059
4060 if (GET_CODE (ad) == PLUS)
4061 {
4062 rtx tem = simplify_binary_operation (PLUS, Pmode,
4063 XEXP (ad, 0), XEXP (ad, 1));
4064 if (tem)
4065 ad = tem;
4066 }
4067
4068 if (GET_CODE (ad) == PLUS
4069 && GET_CODE (XEXP (ad, 0)) == REG
4070 && GET_CODE (XEXP (ad, 1)) == CONST_INT
4071 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
4072 {
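/* Split the out-of-range displacement into a 12-bit part that fits the instruction and a remainder that is a multiple of 4K; only the latter has to be reloaded into a base register. */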
4073 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
4074 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
4075 rtx cst, tem, new_rtx;
4076
4077 cst = GEN_INT (upper);
4078 if (!legitimate_reload_constant_p (cst))
4079 cst = force_const_mem (Pmode, cst);
4080
4081 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
4082 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
4083
4084 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
4085 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
4086 opnum, (enum reload_type) type);
4087 return new_rtx;
4088 }
4089
4090 return NULL_RTX;
4091 }
4092
4093 /* Emit code to move LEN bytes from SRC to DST. */
4094
4095 bool
4096 s390_expand_movmem (rtx dst, rtx src, rtx len)
4097 {
4098 /* When tuning for z10 or higher we rely on the Glibc functions to
4099 do the right thing. Only for constant lengths below 64k will we
4100 generate inline code. */
4101 if (s390_tune >= PROCESSOR_2097_Z10
4102 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4103 return false;
4104
4105 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4106 {
4107 if (INTVAL (len) > 0)
4108 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
4109 }
4110
4111 else if (TARGET_MVCLE)
4112 {
4113 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
4114 }
4115
4116 else
4117 {
4118 rtx dst_addr, src_addr, count, blocks, temp;
4119 rtx loop_start_label = gen_label_rtx ();
4120 rtx loop_end_label = gen_label_rtx ();
4121 rtx end_label = gen_label_rtx ();
4122 enum machine_mode mode;
4123
4124 mode = GET_MODE (len);
4125 if (mode == VOIDmode)
4126 mode = Pmode;
4127
4128 dst_addr = gen_reg_rtx (Pmode);
4129 src_addr = gen_reg_rtx (Pmode);
4130 count = gen_reg_rtx (mode);
4131 blocks = gen_reg_rtx (mode);
4132
4133 convert_move (count, len, 1);
4134 emit_cmp_and_jump_insns (count, const0_rtx,
4135 EQ, NULL_RTX, mode, 1, end_label);
4136
4137 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4138 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
4139 dst = change_address (dst, VOIDmode, dst_addr);
4140 src = change_address (src, VOIDmode, src_addr);
4141
4142 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4143 OPTAB_DIRECT);
4144 if (temp != count)
4145 emit_move_insn (count, temp);
4146
4147 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4148 OPTAB_DIRECT);
4149 if (temp != blocks)
4150 emit_move_insn (blocks, temp);
4151
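/* COUNT holds LEN - 1 and BLOCKS = COUNT >> 8 the number of full 256-byte MVCs emitted by the loop below; the leftover bytes are copied by the movmem_short following the loop. */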
4152 emit_cmp_and_jump_insns (blocks, const0_rtx,
4153 EQ, NULL_RTX, mode, 1, loop_end_label);
4154
4155 emit_label (loop_start_label);
4156
4157 if (TARGET_Z10
4158 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
4159 {
4160 rtx prefetch;
4161
4162 /* Issue a read prefetch for the +3 cache line. */
4163 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
4164 const0_rtx, const0_rtx);
4165 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4166 emit_insn (prefetch);
4167
4168 /* Issue a write prefetch for the +3 cache line. */
4169 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
4170 const1_rtx, const0_rtx);
4171 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4172 emit_insn (prefetch);
4173 }
4174
4175 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
4176 s390_load_address (dst_addr,
4177 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4178 s390_load_address (src_addr,
4179 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
4180
4181 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4182 OPTAB_DIRECT);
4183 if (temp != blocks)
4184 emit_move_insn (blocks, temp);
4185
4186 emit_cmp_and_jump_insns (blocks, const0_rtx,
4187 EQ, NULL_RTX, mode, 1, loop_end_label);
4188
4189 emit_jump (loop_start_label);
4190 emit_label (loop_end_label);
4191
4192 emit_insn (gen_movmem_short (dst, src,
4193 convert_to_mode (Pmode, count, 1)));
4194 emit_label (end_label);
4195 }
4196 return true;
4197 }
4198
4199 /* Emit code to set LEN bytes at DST to VAL.
4200 Make use of clrmem if VAL is zero. */
4201
4202 void
4203 s390_expand_setmem (rtx dst, rtx len, rtx val)
4204 {
4205 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
4206 return;
4207
4208 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
4209
4210 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
4211 {
4212 if (val == const0_rtx && INTVAL (len) <= 256)
4213 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
4214 else
4215 {
4216 /* Initialize memory by storing the first byte. */
4217 emit_move_insn (adjust_address (dst, QImode, 0), val);
4218
4219 if (INTVAL (len) > 1)
4220 {
4221 /* Initiate 1 byte overlap move.
4222 The first byte of DST is propagated through DSTP1.
4223 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
4224 DST is set to size 1 so the rest of the memory location
4225 does not count as source operand. */
4226 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
4227 set_mem_size (dst, 1);
4228
4229 emit_insn (gen_movmem_short (dstp1, dst,
4230 GEN_INT (INTVAL (len) - 2)));
4231 }
4232 }
4233 }
4234
4235 else if (TARGET_MVCLE)
4236 {
4237 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
4238 emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
4239 }
4240
4241 else
4242 {
4243 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
4244 rtx loop_start_label = gen_label_rtx ();
4245 rtx loop_end_label = gen_label_rtx ();
4246 rtx end_label = gen_label_rtx ();
4247 enum machine_mode mode;
4248
4249 mode = GET_MODE (len);
4250 if (mode == VOIDmode)
4251 mode = Pmode;
4252
4253 dst_addr = gen_reg_rtx (Pmode);
4254 count = gen_reg_rtx (mode);
4255 blocks = gen_reg_rtx (mode);
4256
4257 convert_move (count, len, 1);
4258 emit_cmp_and_jump_insns (count, const0_rtx,
4259 EQ, NULL_RTX, mode, 1, end_label);
4260
4261 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4262 dst = change_address (dst, VOIDmode, dst_addr);
4263
4264 if (val == const0_rtx)
4265 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4266 OPTAB_DIRECT);
4267 else
4268 {
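/* As in the constant-length case above, DSTP1 overlaps DST by one byte so that the overlapping move propagates the byte stored at DST through the whole block. */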
4269 dstp1 = adjust_address (dst, VOIDmode, 1);
4270 set_mem_size (dst, 1);
4271
4272 /* Initialize memory by storing the first byte. */
4273 emit_move_insn (adjust_address (dst, QImode, 0), val);
4274
4275 /* If count is 1 we are done. */
4276 emit_cmp_and_jump_insns (count, const1_rtx,
4277 EQ, NULL_RTX, mode, 1, end_label);
4278
4279 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
4280 OPTAB_DIRECT);
4281 }
4282 if (temp != count)
4283 emit_move_insn (count, temp);
4284
4285 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4286 OPTAB_DIRECT);
4287 if (temp != blocks)
4288 emit_move_insn (blocks, temp);
4289
4290 emit_cmp_and_jump_insns (blocks, const0_rtx,
4291 EQ, NULL_RTX, mode, 1, loop_end_label);
4292
4293 emit_label (loop_start_label);
4294
4295 if (TARGET_Z10
4296 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
4297 {
4298 /* Issue a write prefetch for the +4 cache line. */
4299 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
4300 GEN_INT (1024)),
4301 const1_rtx, const0_rtx);
4302 emit_insn (prefetch);
4303 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4304 }
4305
4306 if (val == const0_rtx)
4307 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
4308 else
4309 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
4310 s390_load_address (dst_addr,
4311 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4312
4313 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4314 OPTAB_DIRECT);
4315 if (temp != blocks)
4316 emit_move_insn (blocks, temp);
4317
4318 emit_cmp_and_jump_insns (blocks, const0_rtx,
4319 EQ, NULL_RTX, mode, 1, loop_end_label);
4320
4321 emit_jump (loop_start_label);
4322 emit_label (loop_end_label);
4323
4324 if (val == const0_rtx)
4325 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
4326 else
4327 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
4328 emit_label (end_label);
4329 }
4330 }
4331
4332 /* Emit code to compare LEN bytes at OP0 with those at OP1,
4333 and return the result in TARGET. */
4334
4335 bool
4336 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
4337 {
4338 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
4339 rtx tmp;
4340
4341 /* When tuning for z10 or higher we rely on the Glibc functions to
4342 do the right thing. Only for constant lengths below 64k will we
4343 generate inline code. */
4344 if (s390_tune >= PROCESSOR_2097_Z10
4345 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4346 return false;
4347
4348 /* As the result of CMPINT is inverted compared to what we need,
4349 we have to swap the operands. */
4350 tmp = op0; op0 = op1; op1 = tmp;
4351
4352 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4353 {
4354 if (INTVAL (len) > 0)
4355 {
4356 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
4357 emit_insn (gen_cmpint (target, ccreg));
4358 }
4359 else
4360 emit_move_insn (target, const0_rtx);
4361 }
4362 else if (TARGET_MVCLE)
4363 {
4364 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
4365 emit_insn (gen_cmpint (target, ccreg));
4366 }
4367 else
4368 {
4369 rtx addr0, addr1, count, blocks, temp;
4370 rtx loop_start_label = gen_label_rtx ();
4371 rtx loop_end_label = gen_label_rtx ();
4372 rtx end_label = gen_label_rtx ();
4373 enum machine_mode mode;
4374
4375 mode = GET_MODE (len);
4376 if (mode == VOIDmode)
4377 mode = Pmode;
4378
4379 addr0 = gen_reg_rtx (Pmode);
4380 addr1 = gen_reg_rtx (Pmode);
4381 count = gen_reg_rtx (mode);
4382 blocks = gen_reg_rtx (mode);
4383
4384 convert_move (count, len, 1);
4385 emit_cmp_and_jump_insns (count, const0_rtx,
4386 EQ, NULL_RTX, mode, 1, end_label);
4387
4388 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
4389 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
4390 op0 = change_address (op0, VOIDmode, addr0);
4391 op1 = change_address (op1, VOIDmode, addr1);
4392
4393 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4394 OPTAB_DIRECT);
4395 if (temp != count)
4396 emit_move_insn (count, temp);
4397
4398 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4399 OPTAB_DIRECT);
4400 if (temp != blocks)
4401 emit_move_insn (blocks, temp);
4402
4403 emit_cmp_and_jump_insns (blocks, const0_rtx,
4404 EQ, NULL_RTX, mode, 1, loop_end_label);
4405
4406 emit_label (loop_start_label);
4407
4408 if (TARGET_Z10
4409 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
4410 {
4411 rtx prefetch;
4412
4413 /* Issue a read prefetch for the +2 cache line of operand 1. */
4414 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
4415 const0_rtx, const0_rtx);
4416 emit_insn (prefetch);
4417 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4418
4419 /* Issue a read prefetch for the +2 cache line of operand 2. */
4420 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
4421 const0_rtx, const0_rtx);
4422 emit_insn (prefetch);
4423 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4424 }
4425
4426 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
4427 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
4428 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
4429 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
4430 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
4431 emit_jump_insn (temp);
4432
4433 s390_load_address (addr0,
4434 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
4435 s390_load_address (addr1,
4436 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
4437
4438 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4439 OPTAB_DIRECT);
4440 if (temp != blocks)
4441 emit_move_insn (blocks, temp);
4442
4443 emit_cmp_and_jump_insns (blocks, const0_rtx,
4444 EQ, NULL_RTX, mode, 1, loop_end_label);
4445
4446 emit_jump (loop_start_label);
4447 emit_label (loop_end_label);
4448
4449 emit_insn (gen_cmpmem_short (op0, op1,
4450 convert_to_mode (Pmode, count, 1)));
4451 emit_label (end_label);
4452
4453 emit_insn (gen_cmpint (target, ccreg));
4454 }
4455 return true;
4456 }
4457
4458
4459 /* Expand conditional increment or decrement using alc/slb instructions.
4460 Should generate code setting DST to either SRC or SRC + INCREMENT,
4461 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
4462 Returns true if successful, false otherwise.
4463
4464 That makes it possible to implement some if-constructs without jumps e.g.:
4465 (borrow = CC0 | CC1 and carry = CC2 | CC3)
4466 unsigned int a, b, c;
4467 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
4468 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
4469 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
4470 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
4471
4472 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
4473 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
4474 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
4475 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
4476 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
4477
4478 bool
4479 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
4480 rtx dst, rtx src, rtx increment)
4481 {
4482 enum machine_mode cmp_mode;
4483 enum machine_mode cc_mode;
4484 rtx op_res;
4485 rtx insn;
4486 rtvec p;
4487 int ret;
4488
4489 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
4490 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
4491 cmp_mode = SImode;
4492 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
4493 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
4494 cmp_mode = DImode;
4495 else
4496 return false;
4497
4498 /* Try ADD LOGICAL WITH CARRY. */
4499 if (increment == const1_rtx)
4500 {
4501 /* Determine CC mode to use. */
4502 if (cmp_code == EQ || cmp_code == NE)
4503 {
4504 if (cmp_op1 != const0_rtx)
4505 {
4506 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4507 NULL_RTX, 0, OPTAB_WIDEN);
4508 cmp_op1 = const0_rtx;
4509 }
4510
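/* With CMP_OP1 now known to be zero, equality to zero is equivalent to unsigned <= 0 and inequality to unsigned > 0. */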
4511 cmp_code = cmp_code == EQ ? LEU : GTU;
4512 }
4513
4514 if (cmp_code == LTU || cmp_code == LEU)
4515 {
4516 rtx tem = cmp_op0;
4517 cmp_op0 = cmp_op1;
4518 cmp_op1 = tem;
4519 cmp_code = swap_condition (cmp_code);
4520 }
4521
4522 switch (cmp_code)
4523 {
4524 case GTU:
4525 cc_mode = CCUmode;
4526 break;
4527
4528 case GEU:
4529 cc_mode = CCL3mode;
4530 break;
4531
4532 default:
4533 return false;
4534 }
4535
4536 /* Emit comparison instruction pattern. */
4537 if (!register_operand (cmp_op0, cmp_mode))
4538 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4539
4540 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4541 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4542 /* We use insn_invalid_p here to add clobbers if required. */
4543 ret = insn_invalid_p (emit_insn (insn), false);
4544 gcc_assert (!ret);
4545
4546 /* Emit ALC instruction pattern. */
4547 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4548 gen_rtx_REG (cc_mode, CC_REGNUM),
4549 const0_rtx);
4550
4551 if (src != const0_rtx)
4552 {
4553 if (!register_operand (src, GET_MODE (dst)))
4554 src = force_reg (GET_MODE (dst), src);
4555
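/* Build (plus (plus <carry> src) 0); the extra zero term is presumably there to match the three-operand shape of the alc insn patterns. */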
4556 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
4557 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
4558 }
4559
4560 p = rtvec_alloc (2);
4561 RTVEC_ELT (p, 0) =
4562 gen_rtx_SET (VOIDmode, dst, op_res);
4563 RTVEC_ELT (p, 1) =
4564 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4565 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4566
4567 return true;
4568 }
4569
4570 /* Try SUBTRACT LOGICAL WITH BORROW. */
4571 if (increment == constm1_rtx)
4572 {
4573 /* Determine CC mode to use. */
4574 if (cmp_code == EQ || cmp_code == NE)
4575 {
4576 if (cmp_op1 != const0_rtx)
4577 {
4578 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4579 NULL_RTX, 0, OPTAB_WIDEN);
4580 cmp_op1 = const0_rtx;
4581 }
4582
4583 cmp_code = cmp_code == EQ ? LEU : GTU;
4584 }
4585
4586 if (cmp_code == GTU || cmp_code == GEU)
4587 {
4588 rtx tem = cmp_op0;
4589 cmp_op0 = cmp_op1;
4590 cmp_op1 = tem;
4591 cmp_code = swap_condition (cmp_code);
4592 }
4593
4594 switch (cmp_code)
4595 {
4596 case LEU:
4597 cc_mode = CCUmode;
4598 break;
4599
4600 case LTU:
4601 cc_mode = CCL3mode;
4602 break;
4603
4604 default:
4605 return false;
4606 }
4607
4608 /* Emit comparison instruction pattern. */
4609 if (!register_operand (cmp_op0, cmp_mode))
4610 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4611
4612 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4613 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4614 /* We use insn_invalid_p here to add clobbers if required. */
4615 ret = insn_invalid_p (emit_insn (insn), false);
4616 gcc_assert (!ret);
4617
4618 /* Emit SLB instruction pattern. */
4619 if (!register_operand (src, GET_MODE (dst)))
4620 src = force_reg (GET_MODE (dst), src);
4621
4622 op_res = gen_rtx_MINUS (GET_MODE (dst),
4623 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
4624 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4625 gen_rtx_REG (cc_mode, CC_REGNUM),
4626 const0_rtx));
4627 p = rtvec_alloc (2);
4628 RTVEC_ELT (p, 0) =
4629 gen_rtx_SET (VOIDmode, dst, op_res);
4630 RTVEC_ELT (p, 1) =
4631 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4632 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4633
4634 return true;
4635 }
4636
4637 return false;
4638 }
4639
4640 /* Expand code for the insv template. Return true if successful. */
4641
4642 bool
4643 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
4644 {
4645 int bitsize = INTVAL (op1);
4646 int bitpos = INTVAL (op2);
4647 enum machine_mode mode = GET_MODE (dest);
4648 enum machine_mode smode;
4649 int smode_bsize, mode_bsize;
4650 rtx op, clobber;
4651
4652 if (bitsize + bitpos > GET_MODE_SIZE (mode))
4653 return false;
4654
4655 /* Generate INSERT IMMEDIATE (IILL et al). */
4656 /* (set (ze (reg)) (const_int)). */
4657 if (TARGET_ZARCH
4658 && register_operand (dest, word_mode)
4659 && (bitpos % 16) == 0
4660 && (bitsize % 16) == 0
4661 && const_int_operand (src, VOIDmode))
4662 {
4663 HOST_WIDE_INT val = INTVAL (src);
4664 int regpos = bitpos + bitsize;
4665
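/* Emit one INSERT IMMEDIATE per chunk: 32 bits at a time where TARGET_EXTIMM allows it, 16 bits otherwise. REGPOS walks down from the end of the field while VAL is consumed starting with its least significant bits. */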
4666 while (regpos > bitpos)
4667 {
4668 enum machine_mode putmode;
4669 int putsize;
4670
4671 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
4672 putmode = SImode;
4673 else
4674 putmode = HImode;
4675
4676 putsize = GET_MODE_BITSIZE (putmode);
4677 regpos -= putsize;
4678 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4679 GEN_INT (putsize),
4680 GEN_INT (regpos)),
4681 gen_int_mode (val, putmode));
4682 val >>= putsize;
4683 }
4684 gcc_assert (regpos == bitpos);
4685 return true;
4686 }
4687
4688 smode = smallest_mode_for_size (bitsize, MODE_INT);
4689 smode_bsize = GET_MODE_BITSIZE (smode);
4690 mode_bsize = GET_MODE_BITSIZE (mode);
4691
4692 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
4693 if (bitpos == 0
4694 && (bitsize % BITS_PER_UNIT) == 0
4695 && MEM_P (dest)
4696 && (register_operand (src, word_mode)
4697 || const_int_operand (src, VOIDmode)))
4698 {
4699 /* Emit standard pattern if possible. */
4700 if (smode_bsize == bitsize)
4701 {
4702 emit_move_insn (adjust_address (dest, smode, 0),
4703 gen_lowpart (smode, src));
4704 return true;
4705 }
4706
4707 /* (set (ze (mem)) (const_int)). */
4708 else if (const_int_operand (src, VOIDmode))
4709 {
4710 int size = bitsize / BITS_PER_UNIT;
4711 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
4712 BLKmode,
4713 UNITS_PER_WORD - size);
4714
4715 dest = adjust_address (dest, BLKmode, 0);
4716 set_mem_size (dest, size);
4717 s390_expand_movmem (dest, src_mem, GEN_INT (size));
4718 return true;
4719 }
4720
4721 /* (set (ze (mem)) (reg)). */
4722 else if (register_operand (src, word_mode))
4723 {
4724 if (bitsize <= 32)
4725 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
4726 const0_rtx), src);
4727 else
4728 {
4729 /* Emit st,stcmh sequence. */
4730 int stcmh_width = bitsize - 32;
4731 int size = stcmh_width / BITS_PER_UNIT;
4732
4733 emit_move_insn (adjust_address (dest, SImode, size),
4734 gen_lowpart (SImode, src));
4735 set_mem_size (dest, size);
4736 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4737 GEN_INT (stcmh_width),
4738 const0_rtx),
4739 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
4740 }
4741 return true;
4742 }
4743 }
4744
4745 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
4746 if ((bitpos % BITS_PER_UNIT) == 0
4747 && (bitsize % BITS_PER_UNIT) == 0
4748 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
4749 && MEM_P (src)
4750 && (mode == DImode || mode == SImode)
4751 && register_operand (dest, mode))
4752 {
4753 /* Emit a strict_low_part pattern if possible. */
4754 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
4755 {
4756 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
4757 op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src));
4758 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4759 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
4760 return true;
4761 }
4762
4763 /* ??? There are more powerful versions of ICM that are not
4764 completely represented in the md file. */
4765 }
4766
4767 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
4768 if (TARGET_Z10 && (mode == DImode || mode == SImode))
4769 {
4770 enum machine_mode mode_s = GET_MODE (src);
4771
4772 if (mode_s == VOIDmode)
4773 {
4774 /* Assume const_int etc already in the proper mode. */
4775 src = force_reg (mode, src);
4776 }
4777 else if (mode_s != mode)
4778 {
4779 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
4780 src = force_reg (mode_s, src);
4781 src = gen_lowpart (mode, src);
4782 }
4783
4784 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
4785 op = gen_rtx_SET (VOIDmode, op, src);
4786
4787 if (!TARGET_ZEC12)
4788 {
4789 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4790 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
4791 }
4792 emit_insn (op);
4793
4794 return true;
4795 }
4796
4797 return false;
4798 }
4799
4800 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
4801 register that holds VAL of mode MODE shifted by COUNT bits. */
4802
4803 static inline rtx
4804 s390_expand_mask_and_shift (rtx val, enum machine_mode mode, rtx count)
4805 {
4806 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
4807 NULL_RTX, 1, OPTAB_DIRECT);
4808 return expand_simple_binop (SImode, ASHIFT, val, count,
4809 NULL_RTX, 1, OPTAB_DIRECT);
4810 }
4811
4812 /* Structure to hold the initial parameters for a compare_and_swap operation
4813 in HImode and QImode. */
4814
4815 struct alignment_context
4816 {
4817 rtx memsi; /* SI aligned memory location. */
4818 rtx shift; /* Bit offset with regard to lsb. */
4819 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
4820 rtx modemaski; /* ~modemask */
4821 bool aligned; /* True if memory is aligned, false otherwise. */
4822 };
4823
4824 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
4825 structure AC for transparently simplifying the access, if the memory alignment
4826 is known to be at least 32 bits. MEM is the memory location for the actual operation
4827 and MODE its mode. */
4828
4829 static void
4830 init_alignment_context (struct alignment_context *ac, rtx mem,
4831 enum machine_mode mode)
4832 {
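/* SHIFT starts out as the byte distance between the least significant byte of an aligned SImode word and the least significant byte of the HQImode value; s390 is big-endian, so a value at the start of the word occupies its high-order bytes. */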
4833 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
4834 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
4835
4836 if (ac->aligned)
4837 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
4838 else
4839 {
4840 /* Alignment is unknown. */
4841 rtx byteoffset, addr, align;
4842
4843 /* Force the address into a register. */
4844 addr = force_reg (Pmode, XEXP (mem, 0));
4845
4846 /* Align it to SImode. */
4847 align = expand_simple_binop (Pmode, AND, addr,
4848 GEN_INT (-GET_MODE_SIZE (SImode)),
4849 NULL_RTX, 1, OPTAB_DIRECT);
4850 /* Generate MEM. */
4851 ac->memsi = gen_rtx_MEM (SImode, align);
4852 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
4853 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
4854 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
4855
4856 /* Calculate shiftcount. */
4857 byteoffset = expand_simple_binop (Pmode, AND, addr,
4858 GEN_INT (GET_MODE_SIZE (SImode) - 1),
4859 NULL_RTX, 1, OPTAB_DIRECT);
4860 /* As we already have some offset, evaluate the remaining distance. */
4861 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
4862 NULL_RTX, 1, OPTAB_DIRECT);
4863 }
4864
4865 /* Shift is the byte count, but we need the bitcount. */
4866 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
4867 NULL_RTX, 1, OPTAB_DIRECT);
4868
4869 /* Calculate masks. */
4870 ac->modemask = expand_simple_binop (SImode, ASHIFT,
4871 GEN_INT (GET_MODE_MASK (mode)),
4872 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
4873 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
4874 NULL_RTX, 1);
4875 }
4876
4877 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
4878 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
4879 perform the merge in SEQ2. */
4880
4881 static rtx
4882 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
4883 enum machine_mode mode, rtx val, rtx ins)
4884 {
4885 rtx tmp;
4886
4887 if (ac->aligned)
4888 {
4889 start_sequence ();
4890 tmp = copy_to_mode_reg (SImode, val);
4891 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
4892 const0_rtx, ins))
4893 {
4894 *seq1 = NULL;
4895 *seq2 = get_insns ();
4896 end_sequence ();
4897 return tmp;
4898 }
4899 end_sequence ();
4900 }
4901
4902 /* Failed to use insv. Generate a two part shift and mask. */
4903 start_sequence ();
4904 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
4905 *seq1 = get_insns ();
4906 end_sequence ();
4907
4908 start_sequence ();
4909 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
4910 *seq2 = get_insns ();
4911 end_sequence ();
4912
4913 return tmp;
4914 }
4915
4916 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
4917 the memory location, CMP the old value to compare MEM with and NEW_RTX the
4918 value to set if CMP == MEM. */
4919
4920 void
4921 s390_expand_cs_hqi (enum machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
4922 rtx cmp, rtx new_rtx, bool is_weak)
4923 {
4924 struct alignment_context ac;
4925 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
4926 rtx res = gen_reg_rtx (SImode);
4927 rtx csloop = NULL, csend = NULL;
4928
4929 gcc_assert (MEM_P (mem));
4930
4931 init_alignment_context (&ac, mem, mode);
4932
4933 /* Load full word. Subsequent loads are performed by CS. */
4934 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
4935 NULL_RTX, 1, OPTAB_DIRECT);
4936
4937 /* Prepare insertions of cmp and new_rtx into the loaded value. When
4938 possible, we try to use insv to make this happen efficiently. If
4939 that fails we'll generate code both inside and outside the loop. */
4940 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
4941 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
4942
4943 if (seq0)
4944 emit_insn (seq0);
4945 if (seq1)
4946 emit_insn (seq1);
4947
4948 /* Start CS loop. */
4949 if (!is_weak)
4950 {
4951 /* Begin assuming success. */
4952 emit_move_insn (btarget, const1_rtx);
4953
4954 csloop = gen_label_rtx ();
4955 csend = gen_label_rtx ();
4956 emit_label (csloop);
4957 }
4958
4959 /* val = "<mem>00..0<mem>"
4960 cmp = "00..0<cmp>00..0"
4961 new = "00..0<new>00..0"
4962 */
4963
4964 emit_insn (seq2);
4965 emit_insn (seq3);
4966
4967 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
4968 if (is_weak)
4969 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
4970 else
4971 {
4972 rtx tmp;
4973
4974 /* Jump to end if we're done (likely?). */
4975 s390_emit_jump (csend, cc);
4976
4977 /* Check for changes outside mode, and loop internal if so.
4978 Arrange the moves so that the compare is adjacent to the
4979 branch so that we can generate CRJ. */
4980 tmp = copy_to_reg (val);
4981 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
4982 1, OPTAB_DIRECT);
4983 cc = s390_emit_compare (NE, val, tmp);
4984 s390_emit_jump (csloop, cc);
4985
4986 /* Failed. */
4987 emit_move_insn (btarget, const0_rtx);
4988 emit_label (csend);
4989 }
4990
4991 /* Return the correct part of the bitfield. */
4992 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
4993 NULL_RTX, 1, OPTAB_DIRECT), 1);
4994 }
4995
4996 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
4997 and VAL the value to play with. If AFTER is true then store the value
4998 MEM holds after the operation; if AFTER is false then store the value MEM
4999 holds before the operation. If TARGET is zero then discard that value, else
5000 store it to TARGET. */
5001
5002 void
5003 s390_expand_atomic (enum machine_mode mode, enum rtx_code code,
5004 rtx target, rtx mem, rtx val, bool after)
5005 {
5006 struct alignment_context ac;
5007 rtx cmp;
5008 rtx new_rtx = gen_reg_rtx (SImode);
5009 rtx orig = gen_reg_rtx (SImode);
5010 rtx csloop = gen_label_rtx ();
5011
5012 gcc_assert (!target || register_operand (target, VOIDmode));
5013 gcc_assert (MEM_P (mem));
5014
5015 init_alignment_context (&ac, mem, mode);
5016
5017 /* Shift val to the correct bit positions.
5018 Preserve "icm", but prevent "ex icm". */
5019 if (!(ac.aligned && code == SET && MEM_P (val)))
5020 val = s390_expand_mask_and_shift (val, mode, ac.shift);
5021
5022 /* Further preparation insns. */
5023 if (code == PLUS || code == MINUS)
5024 emit_move_insn (orig, val);
5025 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
5026 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
5027 NULL_RTX, 1, OPTAB_DIRECT);
5028
5029 /* Load full word. Subsequent loads are performed by CS. */
5030 cmp = force_reg (SImode, ac.memsi);
5031
5032 /* Start CS loop. */
5033 emit_label (csloop);
5034 emit_move_insn (new_rtx, cmp);
5035
5036 /* Patch new with val at correct position. */
5037 switch (code)
5038 {
5039 case PLUS:
5040 case MINUS:
5041 val = expand_simple_binop (SImode, code, new_rtx, orig,
5042 NULL_RTX, 1, OPTAB_DIRECT);
5043 val = expand_simple_binop (SImode, AND, val, ac.modemask,
5044 NULL_RTX, 1, OPTAB_DIRECT);
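/* Keep only the bits inside the field; a carry or borrow into the neighbouring bits is discarded before the result is merged back in below. */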
5045 /* FALLTHRU */
5046 case SET:
5047 if (ac.aligned && MEM_P (val))
5048 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
5049 0, 0, SImode, val);
5050 else
5051 {
5052 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
5053 NULL_RTX, 1, OPTAB_DIRECT);
5054 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
5055 NULL_RTX, 1, OPTAB_DIRECT);
5056 }
5057 break;
5058 case AND:
5059 case IOR:
5060 case XOR:
5061 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
5062 NULL_RTX, 1, OPTAB_DIRECT);
5063 break;
5064 case MULT: /* NAND */
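/* VAL was prepared above with all bits outside the field set, so the AND leaves the surrounding bits of NEW_RTX unchanged while the XOR with MODEMASK complements just the field, yielding the NAND inside the field. */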
5065 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
5066 NULL_RTX, 1, OPTAB_DIRECT);
5067 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
5068 NULL_RTX, 1, OPTAB_DIRECT);
5069 break;
5070 default:
5071 gcc_unreachable ();
5072 }
5073
5074 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
5075 ac.memsi, cmp, new_rtx));
5076
5077 /* Return the correct part of the bitfield. */
5078 if (target)
5079 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
5080 after ? new_rtx : cmp, ac.shift,
5081 NULL_RTX, 1, OPTAB_DIRECT), 1);
5082 }
5083
5084 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5085 We need to emit DTP-relative relocations. */
5086
5087 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
5088
5089 static void
5090 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
5091 {
5092 switch (size)
5093 {
5094 case 4:
5095 fputs ("\t.long\t", file);
5096 break;
5097 case 8:
5098 fputs ("\t.quad\t", file);
5099 break;
5100 default:
5101 gcc_unreachable ();
5102 }
5103 output_addr_const (file, x);
5104 fputs ("@DTPOFF", file);
5105 }
5106
5107 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
5108 /* Implement TARGET_MANGLE_TYPE. */
5109
5110 static const char *
5111 s390_mangle_type (const_tree type)
5112 {
5113 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
5114 && TARGET_LONG_DOUBLE_128)
5115 return "g";
5116
5117 /* For all other types, use normal C++ mangling. */
5118 return NULL;
5119 }
5120 #endif
5121
5122 /* In the name of slightly smaller debug output, and to cater to
5123 general assembler lossage, recognize various UNSPEC sequences
5124 and turn them back into a direct symbol reference. */
5125
5126 static rtx
5127 s390_delegitimize_address (rtx orig_x)
5128 {
5129 rtx x, y;
5130
5131 orig_x = delegitimize_mem_from_attrs (orig_x);
5132 x = orig_x;
5133
5134 /* Extract the symbol ref from:
5135 (plus:SI (reg:SI 12 %r12)
5136 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
5137 UNSPEC_GOTOFF/PLTOFF)))
5138 and
5139 (plus:SI (reg:SI 12 %r12)
5140 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
5141 UNSPEC_GOTOFF/PLTOFF)
5142 (const_int 4 [0x4])))) */
5143 if (GET_CODE (x) == PLUS
5144 && REG_P (XEXP (x, 0))
5145 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
5146 && GET_CODE (XEXP (x, 1)) == CONST)
5147 {
5148 HOST_WIDE_INT offset = 0;
5149
5150 /* The const operand. */
5151 y = XEXP (XEXP (x, 1), 0);
5152
5153 if (GET_CODE (y) == PLUS
5154 && GET_CODE (XEXP (y, 1)) == CONST_INT)
5155 {
5156 offset = INTVAL (XEXP (y, 1));
5157 y = XEXP (y, 0);
5158 }
5159
5160 if (GET_CODE (y) == UNSPEC
5161 && (XINT (y, 1) == UNSPEC_GOTOFF
5162 || XINT (y, 1) == UNSPEC_PLTOFF))
5163 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
5164 }
5165
5166 if (GET_CODE (x) != MEM)
5167 return orig_x;
5168
5169 x = XEXP (x, 0);
5170 if (GET_CODE (x) == PLUS
5171 && GET_CODE (XEXP (x, 1)) == CONST
5172 && GET_CODE (XEXP (x, 0)) == REG
5173 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5174 {
5175 y = XEXP (XEXP (x, 1), 0);
5176 if (GET_CODE (y) == UNSPEC
5177 && XINT (y, 1) == UNSPEC_GOT)
5178 y = XVECEXP (y, 0, 0);
5179 else
5180 return orig_x;
5181 }
5182 else if (GET_CODE (x) == CONST)
5183 {
5184 /* Extract the symbol ref from:
5185 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
5186 UNSPEC_PLT/GOTENT))) */
5187
5188 y = XEXP (x, 0);
5189 if (GET_CODE (y) == UNSPEC
5190 && (XINT (y, 1) == UNSPEC_GOTENT
5191 || XINT (y, 1) == UNSPEC_PLT))
5192 y = XVECEXP (y, 0, 0);
5193 else
5194 return orig_x;
5195 }
5196 else
5197 return orig_x;
5198
5199 if (GET_MODE (orig_x) != Pmode)
5200 {
5201 if (GET_MODE (orig_x) == BLKmode)
5202 return orig_x;
5203 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
5204 if (y == NULL_RTX)
5205 return orig_x;
5206 }
5207 return y;
5208 }
5209
5210 /* Output operand OP to stdio stream FILE.
5211 OP is an address (register + offset) which is not used to address data;
5212 instead the rightmost bits are interpreted as the value. */
5213
5214 static void
5215 print_shift_count_operand (FILE *file, rtx op)
5216 {
5217 HOST_WIDE_INT offset;
5218 rtx base;
5219
5220 /* Extract base register and offset. */
5221 if (!s390_decompose_shift_count (op, &base, &offset))
5222 gcc_unreachable ();
5223
5224 /* Sanity check. */
5225 if (base)
5226 {
5227 gcc_assert (GET_CODE (base) == REG);
5228 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
5229 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
5230 }
5231
5232 /* Offsets are restricted to twelve bits. */
5233 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
5234 if (base)
5235 fprintf (file, "(%s)", reg_names[REGNO (base)]);
5236 }
5237
5238 /* See 'get_some_local_dynamic_name'. */
5239
5240 static int
5241 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
5242 {
5243 rtx x = *px;
5244
5245 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
5246 {
5247 x = get_pool_constant (x);
5248 return for_each_rtx (&x, get_some_local_dynamic_name_1, 0);
5249 }
5250
5251 if (GET_CODE (x) == SYMBOL_REF
5252 && tls_symbolic_operand (x) == TLS_MODEL_LOCAL_DYNAMIC)
5253 {
5254 cfun->machine->some_ld_name = XSTR (x, 0);
5255 return 1;
5256 }
5257
5258 return 0;
5259 }
5260
5261 /* Locate some local-dynamic symbol still in use by this function
5262 so that we can print its name in local-dynamic base patterns. */
5263
5264 static const char *
5265 get_some_local_dynamic_name (void)
5266 {
5267 rtx insn;
5268
5269 if (cfun->machine->some_ld_name)
5270 return cfun->machine->some_ld_name;
5271
5272 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
5273 if (INSN_P (insn)
5274 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
5275 return cfun->machine->some_ld_name;
5276
5277 gcc_unreachable ();
5278 }
5279
5280 /* Output machine-dependent UNSPECs occurring in address constant X
5281 in assembler syntax to stdio stream FILE. Returns true if the
5282 constant X could be recognized, false otherwise. */
5283
5284 static bool
5285 s390_output_addr_const_extra (FILE *file, rtx x)
5286 {
5287 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
5288 switch (XINT (x, 1))
5289 {
5290 case UNSPEC_GOTENT:
5291 output_addr_const (file, XVECEXP (x, 0, 0));
5292 fprintf (file, "@GOTENT");
5293 return true;
5294 case UNSPEC_GOT:
5295 output_addr_const (file, XVECEXP (x, 0, 0));
5296 fprintf (file, "@GOT");
5297 return true;
5298 case UNSPEC_GOTOFF:
5299 output_addr_const (file, XVECEXP (x, 0, 0));
5300 fprintf (file, "@GOTOFF");
5301 return true;
5302 case UNSPEC_PLT:
5303 output_addr_const (file, XVECEXP (x, 0, 0));
5304 fprintf (file, "@PLT");
5305 return true;
5306 case UNSPEC_PLTOFF:
5307 output_addr_const (file, XVECEXP (x, 0, 0));
5308 fprintf (file, "@PLTOFF");
5309 return true;
5310 case UNSPEC_TLSGD:
5311 output_addr_const (file, XVECEXP (x, 0, 0));
5312 fprintf (file, "@TLSGD");
5313 return true;
5314 case UNSPEC_TLSLDM:
5315 assemble_name (file, get_some_local_dynamic_name ());
5316 fprintf (file, "@TLSLDM");
5317 return true;
5318 case UNSPEC_DTPOFF:
5319 output_addr_const (file, XVECEXP (x, 0, 0));
5320 fprintf (file, "@DTPOFF");
5321 return true;
5322 case UNSPEC_NTPOFF:
5323 output_addr_const (file, XVECEXP (x, 0, 0));
5324 fprintf (file, "@NTPOFF");
5325 return true;
5326 case UNSPEC_GOTNTPOFF:
5327 output_addr_const (file, XVECEXP (x, 0, 0));
5328 fprintf (file, "@GOTNTPOFF");
5329 return true;
5330 case UNSPEC_INDNTPOFF:
5331 output_addr_const (file, XVECEXP (x, 0, 0));
5332 fprintf (file, "@INDNTPOFF");
5333 return true;
5334 }
5335
5336 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
5337 switch (XINT (x, 1))
5338 {
5339 case UNSPEC_POOL_OFFSET:
5340 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
5341 output_addr_const (file, x);
5342 return true;
5343 }
5344 return false;
5345 }
5346
5347 /* Output address operand ADDR in assembler syntax to
5348 stdio stream FILE. */
5349
5350 void
5351 print_operand_address (FILE *file, rtx addr)
5352 {
5353 struct s390_address ad;
5354
5355 if (s390_loadrelative_operand_p (addr, NULL, NULL))
5356 {
5357 if (!TARGET_Z10)
5358 {
5359 output_operand_lossage ("symbolic memory references are "
5360 "only supported on z10 or later");
5361 return;
5362 }
5363 output_addr_const (file, addr);
5364 return;
5365 }
5366
5367 if (!s390_decompose_address (addr, &ad)
5368 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5369 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
5370 output_operand_lossage ("cannot decompose address");
5371
5372 if (ad.disp)
5373 output_addr_const (file, ad.disp);
5374 else
5375 fprintf (file, "0");
5376
5377 if (ad.base && ad.indx)
5378 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
5379 reg_names[REGNO (ad.base)]);
5380 else if (ad.base)
5381 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5382 }
5383
5384 /* Output operand X in assembler syntax to stdio stream FILE.
5385 CODE specified the format flag. The following format flags
5386 are recognized:
5387
5388 'C': print opcode suffix for branch condition.
5389 'D': print opcode suffix for inverse branch condition.
5390 'E': print opcode suffix for branch on index instruction.
5391 'G': print the size of the operand in bytes.
5392 'J': print tls_load/tls_gdcall/tls_ldcall suffix.
5393 'M': print the second word of a TImode operand.
5394 'N': print the second word of a DImode operand.
5395 'O': print only the displacement of a memory reference.
5396 'R': print only the base register of a memory reference.
5397 'S': print S-type memory reference (base+displacement).
5398 'Y': print shift count operand.
5399
5400 'b': print integer X as if it's an unsigned byte.
5401 'c': print integer X as if it's a signed byte.
5402 'e': "end" of DImode contiguous bitmask X.
5403 'f': "end" of SImode contiguous bitmask X.
5404 'h': print integer X as if it's a signed halfword.
5405 'i': print the first nonzero HImode part of X.
5406 'j': print the first HImode part unequal to -1 of X.
5407 'k': print the first nonzero SImode part of X.
5408 'm': print the first SImode part unequal to -1 of X.
5409 'o': print integer X as if it's an unsigned 32-bit word.
5410 's': "start" of DImode contiguous bitmask X.
5411 't': "start" of SImode contiguous bitmask X.
5412 'x': print integer X as if it's an unsigned halfword.
5413 */
5414
5415 void
5416 print_operand (FILE *file, rtx x, int code)
5417 {
5418 HOST_WIDE_INT ival;
5419
5420 switch (code)
5421 {
5422 case 'C':
5423 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
5424 return;
5425
5426 case 'D':
5427 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
5428 return;
5429
5430 case 'E':
5431 if (GET_CODE (x) == LE)
5432 fprintf (file, "l");
5433 else if (GET_CODE (x) == GT)
5434 fprintf (file, "h");
5435 else
5436 output_operand_lossage ("invalid comparison operator "
5437 "for 'E' output modifier");
5438 return;
5439
5440 case 'J':
5441 if (GET_CODE (x) == SYMBOL_REF)
5442 {
5443 fprintf (file, "%s", ":tls_load:");
5444 output_addr_const (file, x);
5445 }
5446 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
5447 {
5448 fprintf (file, "%s", ":tls_gdcall:");
5449 output_addr_const (file, XVECEXP (x, 0, 0));
5450 }
5451 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
5452 {
5453 fprintf (file, "%s", ":tls_ldcall:");
5454 assemble_name (file, get_some_local_dynamic_name ());
5455 }
5456 else
5457 output_operand_lossage ("invalid reference for 'J' output modifier");
5458 return;
5459
5460 case 'G':
5461 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
5462 return;
5463
5464 case 'O':
5465 {
5466 struct s390_address ad;
5467 int ret;
5468
5469 if (!MEM_P (x))
5470 {
5471 output_operand_lossage ("memory reference expected for "
5472 "'O' output modifier");
5473 return;
5474 }
5475
5476 ret = s390_decompose_address (XEXP (x, 0), &ad);
5477
5478 if (!ret
5479 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5480 || ad.indx)
5481 {
5482 output_operand_lossage ("invalid address for 'O' output modifier");
5483 return;
5484 }
5485
5486 if (ad.disp)
5487 output_addr_const (file, ad.disp);
5488 else
5489 fprintf (file, "0");
5490 }
5491 return;
5492
5493 case 'R':
5494 {
5495 struct s390_address ad;
5496 int ret;
5497
5498 if (!MEM_P (x))
5499 {
5500 output_operand_lossage ("memory reference expected for "
5501 "'R' output modifier");
5502 return;
5503 }
5504
5505 ret = s390_decompose_address (XEXP (x, 0), &ad);
5506
5507 if (!ret
5508 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5509 || ad.indx)
5510 {
5511 output_operand_lossage ("invalid address for 'R' output modifier");
5512 return;
5513 }
5514
5515 if (ad.base)
5516 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
5517 else
5518 fprintf (file, "0");
5519 }
5520 return;
5521
5522 case 'S':
5523 {
5524 struct s390_address ad;
5525 int ret;
5526
5527 if (!MEM_P (x))
5528 {
5529 output_operand_lossage ("memory reference expected for "
5530 "'S' output modifier");
5531 return;
5532 }
5533 ret = s390_decompose_address (XEXP (x, 0), &ad);
5534
5535 if (!ret
5536 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5537 || ad.indx)
5538 {
5539 output_operand_lossage ("invalid address for 'S' output modifier");
5540 return;
5541 }
5542
5543 if (ad.disp)
5544 output_addr_const (file, ad.disp);
5545 else
5546 fprintf (file, "0");
5547
5548 if (ad.base)
5549 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5550 }
5551 return;
5552
5553 case 'N':
5554 if (GET_CODE (x) == REG)
5555 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5556 else if (GET_CODE (x) == MEM)
5557 x = change_address (x, VOIDmode,
5558 plus_constant (Pmode, XEXP (x, 0), 4));
5559 else
5560 output_operand_lossage ("register or memory expression expected "
5561 "for 'N' output modifier");
5562 break;
5563
5564 case 'M':
5565 if (GET_CODE (x) == REG)
5566 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5567 else if (GET_CODE (x) == MEM)
5568 x = change_address (x, VOIDmode,
5569 plus_constant (Pmode, XEXP (x, 0), 8));
5570 else
5571 output_operand_lossage ("register or memory expression expected "
5572 "for 'M' output modifier");
5573 break;
5574
5575 case 'Y':
5576 print_shift_count_operand (file, x);
5577 return;
5578 }
5579
5580 switch (GET_CODE (x))
5581 {
5582 case REG:
5583 fprintf (file, "%s", reg_names[REGNO (x)]);
5584 break;
5585
5586 case MEM:
5587 output_address (XEXP (x, 0));
5588 break;
5589
5590 case CONST:
5591 case CODE_LABEL:
5592 case LABEL_REF:
5593 case SYMBOL_REF:
5594 output_addr_const (file, x);
5595 break;
5596
5597 case CONST_INT:
5598 ival = INTVAL (x);
5599 switch (code)
5600 {
5601 case 0:
5602 break;
5603 case 'b':
5604 ival &= 0xff;
5605 break;
5606 case 'c':
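/* Sign-extend the low byte of X. */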
5607 ival = ((ival & 0xff) ^ 0x80) - 0x80;
5608 break;
5609 case 'x':
5610 ival &= 0xffff;
5611 break;
5612 case 'h':
5613 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
5614 break;
5615 case 'i':
5616 ival = s390_extract_part (x, HImode, 0);
5617 break;
5618 case 'j':
5619 ival = s390_extract_part (x, HImode, -1);
5620 break;
5621 case 'k':
5622 ival = s390_extract_part (x, SImode, 0);
5623 break;
5624 case 'm':
5625 ival = s390_extract_part (x, SImode, -1);
5626 break;
5627 case 'o':
5628 ival &= 0xffffffff;
5629 break;
5630 case 'e': case 'f':
5631 case 's': case 't':
5632 {
5633 int pos, len;
5634 bool ok;
5635
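/* s390_contiguous_bitmask_p returns POS as the distance of the lowest mask bit from the LSB; convert that to the MSB-based (IBM) bit numbering used in the assembler output: 's'/'t' give the start bit, 'e'/'f' the end bit. */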
5636 len = (code == 's' || code == 'e' ? 64 : 32);
5637 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
5638 gcc_assert (ok);
5639 if (code == 's' || code == 't')
5640 ival = 64 - pos - len;
5641 else
5642 ival = 64 - 1 - pos;
5643 }
5644 break;
5645 default:
5646 output_operand_lossage ("invalid constant for output modifier '%c'", code);
5647 }
5648 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
5649 break;
5650
5651 case CONST_DOUBLE:
5652 gcc_assert (GET_MODE (x) == VOIDmode);
5653 if (code == 'b')
5654 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
5655 else if (code == 'x')
5656 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
5657 else if (code == 'h')
5658 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5659 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
5660 else
5661 {
5662 if (code == 0)
5663 output_operand_lossage ("invalid constant - try using "
5664 "an output modifier");
5665 else
5666 output_operand_lossage ("invalid constant for output modifier '%c'",
5667 code);
5668 }
5669 break;
5670
5671 default:
5672 if (code == 0)
5673 output_operand_lossage ("invalid expression - try using "
5674 "an output modifier");
5675 else
5676 output_operand_lossage ("invalid expression for output "
5677 "modifier '%c'", code);
5678 break;
5679 }
5680 }
5681
5682 /* Target hook for assembling integer objects. We need to define it
5683 here to work a round a bug in some versions of GAS, which couldn't
5684 handle values smaller than INT_MIN when printed in decimal. */
5685
5686 static bool
5687 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
5688 {
5689 if (size == 8 && aligned_p
5690 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
5691 {
5692 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
5693 INTVAL (x));
5694 return true;
5695 }
5696 return default_assemble_integer (x, size, aligned_p);
5697 }
5698
5699 /* Returns true if register REGNO is used for forming
5700 a memory address in expression X. */
5701
5702 static bool
5703 reg_used_in_mem_p (int regno, rtx x)
5704 {
5705 enum rtx_code code = GET_CODE (x);
5706 int i, j;
5707 const char *fmt;
5708
5709 if (code == MEM)
5710 {
5711 if (refers_to_regno_p (regno, regno+1,
5712 XEXP (x, 0), 0))
5713 return true;
5714 }
5715 else if (code == SET
5716 && GET_CODE (SET_DEST (x)) == PC)
5717 {
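/* A SET of the PC is a jump; registers appearing in its source form the branch target address. */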
5718 if (refers_to_regno_p (regno, regno+1,
5719 SET_SRC (x), 0))
5720 return true;
5721 }
5722
5723 fmt = GET_RTX_FORMAT (code);
5724 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5725 {
5726 if (fmt[i] == 'e'
5727 && reg_used_in_mem_p (regno, XEXP (x, i)))
5728 return true;
5729
5730 else if (fmt[i] == 'E')
5731 for (j = 0; j < XVECLEN (x, i); j++)
5732 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
5733 return true;
5734 }
5735 return false;
5736 }
5737
5738 /* Returns true if expression DEP_RTX sets an address register
5739 used by instruction INSN to address memory. */
5740
5741 static bool
5742 addr_generation_dependency_p (rtx dep_rtx, rtx insn)
5743 {
5744 rtx target, pat;
5745
5746 if (NONJUMP_INSN_P (dep_rtx))
5747 dep_rtx = PATTERN (dep_rtx);
5748
5749 if (GET_CODE (dep_rtx) == SET)
5750 {
5751 target = SET_DEST (dep_rtx);
5752 if (GET_CODE (target) == STRICT_LOW_PART)
5753 target = XEXP (target, 0);
5754 while (GET_CODE (target) == SUBREG)
5755 target = SUBREG_REG (target);
5756
5757 if (GET_CODE (target) == REG)
5758 {
5759 int regno = REGNO (target);
5760
5761 if (s390_safe_attr_type (insn) == TYPE_LA)
5762 {
5763 pat = PATTERN (insn);
5764 if (GET_CODE (pat) == PARALLEL)
5765 {
5766 gcc_assert (XVECLEN (pat, 0) == 2);
5767 pat = XVECEXP (pat, 0, 0);
5768 }
5769 gcc_assert (GET_CODE (pat) == SET);
5770 return refers_to_regno_p (regno, regno+1, SET_SRC (pat), 0);
5771 }
5772 else if (get_attr_atype (insn) == ATYPE_AGEN)
5773 return reg_used_in_mem_p (regno, PATTERN (insn));
5774 }
5775 }
5776 return false;
5777 }
5778
5779 /* Return 1 if dep_insn sets a register used by insn in the agen unit. */
5780
5781 int
5782 s390_agen_dep_p (rtx dep_insn, rtx insn)
5783 {
5784 rtx dep_rtx = PATTERN (dep_insn);
5785 int i;
5786
5787 if (GET_CODE (dep_rtx) == SET
5788 && addr_generation_dependency_p (dep_rtx, insn))
5789 return 1;
5790 else if (GET_CODE (dep_rtx) == PARALLEL)
5791 {
5792 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
5793 {
5794 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
5795 return 1;
5796 }
5797 }
5798 return 0;
5799 }
5800
5801
5802 /* Update the integer scheduling priority of INSN.  Increasing the
5803 priority causes INSN to be scheduled earlier; reducing it causes
5804 INSN to be scheduled later.  This is the s390 implementation of the
5805 scheduler's adjust_priority hook.
5806
5807 A STD instruction should be scheduled earlier,
5808 in order to use the bypass. */
5809 static int
5810 s390_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
5811 {
5812 if (! INSN_P (insn))
5813 return priority;
5814
5815 if (s390_tune != PROCESSOR_2084_Z990
5816 && s390_tune != PROCESSOR_2094_Z9_109
5817 && s390_tune != PROCESSOR_2097_Z10
5818 && s390_tune != PROCESSOR_2817_Z196
5819 && s390_tune != PROCESSOR_2827_ZEC12)
5820 return priority;
5821
5822 switch (s390_safe_attr_type (insn))
5823 {
5824 case TYPE_FSTOREDF:
5825 case TYPE_FSTORESF:
5826 priority = priority << 3;
5827 break;
5828 case TYPE_STORE:
5829 case TYPE_STM:
5830 priority = priority << 1;
5831 break;
5832 default:
5833 break;
5834 }
5835 return priority;
5836 }
5837
5838
5839 /* The number of instructions that can be issued per cycle. */
5840
5841 static int
5842 s390_issue_rate (void)
5843 {
5844 switch (s390_tune)
5845 {
5846 case PROCESSOR_2084_Z990:
5847 case PROCESSOR_2094_Z9_109:
5848 case PROCESSOR_2817_Z196:
5849 return 3;
5850 case PROCESSOR_2097_Z10:
5851 case PROCESSOR_2827_ZEC12:
5852 return 2;
5853 default:
5854 return 1;
5855 }
5856 }
5857
5858 static int
5859 s390_first_cycle_multipass_dfa_lookahead (void)
5860 {
5861 return 4;
5862 }
5863
5864 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
5865 Fix up MEMs as required. */
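/* For example, a pool reference of the form (mem (symbol_ref X)) becomes
   (mem (unspec [(symbol_ref X) (base_reg)] UNSPEC_LTREF)), which makes the
   use of the literal pool base register explicit in the RTL.  */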
5866
5867 static void
5868 annotate_constant_pool_refs (rtx *x)
5869 {
5870 int i, j;
5871 const char *fmt;
5872
5873 gcc_assert (GET_CODE (*x) != SYMBOL_REF
5874 || !CONSTANT_POOL_ADDRESS_P (*x));
5875
5876 /* Literal pool references can only occur inside a MEM ... */
5877 if (GET_CODE (*x) == MEM)
5878 {
5879 rtx memref = XEXP (*x, 0);
5880
5881 if (GET_CODE (memref) == SYMBOL_REF
5882 && CONSTANT_POOL_ADDRESS_P (memref))
5883 {
5884 rtx base = cfun->machine->base_reg;
5885 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
5886 UNSPEC_LTREF);
5887
5888 *x = replace_equiv_address (*x, addr);
5889 return;
5890 }
5891
5892 if (GET_CODE (memref) == CONST
5893 && GET_CODE (XEXP (memref, 0)) == PLUS
5894 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
5895 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
5896 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
5897 {
5898 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
5899 rtx sym = XEXP (XEXP (memref, 0), 0);
5900 rtx base = cfun->machine->base_reg;
5901 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5902 UNSPEC_LTREF);
5903
5904 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
5905 return;
5906 }
5907 }
5908
5909 /* ... or a load-address type pattern. */
5910 if (GET_CODE (*x) == SET)
5911 {
5912 rtx addrref = SET_SRC (*x);
5913
5914 if (GET_CODE (addrref) == SYMBOL_REF
5915 && CONSTANT_POOL_ADDRESS_P (addrref))
5916 {
5917 rtx base = cfun->machine->base_reg;
5918 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
5919 UNSPEC_LTREF);
5920
5921 SET_SRC (*x) = addr;
5922 return;
5923 }
5924
5925 if (GET_CODE (addrref) == CONST
5926 && GET_CODE (XEXP (addrref, 0)) == PLUS
5927 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
5928 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
5929 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
5930 {
5931 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
5932 rtx sym = XEXP (XEXP (addrref, 0), 0);
5933 rtx base = cfun->machine->base_reg;
5934 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5935 UNSPEC_LTREF);
5936
5937 SET_SRC (*x) = plus_constant (Pmode, addr, off);
5938 return;
5939 }
5940 }
5941
5942 /* Annotate LTREL_BASE as well. */
5943 if (GET_CODE (*x) == UNSPEC
5944 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
5945 {
5946 rtx base = cfun->machine->base_reg;
5947 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
5948 UNSPEC_LTREL_BASE);
5949 return;
5950 }
5951
5952 fmt = GET_RTX_FORMAT (GET_CODE (*x));
5953 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
5954 {
5955 if (fmt[i] == 'e')
5956 {
5957 annotate_constant_pool_refs (&XEXP (*x, i));
5958 }
5959 else if (fmt[i] == 'E')
5960 {
5961 for (j = 0; j < XVECLEN (*x, i); j++)
5962 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
5963 }
5964 }
5965 }
5966
5967 /* Split all branches that exceed the maximum distance.
5968 Returns true if this created a new literal pool entry. */
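/* Note: relative branches use a signed 16-bit halfword displacement and thus
   reach roughly +-64KB.  A branch beyond that range is rewritten below to
   load the branch target (or, for PIC, its pool-relative offset) from the
   literal pool into the return register and to branch via that register.  */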
5969
5970 static int
5971 s390_split_branches (void)
5972 {
5973 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
5974 int new_literal = 0, ret;
5975 rtx insn, pat, tmp, target;
5976 rtx *label;
5977
5978 /* We need correct insn addresses. */
5979
5980 shorten_branches (get_insns ());
5981
5982 /* Find all branches that exceed 64KB, and split them. */
5983
5984 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5985 {
5986 if (! JUMP_P (insn))
5987 continue;
5988
5989 pat = PATTERN (insn);
5990 if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2)
5991 pat = XVECEXP (pat, 0, 0);
5992 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
5993 continue;
5994
5995 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
5996 {
5997 label = &SET_SRC (pat);
5998 }
5999 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
6000 {
6001 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
6002 label = &XEXP (SET_SRC (pat), 1);
6003 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
6004 label = &XEXP (SET_SRC (pat), 2);
6005 else
6006 continue;
6007 }
6008 else
6009 continue;
6010
6011 if (get_attr_length (insn) <= 4)
6012 continue;
6013
6014 /* We are going to use the return register as a scratch register,
6015 so make sure it will be saved/restored by the prologue/epilogue. */
6016 cfun_frame_layout.save_return_addr_p = 1;
6017
6018 if (!flag_pic)
6019 {
6020 new_literal = 1;
6021 tmp = force_const_mem (Pmode, *label);
6022 tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, tmp), insn);
6023 INSN_ADDRESSES_NEW (tmp, -1);
6024 annotate_constant_pool_refs (&PATTERN (tmp));
6025
6026 target = temp_reg;
6027 }
6028 else
6029 {
6030 new_literal = 1;
6031 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
6032 UNSPEC_LTREL_OFFSET);
6033 target = gen_rtx_CONST (Pmode, target);
6034 target = force_const_mem (Pmode, target);
6035 tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, target), insn);
6036 INSN_ADDRESSES_NEW (tmp, -1);
6037 annotate_constant_pool_refs (&PATTERN (tmp));
6038
6039 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
6040 cfun->machine->base_reg),
6041 UNSPEC_LTREL_BASE);
6042 target = gen_rtx_PLUS (Pmode, temp_reg, target);
6043 }
6044
6045 ret = validate_change (insn, label, target, 0);
6046 gcc_assert (ret);
6047 }
6048
6049 return new_literal;
6050 }
6051
6052
6053 /* Find an annotated literal pool symbol referenced in RTX X,
6054 and store it at REF. Will abort if X contains references to
6055 more than one such pool symbol; multiple references to the same
6056 symbol are allowed, however.
6057
6058 The rtx pointed to by REF must be initialized to NULL_RTX
6059 by the caller before calling this routine. */
6060
6061 static void
6062 find_constant_pool_ref (rtx x, rtx *ref)
6063 {
6064 int i, j;
6065 const char *fmt;
6066
6067 /* Ignore LTREL_BASE references. */
6068 if (GET_CODE (x) == UNSPEC
6069 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6070 return;
6071 /* Likewise POOL_ENTRY insns. */
6072 if (GET_CODE (x) == UNSPEC_VOLATILE
6073 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
6074 return;
6075
6076 gcc_assert (GET_CODE (x) != SYMBOL_REF
6077 || !CONSTANT_POOL_ADDRESS_P (x));
6078
6079 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
6080 {
6081 rtx sym = XVECEXP (x, 0, 0);
6082 gcc_assert (GET_CODE (sym) == SYMBOL_REF
6083 && CONSTANT_POOL_ADDRESS_P (sym));
6084
6085 if (*ref == NULL_RTX)
6086 *ref = sym;
6087 else
6088 gcc_assert (*ref == sym);
6089
6090 return;
6091 }
6092
6093 fmt = GET_RTX_FORMAT (GET_CODE (x));
6094 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6095 {
6096 if (fmt[i] == 'e')
6097 {
6098 find_constant_pool_ref (XEXP (x, i), ref);
6099 }
6100 else if (fmt[i] == 'E')
6101 {
6102 for (j = 0; j < XVECLEN (x, i); j++)
6103 find_constant_pool_ref (XVECEXP (x, i, j), ref);
6104 }
6105 }
6106 }
6107
6108 /* Replace every reference to the annotated literal pool
6109 symbol REF in X by its base plus OFFSET. */
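/* Concretely, (unspec [REF base] UNSPEC_LTREF) is rewritten as
   (plus base OFFSET), and (plus (unspec [REF base] UNSPEC_LTREF)
   (const_int N)) as base + OFFSET + N.  */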
6110
6111 static void
6112 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
6113 {
6114 int i, j;
6115 const char *fmt;
6116
6117 gcc_assert (*x != ref);
6118
6119 if (GET_CODE (*x) == UNSPEC
6120 && XINT (*x, 1) == UNSPEC_LTREF
6121 && XVECEXP (*x, 0, 0) == ref)
6122 {
6123 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
6124 return;
6125 }
6126
6127 if (GET_CODE (*x) == PLUS
6128 && GET_CODE (XEXP (*x, 1)) == CONST_INT
6129 && GET_CODE (XEXP (*x, 0)) == UNSPEC
6130 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
6131 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
6132 {
6133 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
6134 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
6135 return;
6136 }
6137
6138 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6139 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6140 {
6141 if (fmt[i] == 'e')
6142 {
6143 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
6144 }
6145 else if (fmt[i] == 'E')
6146 {
6147 for (j = 0; j < XVECLEN (*x, i); j++)
6148 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
6149 }
6150 }
6151 }
6152
6153 /* Check whether X contains an UNSPEC_LTREL_BASE.
6154 Return its constant pool symbol if found, NULL_RTX otherwise. */
6155
6156 static rtx
6157 find_ltrel_base (rtx x)
6158 {
6159 int i, j;
6160 const char *fmt;
6161
6162 if (GET_CODE (x) == UNSPEC
6163 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6164 return XVECEXP (x, 0, 0);
6165
6166 fmt = GET_RTX_FORMAT (GET_CODE (x));
6167 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6168 {
6169 if (fmt[i] == 'e')
6170 {
6171 rtx fnd = find_ltrel_base (XEXP (x, i));
6172 if (fnd)
6173 return fnd;
6174 }
6175 else if (fmt[i] == 'E')
6176 {
6177 for (j = 0; j < XVECLEN (x, i); j++)
6178 {
6179 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
6180 if (fnd)
6181 return fnd;
6182 }
6183 }
6184 }
6185
6186 return NULL_RTX;
6187 }
6188
6189 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
6190
6191 static void
6192 replace_ltrel_base (rtx *x)
6193 {
6194 int i, j;
6195 const char *fmt;
6196
6197 if (GET_CODE (*x) == UNSPEC
6198 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
6199 {
6200 *x = XVECEXP (*x, 0, 1);
6201 return;
6202 }
6203
6204 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6205 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6206 {
6207 if (fmt[i] == 'e')
6208 {
6209 replace_ltrel_base (&XEXP (*x, i));
6210 }
6211 else if (fmt[i] == 'E')
6212 {
6213 for (j = 0; j < XVECLEN (*x, i); j++)
6214 replace_ltrel_base (&XVECEXP (*x, i, j));
6215 }
6216 }
6217 }
6218
6219
6220 /* We keep a list of constants which we have to add to internal
6221 constant tables in the middle of large functions. */
6222
6223 #define NR_C_MODES 11
6224 enum machine_mode constant_modes[NR_C_MODES] =
6225 {
6226 TFmode, TImode, TDmode,
6227 DFmode, DImode, DDmode,
6228 SFmode, SImode, SDmode,
6229 HImode,
6230 QImode
6231 };
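/* The modes above are ordered by decreasing size and alignment requirement;
   s390_dump_pool walks this array front to back, so the most strictly
   aligned constants are emitted first.  */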
6232
6233 struct constant
6234 {
6235 struct constant *next;
6236 rtx value;
6237 rtx label;
6238 };
6239
6240 struct constant_pool
6241 {
6242 struct constant_pool *next;
6243 rtx first_insn;
6244 rtx pool_insn;
6245 bitmap insns;
6246 rtx emit_pool_after;
6247
6248 struct constant *constants[NR_C_MODES];
6249 struct constant *execute;
6250 rtx label;
6251 int size;
6252 };
6253
6254 /* Allocate new constant_pool structure. */
6255
6256 static struct constant_pool *
6257 s390_alloc_pool (void)
6258 {
6259 struct constant_pool *pool;
6260 int i;
6261
6262 pool = (struct constant_pool *) xmalloc (sizeof *pool);
6263 pool->next = NULL;
6264 for (i = 0; i < NR_C_MODES; i++)
6265 pool->constants[i] = NULL;
6266
6267 pool->execute = NULL;
6268 pool->label = gen_label_rtx ();
6269 pool->first_insn = NULL_RTX;
6270 pool->pool_insn = NULL_RTX;
6271 pool->insns = BITMAP_ALLOC (NULL);
6272 pool->size = 0;
6273 pool->emit_pool_after = NULL_RTX;
6274
6275 return pool;
6276 }
6277
6278 /* Create new constant pool covering instructions starting at INSN
6279 and chain it to the end of POOL_LIST. */
6280
6281 static struct constant_pool *
6282 s390_start_pool (struct constant_pool **pool_list, rtx insn)
6283 {
6284 struct constant_pool *pool, **prev;
6285
6286 pool = s390_alloc_pool ();
6287 pool->first_insn = insn;
6288
6289 for (prev = pool_list; *prev; prev = &(*prev)->next)
6290 ;
6291 *prev = pool;
6292
6293 return pool;
6294 }
6295
6296 /* End range of instructions covered by POOL at INSN and emit
6297 placeholder insn representing the pool. */
6298
6299 static void
6300 s390_end_pool (struct constant_pool *pool, rtx insn)
6301 {
6302 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
6303
6304 if (!insn)
6305 insn = get_last_insn ();
6306
6307 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
6308 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6309 }
6310
6311 /* Add INSN to the list of insns covered by POOL. */
6312
6313 static void
6314 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
6315 {
6316 bitmap_set_bit (pool->insns, INSN_UID (insn));
6317 }
6318
6319 /* Return pool out of POOL_LIST that covers INSN. */
6320
6321 static struct constant_pool *
6322 s390_find_pool (struct constant_pool *pool_list, rtx insn)
6323 {
6324 struct constant_pool *pool;
6325
6326 for (pool = pool_list; pool; pool = pool->next)
6327 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
6328 break;
6329
6330 return pool;
6331 }
6332
6333 /* Add constant VAL of mode MODE to the constant pool POOL. */
6334
6335 static void
6336 s390_add_constant (struct constant_pool *pool, rtx val, enum machine_mode mode)
6337 {
6338 struct constant *c;
6339 int i;
6340
6341 for (i = 0; i < NR_C_MODES; i++)
6342 if (constant_modes[i] == mode)
6343 break;
6344 gcc_assert (i != NR_C_MODES);
6345
6346 for (c = pool->constants[i]; c != NULL; c = c->next)
6347 if (rtx_equal_p (val, c->value))
6348 break;
6349
6350 if (c == NULL)
6351 {
6352 c = (struct constant *) xmalloc (sizeof *c);
6353 c->value = val;
6354 c->label = gen_label_rtx ();
6355 c->next = pool->constants[i];
6356 pool->constants[i] = c;
6357 pool->size += GET_MODE_SIZE (mode);
6358 }
6359 }
6360
6361 /* Return an rtx that represents the offset of X from the start of
6362 pool POOL. */
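/* The offset is expressed as (const (unspec [X pool_label]
   UNSPEC_POOL_OFFSET)) and is only resolved to an actual difference once
   the pool label has been placed.  */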
6363
6364 static rtx
6365 s390_pool_offset (struct constant_pool *pool, rtx x)
6366 {
6367 rtx label;
6368
6369 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
6370 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
6371 UNSPEC_POOL_OFFSET);
6372 return gen_rtx_CONST (GET_MODE (x), x);
6373 }
6374
6375 /* Find constant VAL of mode MODE in the constant pool POOL.
6376 Return an RTX describing the distance from the start of
6377 the pool to the location of the new constant. */
6378
6379 static rtx
6380 s390_find_constant (struct constant_pool *pool, rtx val,
6381 enum machine_mode mode)
6382 {
6383 struct constant *c;
6384 int i;
6385
6386 for (i = 0; i < NR_C_MODES; i++)
6387 if (constant_modes[i] == mode)
6388 break;
6389 gcc_assert (i != NR_C_MODES);
6390
6391 for (c = pool->constants[i]; c != NULL; c = c->next)
6392 if (rtx_equal_p (val, c->value))
6393 break;
6394
6395 gcc_assert (c);
6396
6397 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6398 }
6399
6400 /* Check whether INSN is an execute. Return the label_ref to its
6401 execute target template if so, NULL_RTX otherwise. */
6402
6403 static rtx
6404 s390_execute_label (rtx insn)
6405 {
6406 if (NONJUMP_INSN_P (insn)
6407 && GET_CODE (PATTERN (insn)) == PARALLEL
6408 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
6409 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
6410 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
6411
6412 return NULL_RTX;
6413 }
6414
6415 /* Add execute target for INSN to the constant pool POOL. */
6416
6417 static void
6418 s390_add_execute (struct constant_pool *pool, rtx insn)
6419 {
6420 struct constant *c;
6421
6422 for (c = pool->execute; c != NULL; c = c->next)
6423 if (INSN_UID (insn) == INSN_UID (c->value))
6424 break;
6425
6426 if (c == NULL)
6427 {
6428 c = (struct constant *) xmalloc (sizeof *c);
6429 c->value = insn;
6430 c->label = gen_label_rtx ();
6431 c->next = pool->execute;
6432 pool->execute = c;
6433 pool->size += 6;
6434 }
6435 }
6436
6437 /* Find execute target for INSN in the constant pool POOL.
6438 Return an RTX describing the distance from the start of
6439 the pool to the location of the execute target. */
6440
6441 static rtx
6442 s390_find_execute (struct constant_pool *pool, rtx insn)
6443 {
6444 struct constant *c;
6445
6446 for (c = pool->execute; c != NULL; c = c->next)
6447 if (INSN_UID (insn) == INSN_UID (c->value))
6448 break;
6449
6450 gcc_assert (c);
6451
6452 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6453 }
6454
6455 /* For an execute INSN, extract the execute target template. */
6456
6457 static rtx
6458 s390_execute_target (rtx insn)
6459 {
6460 rtx pattern = PATTERN (insn);
6461 gcc_assert (s390_execute_label (insn));
6462
6463 if (XVECLEN (pattern, 0) == 2)
6464 {
6465 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
6466 }
6467 else
6468 {
6469 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
6470 int i;
6471
6472 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
6473 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
6474
6475 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
6476 }
6477
6478 return pattern;
6479 }
6480
6481 /* Indicate that INSN cannot be duplicated. This is the case for
6482 execute insns that carry a unique label. */
6483
6484 static bool
6485 s390_cannot_copy_insn_p (rtx insn)
6486 {
6487 rtx label = s390_execute_label (insn);
6488 return label && label != const0_rtx;
6489 }
6490
6491 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
6492 do not emit the pool base label. */
6493
6494 static void
6495 s390_dump_pool (struct constant_pool *pool, bool remote_label)
6496 {
6497 struct constant *c;
6498 rtx insn = pool->pool_insn;
6499 int i;
6500
6501 /* Switch to rodata section. */
6502 if (TARGET_CPU_ZARCH)
6503 {
6504 insn = emit_insn_after (gen_pool_section_start (), insn);
6505 INSN_ADDRESSES_NEW (insn, -1);
6506 }
6507
6508 /* Ensure minimum pool alignment. */
6509 if (TARGET_CPU_ZARCH)
6510 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
6511 else
6512 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
6513 INSN_ADDRESSES_NEW (insn, -1);
6514
6515 /* Emit pool base label. */
6516 if (!remote_label)
6517 {
6518 insn = emit_label_after (pool->label, insn);
6519 INSN_ADDRESSES_NEW (insn, -1);
6520 }
6521
6522 /* Dump constants in descending alignment requirement order,
6523 ensuring proper alignment for every constant. */
6524 for (i = 0; i < NR_C_MODES; i++)
6525 for (c = pool->constants[i]; c; c = c->next)
6526 {
6527 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
6528 rtx value = copy_rtx (c->value);
6529 if (GET_CODE (value) == CONST
6530 && GET_CODE (XEXP (value, 0)) == UNSPEC
6531 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
6532 && XVECLEN (XEXP (value, 0), 0) == 1)
6533 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
6534
6535 insn = emit_label_after (c->label, insn);
6536 INSN_ADDRESSES_NEW (insn, -1);
6537
6538 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
6539 gen_rtvec (1, value),
6540 UNSPECV_POOL_ENTRY);
6541 insn = emit_insn_after (value, insn);
6542 INSN_ADDRESSES_NEW (insn, -1);
6543 }
6544
6545 /* Ensure minimum alignment for instructions. */
6546 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
6547 INSN_ADDRESSES_NEW (insn, -1);
6548
6549 /* Output in-pool execute template insns. */
6550 for (c = pool->execute; c; c = c->next)
6551 {
6552 insn = emit_label_after (c->label, insn);
6553 INSN_ADDRESSES_NEW (insn, -1);
6554
6555 insn = emit_insn_after (s390_execute_target (c->value), insn);
6556 INSN_ADDRESSES_NEW (insn, -1);
6557 }
6558
6559 /* Switch back to previous section. */
6560 if (TARGET_CPU_ZARCH)
6561 {
6562 insn = emit_insn_after (gen_pool_section_end (), insn);
6563 INSN_ADDRESSES_NEW (insn, -1);
6564 }
6565
6566 insn = emit_barrier_after (insn);
6567 INSN_ADDRESSES_NEW (insn, -1);
6568
6569 /* Remove placeholder insn. */
6570 remove_insn (pool->pool_insn);
6571 }
6572
6573 /* Free all memory used by POOL. */
6574
6575 static void
6576 s390_free_pool (struct constant_pool *pool)
6577 {
6578 struct constant *c, *next;
6579 int i;
6580
6581 for (i = 0; i < NR_C_MODES; i++)
6582 for (c = pool->constants[i]; c; c = next)
6583 {
6584 next = c->next;
6585 free (c);
6586 }
6587
6588 for (c = pool->execute; c; c = next)
6589 {
6590 next = c->next;
6591 free (c);
6592 }
6593
6594 BITMAP_FREE (pool->insns);
6595 free (pool);
6596 }
6597
6598
6599 /* Collect main literal pool. Return NULL on overflow. */
6600
6601 static struct constant_pool *
6602 s390_mainpool_start (void)
6603 {
6604 struct constant_pool *pool;
6605 rtx insn;
6606
6607 pool = s390_alloc_pool ();
6608
6609 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6610 {
6611 if (NONJUMP_INSN_P (insn)
6612 && GET_CODE (PATTERN (insn)) == SET
6613 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
6614 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
6615 {
6616 gcc_assert (!pool->pool_insn);
6617 pool->pool_insn = insn;
6618 }
6619
6620 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6621 {
6622 s390_add_execute (pool, insn);
6623 }
6624 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6625 {
6626 rtx pool_ref = NULL_RTX;
6627 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6628 if (pool_ref)
6629 {
6630 rtx constant = get_pool_constant (pool_ref);
6631 enum machine_mode mode = get_pool_mode (pool_ref);
6632 s390_add_constant (pool, constant, mode);
6633 }
6634 }
6635
6636 /* If hot/cold partitioning is enabled, we have to make sure that
6637 the literal pool is emitted in the same section where the
6638 initialization of the literal pool base pointer takes place.
6639 emit_pool_after is only used in the non-overflow case on
6640 non-Z CPUs, where we can emit the literal pool at the end of the
6641 function body within the text section. */
6642 if (NOTE_P (insn)
6643 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
6644 && !pool->emit_pool_after)
6645 pool->emit_pool_after = PREV_INSN (insn);
6646 }
6647
6648 gcc_assert (pool->pool_insn || pool->size == 0);
6649
6650 if (pool->size >= 4096)
6651 {
6652 /* We're going to chunkify the pool, so remove the main
6653 pool placeholder insn. */
6654 remove_insn (pool->pool_insn);
6655
6656 s390_free_pool (pool);
6657 pool = NULL;
6658 }
6659
6660 /* If the function ends with the section where the literal pool
6661 should be emitted, set the marker to its end. */
6662 if (pool && !pool->emit_pool_after)
6663 pool->emit_pool_after = get_last_insn ();
6664
6665 return pool;
6666 }
6667
6668 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6669 Modify the current function to output the pool constants as well as
6670 the pool register setup instruction. */
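/* Three strategies are used below: on z/Architecture the pool base is
   loaded with LARL and the pool is emitted after the function in .rodata;
   on 31-bit S/390 the pool is placed at the end of the function if code
   plus pool stays below 4096 bytes; otherwise an inline pool is emitted
   and branched over.  */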
6671
6672 static void
6673 s390_mainpool_finish (struct constant_pool *pool)
6674 {
6675 rtx base_reg = cfun->machine->base_reg;
6676 rtx insn;
6677
6678 /* If the pool is empty, we're done. */
6679 if (pool->size == 0)
6680 {
6681 /* We don't actually need a base register after all. */
6682 cfun->machine->base_reg = NULL_RTX;
6683
6684 if (pool->pool_insn)
6685 remove_insn (pool->pool_insn);
6686 s390_free_pool (pool);
6687 return;
6688 }
6689
6690 /* We need correct insn addresses. */
6691 shorten_branches (get_insns ());
6692
6693 /* On zSeries, we use a LARL to load the pool register. The pool is
6694 located in the .rodata section, so we emit it after the function. */
6695 if (TARGET_CPU_ZARCH)
6696 {
6697 insn = gen_main_base_64 (base_reg, pool->label);
6698 insn = emit_insn_after (insn, pool->pool_insn);
6699 INSN_ADDRESSES_NEW (insn, -1);
6700 remove_insn (pool->pool_insn);
6701
6702 insn = get_last_insn ();
6703 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6704 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6705
6706 s390_dump_pool (pool, 0);
6707 }
6708
6709 /* On S/390, if the total size of the function's code plus literal pool
6710 does not exceed 4096 bytes, we use BASR to set up a function base
6711 pointer, and emit the literal pool at the end of the function. */
6712 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
6713 + pool->size + 8 /* alignment slop */ < 4096)
6714 {
6715 insn = gen_main_base_31_small (base_reg, pool->label);
6716 insn = emit_insn_after (insn, pool->pool_insn);
6717 INSN_ADDRESSES_NEW (insn, -1);
6718 remove_insn (pool->pool_insn);
6719
6720 insn = emit_label_after (pool->label, insn);
6721 INSN_ADDRESSES_NEW (insn, -1);
6722
6723 /* emit_pool_after will be set by s390_mainpool_start to the
6724 last insn of the section where the literal pool should be
6725 emitted. */
6726 insn = pool->emit_pool_after;
6727
6728 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6729 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6730
6731 s390_dump_pool (pool, 1);
6732 }
6733
6734 /* Otherwise, we emit an inline literal pool and use BASR to branch
6735 over it, setting up the pool register at the same time. */
6736 else
6737 {
6738 rtx pool_end = gen_label_rtx ();
6739
6740 insn = gen_main_base_31_large (base_reg, pool->label, pool_end);
6741 insn = emit_jump_insn_after (insn, pool->pool_insn);
6742 JUMP_LABEL (insn) = pool_end;
6743 INSN_ADDRESSES_NEW (insn, -1);
6744 remove_insn (pool->pool_insn);
6745
6746 insn = emit_label_after (pool->label, insn);
6747 INSN_ADDRESSES_NEW (insn, -1);
6748
6749 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6750 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6751
6752 insn = emit_label_after (pool_end, pool->pool_insn);
6753 INSN_ADDRESSES_NEW (insn, -1);
6754
6755 s390_dump_pool (pool, 1);
6756 }
6757
6758
6759 /* Replace all literal pool references. */
6760
6761 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6762 {
6763 if (INSN_P (insn))
6764 replace_ltrel_base (&PATTERN (insn));
6765
6766 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6767 {
6768 rtx addr, pool_ref = NULL_RTX;
6769 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6770 if (pool_ref)
6771 {
6772 if (s390_execute_label (insn))
6773 addr = s390_find_execute (pool, insn);
6774 else
6775 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
6776 get_pool_mode (pool_ref));
6777
6778 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
6779 INSN_CODE (insn) = -1;
6780 }
6781 }
6782 }
6783
6784
6785 /* Free the pool. */
6786 s390_free_pool (pool);
6787 }
6788
6789 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6790 We have decided we cannot use this pool, so revert all changes
6791 to the current function that were done by s390_mainpool_start. */
6792 static void
6793 s390_mainpool_cancel (struct constant_pool *pool)
6794 {
6795 /* We didn't actually change the instruction stream, so simply
6796 free the pool memory. */
6797 s390_free_pool (pool);
6798 }
6799
6800
6801 /* Chunkify the literal pool. */
6802
6803 #define S390_POOL_CHUNK_MIN 0xc00
6804 #define S390_POOL_CHUNK_MAX 0xe00
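/* Both limits stay well below the 4096 bytes addressable with a 12-bit
   unsigned displacement from the pool base; the slack leaves room for
   alignment padding and for the base-reload insns accounted for via
   extra_size below.  */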
6805
6806 static struct constant_pool *
6807 s390_chunkify_start (void)
6808 {
6809 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
6810 int extra_size = 0;
6811 bitmap far_labels;
6812 rtx pending_ltrel = NULL_RTX;
6813 rtx insn;
6814
6815 rtx (*gen_reload_base) (rtx, rtx) =
6816 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
6817
6818
6819 /* We need correct insn addresses. */
6820
6821 shorten_branches (get_insns ());
6822
6823 /* Scan all insns and move literals to pool chunks. */
6824
6825 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6826 {
6827 bool section_switch_p = false;
6828
6829 /* Check for pending LTREL_BASE. */
6830 if (INSN_P (insn))
6831 {
6832 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
6833 if (ltrel_base)
6834 {
6835 gcc_assert (ltrel_base == pending_ltrel);
6836 pending_ltrel = NULL_RTX;
6837 }
6838 }
6839
6840 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6841 {
6842 if (!curr_pool)
6843 curr_pool = s390_start_pool (&pool_list, insn);
6844
6845 s390_add_execute (curr_pool, insn);
6846 s390_add_pool_insn (curr_pool, insn);
6847 }
6848 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6849 {
6850 rtx pool_ref = NULL_RTX;
6851 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6852 if (pool_ref)
6853 {
6854 rtx constant = get_pool_constant (pool_ref);
6855 enum machine_mode mode = get_pool_mode (pool_ref);
6856
6857 if (!curr_pool)
6858 curr_pool = s390_start_pool (&pool_list, insn);
6859
6860 s390_add_constant (curr_pool, constant, mode);
6861 s390_add_pool_insn (curr_pool, insn);
6862
6863 /* Don't split the pool chunk between a LTREL_OFFSET load
6864 and the corresponding LTREL_BASE. */
6865 if (GET_CODE (constant) == CONST
6866 && GET_CODE (XEXP (constant, 0)) == UNSPEC
6867 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
6868 {
6869 gcc_assert (!pending_ltrel);
6870 pending_ltrel = pool_ref;
6871 }
6872 }
6873 }
6874
6875 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
6876 {
6877 if (curr_pool)
6878 s390_add_pool_insn (curr_pool, insn);
6879 /* An LTREL_BASE must follow within the same basic block. */
6880 gcc_assert (!pending_ltrel);
6881 }
6882
6883 if (NOTE_P (insn))
6884 switch (NOTE_KIND (insn))
6885 {
6886 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
6887 section_switch_p = true;
6888 break;
6889 case NOTE_INSN_VAR_LOCATION:
6890 case NOTE_INSN_CALL_ARG_LOCATION:
6891 continue;
6892 default:
6893 break;
6894 }
6895
6896 if (!curr_pool
6897 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
6898 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
6899 continue;
6900
6901 if (TARGET_CPU_ZARCH)
6902 {
6903 if (curr_pool->size < S390_POOL_CHUNK_MAX)
6904 continue;
6905
6906 s390_end_pool (curr_pool, NULL_RTX);
6907 curr_pool = NULL;
6908 }
6909 else
6910 {
6911 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
6912 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
6913 + extra_size;
6914
6915 /* We will later have to insert base register reload insns.
6916 Those will have an effect on code size, which we need to
6917 consider here. This calculation makes rather pessimistic
6918 worst-case assumptions. */
6919 if (LABEL_P (insn))
6920 extra_size += 6;
6921
6922 if (chunk_size < S390_POOL_CHUNK_MIN
6923 && curr_pool->size < S390_POOL_CHUNK_MIN
6924 && !section_switch_p)
6925 continue;
6926
6927 /* Pool chunks can only be inserted after BARRIERs ... */
6928 if (BARRIER_P (insn))
6929 {
6930 s390_end_pool (curr_pool, insn);
6931 curr_pool = NULL;
6932 extra_size = 0;
6933 }
6934
6935 /* ... so if we don't find one in time, create one. */
6936 else if (chunk_size > S390_POOL_CHUNK_MAX
6937 || curr_pool->size > S390_POOL_CHUNK_MAX
6938 || section_switch_p)
6939 {
6940 rtx label, jump, barrier, next, prev;
6941
6942 if (!section_switch_p)
6943 {
6944 /* We can insert the barrier only after a 'real' insn. */
6945 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
6946 continue;
6947 if (get_attr_length (insn) == 0)
6948 continue;
6949 /* Don't separate LTREL_BASE from the corresponding
6950 LTREL_OFFSET load. */
6951 if (pending_ltrel)
6952 continue;
6953 next = insn;
6954 do
6955 {
6956 insn = next;
6957 next = NEXT_INSN (insn);
6958 }
6959 while (next
6960 && NOTE_P (next)
6961 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
6962 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
6963 }
6964 else
6965 {
6966 gcc_assert (!pending_ltrel);
6967
6968 /* The old pool has to end before the section switch
6969 note in order to make it part of the current
6970 section. */
6971 insn = PREV_INSN (insn);
6972 }
6973
6974 label = gen_label_rtx ();
6975 prev = insn;
6976 if (prev && NOTE_P (prev))
6977 prev = prev_nonnote_insn (prev);
6978 if (prev)
6979 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
6980 INSN_LOCATION (prev));
6981 else
6982 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
6983 barrier = emit_barrier_after (jump);
6984 insn = emit_label_after (label, barrier);
6985 JUMP_LABEL (jump) = label;
6986 LABEL_NUSES (label) = 1;
6987
6988 INSN_ADDRESSES_NEW (jump, -1);
6989 INSN_ADDRESSES_NEW (barrier, -1);
6990 INSN_ADDRESSES_NEW (insn, -1);
6991
6992 s390_end_pool (curr_pool, barrier);
6993 curr_pool = NULL;
6994 extra_size = 0;
6995 }
6996 }
6997 }
6998
6999 if (curr_pool)
7000 s390_end_pool (curr_pool, NULL_RTX);
7001 gcc_assert (!pending_ltrel);
7002
7003 /* Find all labels that are branched into
7004 from an insn belonging to a different chunk. */
7005
7006 far_labels = BITMAP_ALLOC (NULL);
7007
7008 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7009 {
7010 /* Labels marked with LABEL_PRESERVE_P can be the target
7011 of non-local jumps, so we have to mark them.
7012 The same holds for named labels.
7013
7014 Don't do that, however, if it is the label before
7015 a jump table. */
7016
7017 if (LABEL_P (insn)
7018 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
7019 {
7020 rtx vec_insn = next_real_insn (insn);
7021 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
7022 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
7023 }
7024
7025 /* If we have a direct jump (conditional or unconditional)
7026 or a casesi jump, check all potential targets. */
7027 else if (JUMP_P (insn))
7028 {
7029 rtx pat = PATTERN (insn);
7030 if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2)
7031 pat = XVECEXP (pat, 0, 0);
7032
7033 if (GET_CODE (pat) == SET)
7034 {
7035 rtx label = JUMP_LABEL (insn);
7036 if (label)
7037 {
7038 if (s390_find_pool (pool_list, label)
7039 != s390_find_pool (pool_list, insn))
7040 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7041 }
7042 }
7043 else if (GET_CODE (pat) == PARALLEL
7044 && XVECLEN (pat, 0) == 2
7045 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
7046 && GET_CODE (XVECEXP (pat, 0, 1)) == USE
7047 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == LABEL_REF)
7048 {
7049 /* Find the jump table used by this casesi jump. */
7050 rtx vec_label = XEXP (XEXP (XVECEXP (pat, 0, 1), 0), 0);
7051 rtx vec_insn = next_real_insn (vec_label);
7052 if (vec_insn && JUMP_TABLE_DATA_P (vec_insn))
7053 {
7054 rtx vec_pat = PATTERN (vec_insn);
7055 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
7056
7057 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
7058 {
7059 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
7060
7061 if (s390_find_pool (pool_list, label)
7062 != s390_find_pool (pool_list, insn))
7063 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7064 }
7065 }
7066 }
7067 }
7068 }
7069
7070 /* Insert base register reload insns before every pool. */
7071
7072 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7073 {
7074 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7075 curr_pool->label);
7076 rtx insn = curr_pool->first_insn;
7077 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
7078 }
7079
7080 /* Insert base register reload insns at every far label. */
7081
7082 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7083 if (LABEL_P (insn)
7084 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
7085 {
7086 struct constant_pool *pool = s390_find_pool (pool_list, insn);
7087 if (pool)
7088 {
7089 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7090 pool->label);
7091 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
7092 }
7093 }
7094
7095
7096 BITMAP_FREE (far_labels);
7097
7098
7099 /* Recompute insn addresses. */
7100
7101 init_insn_lengths ();
7102 shorten_branches (get_insns ());
7103
7104 return pool_list;
7105 }
7106
7107 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7108 After we have decided to use this list, finish implementing
7109 all changes to the current function as required. */
7110
7111 static void
7112 s390_chunkify_finish (struct constant_pool *pool_list)
7113 {
7114 struct constant_pool *curr_pool = NULL;
7115 rtx insn;
7116
7117
7118 /* Replace all literal pool references. */
7119
7120 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7121 {
7122 if (INSN_P (insn))
7123 replace_ltrel_base (&PATTERN (insn));
7124
7125 curr_pool = s390_find_pool (pool_list, insn);
7126 if (!curr_pool)
7127 continue;
7128
7129 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
7130 {
7131 rtx addr, pool_ref = NULL_RTX;
7132 find_constant_pool_ref (PATTERN (insn), &pool_ref);
7133 if (pool_ref)
7134 {
7135 if (s390_execute_label (insn))
7136 addr = s390_find_execute (curr_pool, insn);
7137 else
7138 addr = s390_find_constant (curr_pool,
7139 get_pool_constant (pool_ref),
7140 get_pool_mode (pool_ref));
7141
7142 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
7143 INSN_CODE (insn) = -1;
7144 }
7145 }
7146 }
7147
7148 /* Dump out all literal pools. */
7149
7150 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7151 s390_dump_pool (curr_pool, 0);
7152
7153 /* Free pool list. */
7154
7155 while (pool_list)
7156 {
7157 struct constant_pool *next = pool_list->next;
7158 s390_free_pool (pool_list);
7159 pool_list = next;
7160 }
7161 }
7162
7163 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7164 We have decided we cannot use this list, so revert all changes
7165 to the current function that were done by s390_chunkify_start. */
7166
7167 static void
7168 s390_chunkify_cancel (struct constant_pool *pool_list)
7169 {
7170 struct constant_pool *curr_pool = NULL;
7171 rtx insn;
7172
7173 /* Remove all pool placeholder insns. */
7174
7175 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7176 {
7177 /* Did we insert an extra barrier? Remove it. */
7178 rtx barrier = PREV_INSN (curr_pool->pool_insn);
7179 rtx jump = barrier? PREV_INSN (barrier) : NULL_RTX;
7180 rtx label = NEXT_INSN (curr_pool->pool_insn);
7181
7182 if (jump && JUMP_P (jump)
7183 && barrier && BARRIER_P (barrier)
7184 && label && LABEL_P (label)
7185 && GET_CODE (PATTERN (jump)) == SET
7186 && SET_DEST (PATTERN (jump)) == pc_rtx
7187 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
7188 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
7189 {
7190 remove_insn (jump);
7191 remove_insn (barrier);
7192 remove_insn (label);
7193 }
7194
7195 remove_insn (curr_pool->pool_insn);
7196 }
7197
7198 /* Remove all base register reload insns. */
7199
7200 for (insn = get_insns (); insn; )
7201 {
7202 rtx next_insn = NEXT_INSN (insn);
7203
7204 if (NONJUMP_INSN_P (insn)
7205 && GET_CODE (PATTERN (insn)) == SET
7206 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
7207 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
7208 remove_insn (insn);
7209
7210 insn = next_insn;
7211 }
7212
7213 /* Free pool list. */
7214
7215 while (pool_list)
7216 {
7217 struct constant_pool *next = pool_list->next;
7218 s390_free_pool (pool_list);
7219 pool_list = next;
7220 }
7221 }
7222
7223 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
7224
7225 void
7226 s390_output_pool_entry (rtx exp, enum machine_mode mode, unsigned int align)
7227 {
7228 REAL_VALUE_TYPE r;
7229
7230 switch (GET_MODE_CLASS (mode))
7231 {
7232 case MODE_FLOAT:
7233 case MODE_DECIMAL_FLOAT:
7234 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
7235
7236 REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
7237 assemble_real (r, mode, align);
7238 break;
7239
7240 case MODE_INT:
7241 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
7242 mark_symbol_refs_as_used (exp);
7243 break;
7244
7245 default:
7246 gcc_unreachable ();
7247 }
7248 }
7249
7250
7251 /* Return an RTL expression representing the value of the return address
7252 for the frame COUNT steps up from the current frame. FRAME is the
7253 frame pointer of that frame. */
7254
7255 rtx
7256 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
7257 {
7258 int offset;
7259 rtx addr;
7260
7261 /* Without backchain, we fail for all but the current frame. */
7262
7263 if (!TARGET_BACKCHAIN && count > 0)
7264 return NULL_RTX;
7265
7266 /* For the current frame, we need to make sure the initial
7267 value of RETURN_REGNUM is actually saved. */
7268
7269 if (count == 0)
7270 {
7271 /* On non-z architectures branch splitting could overwrite r14. */
7272 if (TARGET_CPU_ZARCH)
7273 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
7274 else
7275 {
7276 cfun_frame_layout.save_return_addr_p = true;
7277 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
7278 }
7279 }
7280
7281 if (TARGET_PACKED_STACK)
7282 offset = -2 * UNITS_PER_LONG;
7283 else
7284 offset = RETURN_REGNUM * UNITS_PER_LONG;
7285
7286 addr = plus_constant (Pmode, frame, offset);
7287 addr = memory_address (Pmode, addr);
7288 return gen_rtx_MEM (Pmode, addr);
7289 }
7290
7291 /* Return an RTL expression representing the back chain stored in
7292 the current stack frame. */
7293
7294 rtx
7295 s390_back_chain_rtx (void)
7296 {
7297 rtx chain;
7298
7299 gcc_assert (TARGET_BACKCHAIN);
7300
7301 if (TARGET_PACKED_STACK)
7302 chain = plus_constant (Pmode, stack_pointer_rtx,
7303 STACK_POINTER_OFFSET - UNITS_PER_LONG);
7304 else
7305 chain = stack_pointer_rtx;
7306
7307 chain = gen_rtx_MEM (Pmode, chain);
7308 return chain;
7309 }
7310
7311 /* Find first call clobbered register unused in a function.
7312 This could be used as base register in a leaf function
7313 or for holding the return address before epilogue. */
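/* Only GPRs 0-5 are considered: those are the call-clobbered GPRs that
   carry no other fixed role (r14, although call-clobbered, holds the
   return address).  */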
7314
7315 static int
7316 find_unused_clobbered_reg (void)
7317 {
7318 int i;
7319 for (i = 0; i < 6; i++)
7320 if (!df_regs_ever_live_p (i))
7321 return i;
7322 return 0;
7323 }
7324
7325
7326 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
7327 clobbered hard regs in SETREG. */
7328
7329 static void
7330 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
7331 {
7332 int *regs_ever_clobbered = (int *)data;
7333 unsigned int i, regno;
7334 enum machine_mode mode = GET_MODE (setreg);
7335
7336 if (GET_CODE (setreg) == SUBREG)
7337 {
7338 rtx inner = SUBREG_REG (setreg);
7339 if (!GENERAL_REG_P (inner))
7340 return;
7341 regno = subreg_regno (setreg);
7342 }
7343 else if (GENERAL_REG_P (setreg))
7344 regno = REGNO (setreg);
7345 else
7346 return;
7347
7348 for (i = regno;
7349 i < regno + HARD_REGNO_NREGS (regno, mode);
7350 i++)
7351 regs_ever_clobbered[i] = 1;
7352 }
7353
7354 /* Walks through all basic blocks of the current function looking
7355 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
7356 of the passed integer array REGS_EVER_CLOBBERED are set to one for
7357 each of those regs. */
7358
7359 static void
7360 s390_regs_ever_clobbered (int *regs_ever_clobbered)
7361 {
7362 basic_block cur_bb;
7363 rtx cur_insn;
7364 unsigned int i;
7365
7366 memset (regs_ever_clobbered, 0, 16 * sizeof (int));
7367
7368 /* For non-leaf functions we have to consider all call clobbered regs to be
7369 clobbered. */
7370 if (!crtl->is_leaf)
7371 {
7372 for (i = 0; i < 16; i++)
7373 regs_ever_clobbered[i] = call_really_used_regs[i];
7374 }
7375
7376 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
7377 this work is done by liveness analysis (mark_regs_live_at_end).
7378 Special care is needed for functions containing landing pads. Landing pads
7379 may use the eh registers, but the code which sets these registers is not
7380 contained in that function. Hence s390_regs_ever_clobbered is not able to
7381 deal with this automatically. */
7382 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
7383 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
7384 if (crtl->calls_eh_return
7385 || (cfun->machine->has_landing_pad_p
7386 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
7387 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
7388
7389 /* For nonlocal gotos all call-saved registers have to be saved.
7390 This flag is also set for the unwinding code in libgcc.
7391 See expand_builtin_unwind_init. For regs_ever_live this is done by
7392 reload. */
7393 if (cfun->has_nonlocal_label)
7394 for (i = 0; i < 16; i++)
7395 if (!call_really_used_regs[i])
7396 regs_ever_clobbered[i] = 1;
7397
7398 FOR_EACH_BB (cur_bb)
7399 {
7400 FOR_BB_INSNS (cur_bb, cur_insn)
7401 {
7402 if (INSN_P (cur_insn))
7403 note_stores (PATTERN (cur_insn),
7404 s390_reg_clobbered_rtx,
7405 regs_ever_clobbered);
7406 }
7407 }
7408 }
7409
7410 /* Determine the frame area which actually has to be accessed
7411 in the function epilogue. The values are stored at the
7412 given pointers AREA_BOTTOM (address of the lowest used stack
7413 address) and AREA_TOP (address of the first item which does
7414 not belong to the stack frame). */
7415
7416 static void
7417 s390_frame_area (int *area_bottom, int *area_top)
7418 {
7419 int b, t;
7420 int i;
7421
7422 b = INT_MAX;
7423 t = INT_MIN;
7424
7425 if (cfun_frame_layout.first_restore_gpr != -1)
7426 {
7427 b = (cfun_frame_layout.gprs_offset
7428 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
7429 t = b + (cfun_frame_layout.last_restore_gpr
7430 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
7431 }
7432
7433 if (TARGET_64BIT && cfun_save_high_fprs_p)
7434 {
7435 b = MIN (b, cfun_frame_layout.f8_offset);
7436 t = MAX (t, (cfun_frame_layout.f8_offset
7437 + cfun_frame_layout.high_fprs * 8));
7438 }
7439
7440 if (!TARGET_64BIT)
7441 for (i = 2; i < 4; i++)
7442 if (cfun_fpr_bit_p (i))
7443 {
7444 b = MIN (b, cfun_frame_layout.f4_offset + (i - 2) * 8);
7445 t = MAX (t, cfun_frame_layout.f4_offset + (i - 1) * 8);
7446 }
7447
7448 *area_bottom = b;
7449 *area_top = t;
7450 }
7451
7452 /* Fill cfun->machine with info about register usage of current function.
7453 Return in CLOBBERED_REGS which GPRs are currently considered set. */
7454
7455 static void
7456 s390_register_info (int clobbered_regs[])
7457 {
7458 int i, j;
7459
7460 /* FPRs 8-15 are call-saved for the 64-bit ABI. */
7461 cfun_frame_layout.fpr_bitmap = 0;
7462 cfun_frame_layout.high_fprs = 0;
7463 if (TARGET_64BIT)
7464 for (i = 24; i < 32; i++)
7465 if (df_regs_ever_live_p (i) && !global_regs[i])
7466 {
7467 cfun_set_fpr_bit (i - 16);
7468 cfun_frame_layout.high_fprs++;
7469 }
7470
7471 /* Find first and last gpr to be saved. We trust regs_ever_live
7472 data, except that we don't save and restore global registers.
7473
7474 Also, all registers with special meaning to the compiler need
7475 to be handled extra. */
7476
7477 s390_regs_ever_clobbered (clobbered_regs);
7478
7479 for (i = 0; i < 16; i++)
7480 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i] && !fixed_regs[i];
7481
7482 if (frame_pointer_needed)
7483 clobbered_regs[HARD_FRAME_POINTER_REGNUM] = 1;
7484
7485 if (flag_pic)
7486 clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
7487 |= df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7488
7489 clobbered_regs[BASE_REGNUM]
7490 |= (cfun->machine->base_reg
7491 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
7492
7493 clobbered_regs[RETURN_REGNUM]
7494 |= (!crtl->is_leaf
7495 || TARGET_TPF_PROFILING
7496 || cfun->machine->split_branches_pending_p
7497 || cfun_frame_layout.save_return_addr_p
7498 || crtl->calls_eh_return
7499 || cfun->stdarg);
7500
7501 clobbered_regs[STACK_POINTER_REGNUM]
7502 |= (!crtl->is_leaf
7503 || TARGET_TPF_PROFILING
7504 || cfun_save_high_fprs_p
7505 || get_frame_size () > 0
7506 || cfun->calls_alloca
7507 || cfun->stdarg);
7508
7509 for (i = 6; i < 16; i++)
7510 if (df_regs_ever_live_p (i) || clobbered_regs[i])
7511 break;
7512 for (j = 15; j > i; j--)
7513 if (df_regs_ever_live_p (j) || clobbered_regs[j])
7514 break;
7515
7516 if (i == 16)
7517 {
7518 /* Nothing to save/restore. */
7519 cfun_frame_layout.first_save_gpr_slot = -1;
7520 cfun_frame_layout.last_save_gpr_slot = -1;
7521 cfun_frame_layout.first_save_gpr = -1;
7522 cfun_frame_layout.first_restore_gpr = -1;
7523 cfun_frame_layout.last_save_gpr = -1;
7524 cfun_frame_layout.last_restore_gpr = -1;
7525 }
7526 else
7527 {
7528 /* Save slots for gprs from i to j. */
7529 cfun_frame_layout.first_save_gpr_slot = i;
7530 cfun_frame_layout.last_save_gpr_slot = j;
7531
7532 for (i = cfun_frame_layout.first_save_gpr_slot;
7533 i < cfun_frame_layout.last_save_gpr_slot + 1;
7534 i++)
7535 if (clobbered_regs[i])
7536 break;
7537
7538 for (j = cfun_frame_layout.last_save_gpr_slot; j > i; j--)
7539 if (clobbered_regs[j])
7540 break;
7541
7542 if (i == cfun_frame_layout.last_save_gpr_slot + 1)
7543 {
7544 /* Nothing to save/restore. */
7545 cfun_frame_layout.first_save_gpr = -1;
7546 cfun_frame_layout.first_restore_gpr = -1;
7547 cfun_frame_layout.last_save_gpr = -1;
7548 cfun_frame_layout.last_restore_gpr = -1;
7549 }
7550 else
7551 {
7552 /* Save / Restore from gpr i to j. */
7553 cfun_frame_layout.first_save_gpr = i;
7554 cfun_frame_layout.first_restore_gpr = i;
7555 cfun_frame_layout.last_save_gpr = j;
7556 cfun_frame_layout.last_restore_gpr = j;
7557 }
7558 }
7559
7560 if (cfun->stdarg)
7561 {
7562 /* Varargs functions need to save gprs 2 to 6. */
7563 if (cfun->va_list_gpr_size
7564 && crtl->args.info.gprs < GP_ARG_NUM_REG)
7565 {
7566 int min_gpr = crtl->args.info.gprs;
7567 int max_gpr = min_gpr + cfun->va_list_gpr_size;
7568 if (max_gpr > GP_ARG_NUM_REG)
7569 max_gpr = GP_ARG_NUM_REG;
7570
7571 if (cfun_frame_layout.first_save_gpr == -1
7572 || cfun_frame_layout.first_save_gpr > 2 + min_gpr)
7573 {
7574 cfun_frame_layout.first_save_gpr = 2 + min_gpr;
7575 cfun_frame_layout.first_save_gpr_slot = 2 + min_gpr;
7576 }
7577
7578 if (cfun_frame_layout.last_save_gpr == -1
7579 || cfun_frame_layout.last_save_gpr < 2 + max_gpr - 1)
7580 {
7581 cfun_frame_layout.last_save_gpr = 2 + max_gpr - 1;
7582 cfun_frame_layout.last_save_gpr_slot = 2 + max_gpr - 1;
7583 }
7584 }
7585
7586 /* Mark f0, f2 for 31 bit and f0-f4 for 64 bit to be saved. */
7587 if (TARGET_HARD_FLOAT && cfun->va_list_fpr_size
7588 && crtl->args.info.fprs < FP_ARG_NUM_REG)
7589 {
7590 int min_fpr = crtl->args.info.fprs;
7591 int max_fpr = min_fpr + cfun->va_list_fpr_size;
7592 if (max_fpr > FP_ARG_NUM_REG)
7593 max_fpr = FP_ARG_NUM_REG;
7594
7595 /* ??? This is currently required to ensure proper location
7596 of the fpr save slots within the va_list save area. */
7597 if (TARGET_PACKED_STACK)
7598 min_fpr = 0;
7599
7600 for (i = min_fpr; i < max_fpr; i++)
7601 cfun_set_fpr_bit (i);
7602 }
7603 }
7604
7605 if (!TARGET_64BIT)
7606 for (i = 2; i < 4; i++)
7607 if (df_regs_ever_live_p (i + 16) && !global_regs[i + 16])
7608 cfun_set_fpr_bit (i);
7609 }
7610
7611 /* Fill cfun->machine with info about frame of current function. */
7612
7613 static void
7614 s390_frame_info (void)
7615 {
7616 int i;
7617
7618 cfun_frame_layout.frame_size = get_frame_size ();
7619 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
7620 fatal_error ("total size of local variables exceeds architecture limit");
7621
7622 if (!TARGET_PACKED_STACK)
7623 {
7624 cfun_frame_layout.backchain_offset = 0;
7625 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
7626 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
7627 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
7628 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
7629 * UNITS_PER_LONG);
7630 }
7631 else if (TARGET_BACKCHAIN) /* kernel stack layout */
7632 {
7633 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
7634 - UNITS_PER_LONG);
7635 cfun_frame_layout.gprs_offset
7636 = (cfun_frame_layout.backchain_offset
7637 - (STACK_POINTER_REGNUM - cfun_frame_layout.first_save_gpr_slot + 1)
7638 * UNITS_PER_LONG);
7639
7640 if (TARGET_64BIT)
7641 {
7642 cfun_frame_layout.f4_offset
7643 = (cfun_frame_layout.gprs_offset
7644 - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
7645
7646 cfun_frame_layout.f0_offset
7647 = (cfun_frame_layout.f4_offset
7648 - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
7649 }
7650 else
7651 {
7652 /* On 31 bit we have to take care of the alignment of the
7653 floating point regs to provide fastest access. */
7654 cfun_frame_layout.f0_offset
7655 = ((cfun_frame_layout.gprs_offset
7656 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1))
7657 - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
7658
7659 cfun_frame_layout.f4_offset
7660 = (cfun_frame_layout.f0_offset
7661 - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
7662 }
7663 }
7664 else /* no backchain */
7665 {
7666 cfun_frame_layout.f4_offset
7667 = (STACK_POINTER_OFFSET
7668 - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
7669
7670 cfun_frame_layout.f0_offset
7671 = (cfun_frame_layout.f4_offset
7672 - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
7673
7674 cfun_frame_layout.gprs_offset
7675 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
7676 }
7677
7678 if (crtl->is_leaf
7679 && !TARGET_TPF_PROFILING
7680 && cfun_frame_layout.frame_size == 0
7681 && !cfun_save_high_fprs_p
7682 && !cfun->calls_alloca
7683 && !cfun->stdarg)
7684 return;
7685
7686 if (!TARGET_PACKED_STACK)
7687 cfun_frame_layout.frame_size += (STACK_POINTER_OFFSET
7688 + crtl->outgoing_args_size
7689 + cfun_frame_layout.high_fprs * 8);
7690 else
7691 {
7692 if (TARGET_BACKCHAIN)
7693 cfun_frame_layout.frame_size += UNITS_PER_LONG;
7694
7695 /* No alignment trouble here because f8-f15 are only saved under
7696 64 bit. */
7697 cfun_frame_layout.f8_offset = (MIN (MIN (cfun_frame_layout.f0_offset,
7698 cfun_frame_layout.f4_offset),
7699 cfun_frame_layout.gprs_offset)
7700 - cfun_frame_layout.high_fprs * 8);
7701
7702 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
7703
7704 for (i = 0; i < 8; i++)
7705 if (cfun_fpr_bit_p (i))
7706 cfun_frame_layout.frame_size += 8;
7707
7708 cfun_frame_layout.frame_size += cfun_gprs_save_area_size;
7709
7710 /* If, under 31 bit, an odd number of gprs has to be saved, we have to adjust
7711 the frame size to sustain 8-byte alignment of stack frames. */
7712 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
7713 STACK_BOUNDARY / BITS_PER_UNIT - 1)
7714 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
7715
7716 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
7717 }
7718 }
7719
7720 /* Generate frame layout. Fills in register and frame data for the current
7721 function in cfun->machine. This routine can be called multiple times;
7722 it will re-do the complete frame layout every time. */
7723
7724 static void
7725 s390_init_frame_layout (void)
7726 {
7727 HOST_WIDE_INT frame_size;
7728 int base_used;
7729 int clobbered_regs[16];
7730
7731 /* On S/390 machines, we may need to perform branch splitting, which
7732 will require both base and return address register. We have no
7733 choice but to assume we're going to need them until right at the
7734 end of the machine dependent reorg phase. */
7735 if (!TARGET_CPU_ZARCH)
7736 cfun->machine->split_branches_pending_p = true;
7737
7738 do
7739 {
7740 frame_size = cfun_frame_layout.frame_size;
7741
7742 /* Try to predict whether we'll need the base register. */
7743 base_used = cfun->machine->split_branches_pending_p
7744 || crtl->uses_const_pool
7745 || (!DISP_IN_RANGE (frame_size)
7746 && !CONST_OK_FOR_K (frame_size));
7747
7748 /* Decide which register to use as literal pool base. In small
7749 leaf functions, try to use an unused call-clobbered register
7750 as base register to avoid save/restore overhead. */
7751 if (!base_used)
7752 cfun->machine->base_reg = NULL_RTX;
7753 else if (crtl->is_leaf && !df_regs_ever_live_p (5))
7754 cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
7755 else
7756 cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
7757
7758 s390_register_info (clobbered_regs);
7759 s390_frame_info ();
7760 }
7761 while (frame_size != cfun_frame_layout.frame_size);
7762 }
7763
7764 /* Update frame layout. Recompute actual register save data based on
7765 current info and update regs_ever_live for the special registers.
7766 May be called multiple times, but may never cause *more* registers
7767 to be saved than s390_init_frame_layout allocated room for. */
7768
7769 static void
7770 s390_update_frame_layout (void)
7771 {
7772 int clobbered_regs[16];
7773
7774 s390_register_info (clobbered_regs);
7775
7776 df_set_regs_ever_live (BASE_REGNUM,
7777 clobbered_regs[BASE_REGNUM] ? true : false);
7778 df_set_regs_ever_live (RETURN_REGNUM,
7779 clobbered_regs[RETURN_REGNUM] ? true : false);
7780 df_set_regs_ever_live (STACK_POINTER_REGNUM,
7781 clobbered_regs[STACK_POINTER_REGNUM] ? true : false);
7782
7783 if (cfun->machine->base_reg)
7784 df_set_regs_ever_live (REGNO (cfun->machine->base_reg), true);
7785 }
7786
7787 /* Return true if it is legal to put a value with MODE into REGNO. */
7788
7789 bool
7790 s390_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
7791 {
7792 switch (REGNO_REG_CLASS (regno))
7793 {
7794 case FP_REGS:
7795 if (REGNO_PAIR_OK (regno, mode))
7796 {
7797 if (mode == SImode || mode == DImode)
7798 return true;
7799
7800 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
7801 return true;
7802 }
7803 break;
7804 case ADDR_REGS:
7805 if (FRAME_REGNO_P (regno) && mode == Pmode)
7806 return true;
7807
7808 /* fallthrough */
7809 case GENERAL_REGS:
7810 if (REGNO_PAIR_OK (regno, mode))
7811 {
7812 if (TARGET_ZARCH
7813 || (mode != TFmode && mode != TCmode && mode != TDmode))
7814 return true;
7815 }
7816 break;
7817 case CC_REGS:
7818 if (GET_MODE_CLASS (mode) == MODE_CC)
7819 return true;
7820 break;
7821 case ACCESS_REGS:
7822 if (REGNO_PAIR_OK (regno, mode))
7823 {
7824 if (mode == SImode || mode == Pmode)
7825 return true;
7826 }
7827 break;
7828 default:
7829 return false;
7830 }
7831
7832 return false;
7833 }
7834
7835 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
7836
7837 bool
7838 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
7839 {
7840 /* Once we've decided upon a register to use as base register, it must
7841 no longer be used for any other purpose. */
7842 if (cfun->machine->base_reg)
7843 if (REGNO (cfun->machine->base_reg) == old_reg
7844 || REGNO (cfun->machine->base_reg) == new_reg)
7845 return false;
7846
7847 return true;
7848 }
7849
7850 /* Maximum number of registers to represent a value of mode MODE
7851 in a register of class RCLASS. */
7852
7853 int
7854 s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
7855 {
7856 switch (rclass)
7857 {
7858 case FP_REGS:
7859 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7860 return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8);
7861 else
7862 return (GET_MODE_SIZE (mode) + 8 - 1) / 8;
7863 case ACCESS_REGS:
7864 return (GET_MODE_SIZE (mode) + 4 - 1) / 4;
7865 default:
7866 break;
7867 }
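/* For example (a sketch of the arithmetic above): a 16-byte TFmode value
   occupies (16 + 8 - 1) / 8 == 2 registers in FP_REGS, while in
   GENERAL_REGS it needs 16 / UNITS_PER_WORD registers, i.e. 2 under
   64 bit and 4 under 31 bit.  */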
7868 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7869 }
7870
7871 /* Return true if register FROM can be eliminated via register TO. */
7872
7873 static bool
7874 s390_can_eliminate (const int from, const int to)
7875 {
7876 /* On zSeries machines, we have not marked the base register as fixed.
7877 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
7878 If a function requires the base register, we say here that this
7879 elimination cannot be performed. This will cause reload to free
7880 up the base register (as if it were fixed). On the other hand,
7881 if the current function does *not* require the base register, we
7882 say here the elimination succeeds, which in turn allows reload
7883 to allocate the base register for any other purpose. */
7884 if (from == BASE_REGNUM && to == BASE_REGNUM)
7885 {
7886 if (TARGET_CPU_ZARCH)
7887 {
7888 s390_init_frame_layout ();
7889 return cfun->machine->base_reg == NULL_RTX;
7890 }
7891
7892 return false;
7893 }
7894
7895 /* Everything else must point into the stack frame. */
7896 gcc_assert (to == STACK_POINTER_REGNUM
7897 || to == HARD_FRAME_POINTER_REGNUM);
7898
7899 gcc_assert (from == FRAME_POINTER_REGNUM
7900 || from == ARG_POINTER_REGNUM
7901 || from == RETURN_ADDRESS_POINTER_REGNUM);
7902
7903 /* Make sure we actually saved the return address. */
7904 if (from == RETURN_ADDRESS_POINTER_REGNUM)
7905 if (!crtl->calls_eh_return
7906 && !cfun->stdarg
7907 && !cfun_frame_layout.save_return_addr_p)
7908 return false;
7909
7910 return true;
7911 }
7912
7913 /* Return the offset between registers FROM and TO initially after the prologue. */
7914
7915 HOST_WIDE_INT
7916 s390_initial_elimination_offset (int from, int to)
7917 {
7918 HOST_WIDE_INT offset;
7919 int index;
7920
7921 /* ??? Why are we called for non-eliminable pairs? */
7922 if (!s390_can_eliminate (from, to))
7923 return 0;
7924
7925 switch (from)
7926 {
7927 case FRAME_POINTER_REGNUM:
7928 offset = (get_frame_size()
7929 + STACK_POINTER_OFFSET
7930 + crtl->outgoing_args_size);
7931 break;
7932
7933 case ARG_POINTER_REGNUM:
7934 s390_init_frame_layout ();
7935 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
7936 break;
7937
7938 case RETURN_ADDRESS_POINTER_REGNUM:
7939 s390_init_frame_layout ();
7940 index = RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot;
7941 gcc_assert (index >= 0);
7942 offset = cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset;
7943 offset += index * UNITS_PER_LONG;
7944 break;
7945
7946 case BASE_REGNUM:
7947 offset = 0;
7948 break;
7949
7950 default:
7951 gcc_unreachable ();
7952 }
7953
7954 return offset;
7955 }
7956
7957 /* Emit insn to save fpr REGNUM at offset OFFSET relative
7958 to register BASE. Return generated insn. */
7959
7960 static rtx
7961 save_fpr (rtx base, int offset, int regnum)
7962 {
7963 rtx addr;
7964 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
7965
7966 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
7967 set_mem_alias_set (addr, get_varargs_alias_set ());
7968 else
7969 set_mem_alias_set (addr, get_frame_alias_set ());
7970
7971 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
7972 }
7973
7974 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
7975 to register BASE. Return generated insn. */
7976
7977 static rtx
7978 restore_fpr (rtx base, int offset, int regnum)
7979 {
7980 rtx addr;
7981 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
7982 set_mem_alias_set (addr, get_frame_alias_set ());
7983
7984 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
7985 }
7986
7987 /* Return true if REGNO is a global register, but not one
7988 of the special ones that need to be saved/restored in any case. */
7989
7990 static inline bool
7991 global_not_special_regno_p (int regno)
7992 {
7993 return (global_regs[regno]
7994 /* These registers are special and need to be
7995 restored in any case. */
7996 && !(regno == STACK_POINTER_REGNUM
7997 || regno == RETURN_REGNUM
7998 || regno == BASE_REGNUM
7999 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
8000 }
8001
8002 /* Generate insn to save registers FIRST to LAST into
8003 the register save area located at offset OFFSET
8004 relative to register BASE. */
8005
8006 static rtx
8007 save_gprs (rtx base, int offset, int first, int last)
8008 {
8009 rtx addr, insn, note;
8010 int i;
8011
8012 addr = plus_constant (Pmode, base, offset);
8013 addr = gen_rtx_MEM (Pmode, addr);
8014
8015 set_mem_alias_set (addr, get_frame_alias_set ());
8016
8017 /* Special-case single register. */
8018 if (first == last)
8019 {
8020 if (TARGET_64BIT)
8021 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
8022 else
8023 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
8024
8025 if (!global_not_special_regno_p (first))
8026 RTX_FRAME_RELATED_P (insn) = 1;
8027 return insn;
8028 }
8029
8030
8031 insn = gen_store_multiple (addr,
8032 gen_rtx_REG (Pmode, first),
8033 GEN_INT (last - first + 1));
8034
8035 if (first <= 6 && cfun->stdarg)
8036 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8037 {
8038 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
8039
8040 if (first + i <= 6)
8041 set_mem_alias_set (mem, get_varargs_alias_set ());
8042 }
8043
8044 /* We need to set the FRAME_RELATED flag on all SETs
8045 inside the store-multiple pattern.
8046
8047 However, we must not emit DWARF records for registers 2..5
8048 if they are stored for use by variable arguments ...
8049
8050 ??? Unfortunately, it is not enough to simply not set the
8051 FRAME_RELATED flags for those SETs, because the first SET
8052 of the PARALLEL is always treated as if it had the flag
8053 set, even if it does not. Therefore we emit a new pattern
8054 without those registers as a REG_FRAME_RELATED_EXPR note. */
8055
8056 if (first >= 6 && !global_not_special_regno_p (first))
8057 {
8058 rtx pat = PATTERN (insn);
8059
8060 for (i = 0; i < XVECLEN (pat, 0); i++)
8061 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
8062 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
8063 0, i)))))
8064 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
8065
8066 RTX_FRAME_RELATED_P (insn) = 1;
8067 }
8068 else if (last >= 6)
8069 {
8070 int start;
8071
8072 for (start = first >= 6 ? first : 6; start <= last; start++)
8073 if (!global_not_special_regno_p (start))
8074 break;
8075
8076 if (start > last)
8077 return insn;
8078
8079 addr = plus_constant (Pmode, base,
8080 offset + (start - first) * UNITS_PER_LONG);
8081 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
8082 gen_rtx_REG (Pmode, start),
8083 GEN_INT (last - start + 1));
8084 note = PATTERN (note);
8085
8086 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
8087
8088 for (i = 0; i < XVECLEN (note, 0); i++)
8089 if (GET_CODE (XVECEXP (note, 0, i)) == SET
8090 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
8091 0, i)))))
8092 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
8093
8094 RTX_FRAME_RELATED_P (insn) = 1;
8095 }
8096
8097 return insn;
8098 }
8099
8100 /* Generate insn to restore registers FIRST to LAST from
8101 the register save area located at offset OFFSET
8102 relative to register BASE. */
8103
8104 static rtx
8105 restore_gprs (rtx base, int offset, int first, int last)
8106 {
8107 rtx addr, insn;
8108
8109 addr = plus_constant (Pmode, base, offset);
8110 addr = gen_rtx_MEM (Pmode, addr);
8111 set_mem_alias_set (addr, get_frame_alias_set ());
8112
8113 /* Special-case single register. */
8114 if (first == last)
8115 {
8116 if (TARGET_64BIT)
8117 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
8118 else
8119 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
8120
8121 return insn;
8122 }
8123
8124 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
8125 addr,
8126 GEN_INT (last - first + 1));
8127 return insn;
8128 }
8129
8130 /* Return insn sequence to load the GOT register. */
8131
8132 static GTY(()) rtx got_symbol;
8133 rtx
8134 s390_load_got (void)
8135 {
8136 rtx insns;
8137
8138 /* We cannot use pic_offset_table_rtx here, since this function is
8139 also used for non-pic code when __tls_get_offset is called; in
8140 that case neither PIC_OFFSET_TABLE_REGNUM nor pic_offset_table_rtx
8141 is usable. */
8142 rtx got_rtx = gen_rtx_REG (Pmode, 12);
8143
8144 if (!got_symbol)
8145 {
8146 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8147 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
8148 }
8149
8150 start_sequence ();
8151
8152 if (TARGET_CPU_ZARCH)
8153 {
8154 emit_move_insn (got_rtx, got_symbol);
8155 }
8156 else
8157 {
8158 rtx offset;
8159
8160 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
8161 UNSPEC_LTREL_OFFSET);
8162 offset = gen_rtx_CONST (Pmode, offset);
8163 offset = force_const_mem (Pmode, offset);
8164
8165 emit_move_insn (got_rtx, offset);
8166
8167 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
8168 UNSPEC_LTREL_BASE);
8169 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
8170
8171 emit_move_insn (got_rtx, offset);
8172 }
8173
8174 insns = get_insns ();
8175 end_sequence ();
8176 return insns;
8177 }
8178
8179 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
8180 and the change to the stack pointer. */
8181
8182 static void
8183 s390_emit_stack_tie (void)
8184 {
8185 rtx mem = gen_frame_mem (BLKmode,
8186 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
8187
8188 emit_insn (gen_stack_tie (mem));
8189 }
8190
8191 /* Expand the prologue into a bunch of separate insns. */
8192
8193 void
8194 s390_emit_prologue (void)
8195 {
8196 rtx insn, addr;
8197 rtx temp_reg;
8198 int i;
8199 int offset;
8200 int next_fpr = 0;
8201
8202 /* Complete frame layout. */
8203
8204 s390_update_frame_layout ();
8205
8206 /* Annotate all constant pool references to let the scheduler know
8207 they implicitly use the base register. */
8208
8209 push_topmost_sequence ();
8210
8211 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8212 if (INSN_P (insn))
8213 {
8214 annotate_constant_pool_refs (&PATTERN (insn));
8215 df_insn_rescan (insn);
8216 }
8217
8218 pop_topmost_sequence ();
8219
8220 /* Choose the best register for temporary use within the prologue.
8221 See below for why TPF must use register 1. */
8222
8223 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
8224 && !crtl->is_leaf
8225 && !TARGET_TPF_PROFILING)
8226 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8227 else
8228 temp_reg = gen_rtx_REG (Pmode, 1);
8229
8230 /* Save call saved gprs. */
8231 if (cfun_frame_layout.first_save_gpr != -1)
8232 {
8233 insn = save_gprs (stack_pointer_rtx,
8234 cfun_frame_layout.gprs_offset +
8235 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
8236 - cfun_frame_layout.first_save_gpr_slot),
8237 cfun_frame_layout.first_save_gpr,
8238 cfun_frame_layout.last_save_gpr);
8239 emit_insn (insn);
8240 }
8241
8242 /* Dummy insn to mark literal pool slot. */
8243
8244 if (cfun->machine->base_reg)
8245 emit_insn (gen_main_pool (cfun->machine->base_reg));
8246
8247 offset = cfun_frame_layout.f0_offset;
8248
8249 /* Save f0 and f2. */
8250 for (i = 0; i < 2; i++)
8251 {
8252 if (cfun_fpr_bit_p (i))
8253 {
8254 save_fpr (stack_pointer_rtx, offset, i + 16);
8255 offset += 8;
8256 }
8257 else if (!TARGET_PACKED_STACK)
8258 offset += 8;
8259 }
8260
8261 /* Save f4 and f6. */
8262 offset = cfun_frame_layout.f4_offset;
8263 for (i = 2; i < 4; i++)
8264 {
8265 if (cfun_fpr_bit_p (i))
8266 {
8267 insn = save_fpr (stack_pointer_rtx, offset, i + 16);
8268 offset += 8;
8269
8270 /* If f4 and f6 are call-clobbered, they are saved here only because
8271 of stdarg and are therefore not frame related. */
8272 if (!call_really_used_regs[i + 16])
8273 RTX_FRAME_RELATED_P (insn) = 1;
8274 }
8275 else if (!TARGET_PACKED_STACK)
8276 offset += 8;
8277 }
8278
8279 if (TARGET_PACKED_STACK
8280 && cfun_save_high_fprs_p
8281 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
8282 {
8283 offset = (cfun_frame_layout.f8_offset
8284 + (cfun_frame_layout.high_fprs - 1) * 8);
8285
8286 for (i = 15; i > 7 && offset >= 0; i--)
8287 if (cfun_fpr_bit_p (i))
8288 {
8289 insn = save_fpr (stack_pointer_rtx, offset, i + 16);
8290
8291 RTX_FRAME_RELATED_P (insn) = 1;
8292 offset -= 8;
8293 }
8294 if (offset >= cfun_frame_layout.f8_offset)
8295 next_fpr = i + 16;
8296 }
8297
8298 if (!TARGET_PACKED_STACK)
8299 next_fpr = cfun_save_high_fprs_p ? 31 : 0;
8300
8301 if (flag_stack_usage_info)
8302 current_function_static_stack_size = cfun_frame_layout.frame_size;
8303
8304 /* Decrement stack pointer. */
8305
8306 if (cfun_frame_layout.frame_size > 0)
8307 {
8308 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8309 rtx real_frame_off;
8310
8311 if (s390_stack_size)
8312 {
8313 HOST_WIDE_INT stack_guard;
8314
8315 if (s390_stack_guard)
8316 stack_guard = s390_stack_guard;
8317 else
8318 {
8319 /* If no value for the stack guard is provided, the smallest power of 2
8320 larger than the current frame size is chosen. */
8321 stack_guard = 1;
8322 while (stack_guard < cfun_frame_layout.frame_size)
8323 stack_guard <<= 1;
8324 }
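/* Worked example (illustrative only): for a frame size of 0x1234 bytes
   and no -mstack-guard value, the loop above yields
   stack_guard == 0x2000, the smallest power of two larger than the
   frame size.  */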
8325
8326 if (cfun_frame_layout.frame_size >= s390_stack_size)
8327 {
8328 warning (0, "frame size of function %qs is %wd"
8329 " bytes exceeding user provided stack limit of "
8330 "%d bytes. "
8331 "An unconditional trap is added.",
8332 current_function_name(), cfun_frame_layout.frame_size,
8333 s390_stack_size);
8334 emit_insn (gen_trap ());
8335 }
8336 else
8337 {
8338 /* stack_guard has to be smaller than s390_stack_size.
8339 Otherwise we would emit an AND with zero which would
8340 not match the test under mask pattern. */
8341 if (stack_guard >= s390_stack_size)
8342 {
8343 warning (0, "frame size of function %qs is %wd"
8344 " bytes which is more than half the stack size. "
8345 "The dynamic check would not be reliable. "
8346 "No check emitted for this function.",
8347 current_function_name(),
8348 cfun_frame_layout.frame_size);
8349 }
8350 else
8351 {
8352 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
8353 & ~(stack_guard - 1));
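/* Worked example (illustrative values): with s390_stack_size == 0x8000
   and stack_guard == 0x1000 the mask is 0x7fff & ~0xfff == 0x7000;
   the conditional trap emitted below fires once the stack pointer,
   taken modulo s390_stack_size, drops below stack_guard bytes.  */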
8354
8355 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
8356 GEN_INT (stack_check_mask));
8357 if (TARGET_64BIT)
8358 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
8359 t, const0_rtx),
8360 t, const0_rtx, const0_rtx));
8361 else
8362 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
8363 t, const0_rtx),
8364 t, const0_rtx, const0_rtx));
8365 }
8366 }
8367 }
8368
8369 if (s390_warn_framesize > 0
8370 && cfun_frame_layout.frame_size >= s390_warn_framesize)
8371 warning (0, "frame size of %qs is %wd bytes",
8372 current_function_name (), cfun_frame_layout.frame_size);
8373
8374 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
8375 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
8376
8377 /* Save incoming stack pointer into temp reg. */
8378 if (TARGET_BACKCHAIN || next_fpr)
8379 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
8380
8381 /* Subtract frame size from stack pointer. */
8382
8383 if (DISP_IN_RANGE (INTVAL (frame_off)))
8384 {
8385 insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8386 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8387 frame_off));
8388 insn = emit_insn (insn);
8389 }
8390 else
8391 {
8392 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
8393 frame_off = force_const_mem (Pmode, frame_off);
8394
8395 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
8396 annotate_constant_pool_refs (&PATTERN (insn));
8397 }
8398
8399 RTX_FRAME_RELATED_P (insn) = 1;
8400 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8401 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8402 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8403 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8404 real_frame_off)));
8405
8406 /* Set backchain. */
8407
8408 if (TARGET_BACKCHAIN)
8409 {
8410 if (cfun_frame_layout.backchain_offset)
8411 addr = gen_rtx_MEM (Pmode,
8412 plus_constant (Pmode, stack_pointer_rtx,
8413 cfun_frame_layout.backchain_offset));
8414 else
8415 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8416 set_mem_alias_set (addr, get_frame_alias_set ());
8417 insn = emit_insn (gen_move_insn (addr, temp_reg));
8418 }
8419
8420 /* If we support non-call exceptions (e.g. for Java),
8421 we need to make sure the backchain pointer is set up
8422 before any possibly trapping memory access. */
8423 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
8424 {
8425 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8426 emit_clobber (addr);
8427 }
8428 }
8429
8430 /* Save fprs 8 - 15 (64 bit ABI). */
8431
8432 if (cfun_save_high_fprs_p && next_fpr)
8433 {
8434 /* If the stack might be accessed through a different register
8435 we have to make sure that the stack pointer decrement is not
8436 moved below the use of the stack slots. */
8437 s390_emit_stack_tie ();
8438
8439 insn = emit_insn (gen_add2_insn (temp_reg,
8440 GEN_INT (cfun_frame_layout.f8_offset)));
8441
8442 offset = 0;
8443
8444 for (i = 24; i <= next_fpr; i++)
8445 if (cfun_fpr_bit_p (i - 16))
8446 {
8447 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
8448 cfun_frame_layout.frame_size
8449 + cfun_frame_layout.f8_offset
8450 + offset);
8451
8452 insn = save_fpr (temp_reg, offset, i);
8453 offset += 8;
8454 RTX_FRAME_RELATED_P (insn) = 1;
8455 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8456 gen_rtx_SET (VOIDmode,
8457 gen_rtx_MEM (DFmode, addr),
8458 gen_rtx_REG (DFmode, i)));
8459 }
8460 }
8461
8462 /* Set frame pointer, if needed. */
8463
8464 if (frame_pointer_needed)
8465 {
8466 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8467 RTX_FRAME_RELATED_P (insn) = 1;
8468 }
8469
8470 /* Set up got pointer, if needed. */
8471
8472 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
8473 {
8474 rtx insns = s390_load_got ();
8475
8476 for (insn = insns; insn; insn = NEXT_INSN (insn))
8477 annotate_constant_pool_refs (&PATTERN (insn));
8478
8479 emit_insn (insns);
8480 }
8481
8482 if (TARGET_TPF_PROFILING)
8483 {
8484 /* Generate a BAS instruction to serve as a function
8485 entry intercept to facilitate the use of tracing
8486 algorithms located at the branch target. */
8487 emit_insn (gen_prologue_tpf ());
8488
8489 /* Emit a blockage here so that all code
8490 lies between the profiling mechanisms. */
8491 emit_insn (gen_blockage ());
8492 }
8493 }
8494
8495 /* Expand the epilogue into a bunch of separate insns. */
8496
8497 void
8498 s390_emit_epilogue (bool sibcall)
8499 {
8500 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
8501 int area_bottom, area_top, offset = 0;
8502 int next_offset;
8503 rtvec p;
8504 int i;
8505
8506 if (TARGET_TPF_PROFILING)
8507 {
8508
8509 /* Generate a BAS instruction to serve as a function
8510 exit intercept to facilitate the use of tracing
8511 algorithms located at the branch target. */
8512
8513 /* Emit a blockage here so that all code
8514 lies between the profiling mechanisms. */
8515 emit_insn (gen_blockage ());
8516
8517 emit_insn (gen_epilogue_tpf ());
8518 }
8519
8520 /* Check whether to use frame or stack pointer for restore. */
8521
8522 frame_pointer = (frame_pointer_needed
8523 ? hard_frame_pointer_rtx : stack_pointer_rtx);
8524
8525 s390_frame_area (&area_bottom, &area_top);
8526
8527 /* Check whether we can access the register save area.
8528 If not, increment the frame pointer as required. */
8529
8530 if (area_top <= area_bottom)
8531 {
8532 /* Nothing to restore. */
8533 }
8534 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
8535 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
8536 {
8537 /* Area is in range. */
8538 offset = cfun_frame_layout.frame_size;
8539 }
8540 else
8541 {
8542 rtx insn, frame_off, cfa;
8543
8544 offset = area_bottom < 0 ? -area_bottom : 0;
8545 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
8546
8547 cfa = gen_rtx_SET (VOIDmode, frame_pointer,
8548 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
8549 if (DISP_IN_RANGE (INTVAL (frame_off)))
8550 {
8551 insn = gen_rtx_SET (VOIDmode, frame_pointer,
8552 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
8553 insn = emit_insn (insn);
8554 }
8555 else
8556 {
8557 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
8558 frame_off = force_const_mem (Pmode, frame_off);
8559
8560 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
8561 annotate_constant_pool_refs (&PATTERN (insn));
8562 }
8563 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
8564 RTX_FRAME_RELATED_P (insn) = 1;
8565 }
8566
8567 /* Restore call saved fprs. */
8568
8569 if (TARGET_64BIT)
8570 {
8571 if (cfun_save_high_fprs_p)
8572 {
8573 next_offset = cfun_frame_layout.f8_offset;
8574 for (i = 24; i < 32; i++)
8575 {
8576 if (cfun_fpr_bit_p (i - 16))
8577 {
8578 restore_fpr (frame_pointer,
8579 offset + next_offset, i);
8580 cfa_restores
8581 = alloc_reg_note (REG_CFA_RESTORE,
8582 gen_rtx_REG (DFmode, i), cfa_restores);
8583 next_offset += 8;
8584 }
8585 }
8586 }
8587
8588 }
8589 else
8590 {
8591 next_offset = cfun_frame_layout.f4_offset;
8592 for (i = 18; i < 20; i++)
8593 {
8594 if (cfun_fpr_bit_p (i - 16))
8595 {
8596 restore_fpr (frame_pointer,
8597 offset + next_offset, i);
8598 cfa_restores
8599 = alloc_reg_note (REG_CFA_RESTORE,
8600 gen_rtx_REG (DFmode, i), cfa_restores);
8601 next_offset += 8;
8602 }
8603 else if (!TARGET_PACKED_STACK)
8604 next_offset += 8;
8605 }
8606
8607 }
8608
8609 /* Return register. */
8610
8611 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8612
8613 /* Restore call saved gprs. */
8614
8615 if (cfun_frame_layout.first_restore_gpr != -1)
8616 {
8617 rtx insn, addr;
8618 int i;
8619
8620 /* Check for global registers and store their current values back to the
8621 stack slots they will be reloaded from, so the restore below leaves them unchanged. */
8622
8623 for (i = cfun_frame_layout.first_restore_gpr;
8624 i <= cfun_frame_layout.last_restore_gpr;
8625 i++)
8626 {
8627 if (global_not_special_regno_p (i))
8628 {
8629 addr = plus_constant (Pmode, frame_pointer,
8630 offset + cfun_frame_layout.gprs_offset
8631 + (i - cfun_frame_layout.first_save_gpr_slot)
8632 * UNITS_PER_LONG);
8633 addr = gen_rtx_MEM (Pmode, addr);
8634 set_mem_alias_set (addr, get_frame_alias_set ());
8635 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
8636 }
8637 else
8638 cfa_restores
8639 = alloc_reg_note (REG_CFA_RESTORE,
8640 gen_rtx_REG (Pmode, i), cfa_restores);
8641 }
8642
8643 if (! sibcall)
8644 {
8645 /* Fetch the return address from the stack before the load multiple;
8646 this is good for scheduling. */
8647
8648 if (cfun_frame_layout.save_return_addr_p
8649 || (cfun_frame_layout.first_restore_gpr < BASE_REGNUM
8650 && cfun_frame_layout.last_restore_gpr > RETURN_REGNUM))
8651 {
8652 int return_regnum = find_unused_clobbered_reg();
8653 if (!return_regnum)
8654 return_regnum = 4;
8655 return_reg = gen_rtx_REG (Pmode, return_regnum);
8656
8657 addr = plus_constant (Pmode, frame_pointer,
8658 offset + cfun_frame_layout.gprs_offset
8659 + (RETURN_REGNUM
8660 - cfun_frame_layout.first_save_gpr_slot)
8661 * UNITS_PER_LONG);
8662 addr = gen_rtx_MEM (Pmode, addr);
8663 set_mem_alias_set (addr, get_frame_alias_set ());
8664 emit_move_insn (return_reg, addr);
8665 }
8666 }
8667
8668 insn = restore_gprs (frame_pointer,
8669 offset + cfun_frame_layout.gprs_offset
8670 + (cfun_frame_layout.first_restore_gpr
8671 - cfun_frame_layout.first_save_gpr_slot)
8672 * UNITS_PER_LONG,
8673 cfun_frame_layout.first_restore_gpr,
8674 cfun_frame_layout.last_restore_gpr);
8675 insn = emit_insn (insn);
8676 REG_NOTES (insn) = cfa_restores;
8677 add_reg_note (insn, REG_CFA_DEF_CFA,
8678 plus_constant (Pmode, stack_pointer_rtx,
8679 STACK_POINTER_OFFSET));
8680 RTX_FRAME_RELATED_P (insn) = 1;
8681 }
8682
8683 if (! sibcall)
8684 {
8685
8686 /* Return to caller. */
8687
8688 p = rtvec_alloc (2);
8689
8690 RTVEC_ELT (p, 0) = ret_rtx;
8691 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
8692 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
8693 }
8694 }
8695
8696
8697 /* Return the size in bytes of a function argument of
8698 type TYPE and/or mode MODE. At least one of TYPE or
8699 MODE must be specified. */
8700
8701 static int
8702 s390_function_arg_size (enum machine_mode mode, const_tree type)
8703 {
8704 if (type)
8705 return int_size_in_bytes (type);
8706
8707 /* No type info available for some library calls ... */
8708 if (mode != BLKmode)
8709 return GET_MODE_SIZE (mode);
8710
8711 /* If we have neither type nor mode, abort. */
8712 gcc_unreachable ();
8713 }
8714
8715 /* Return true if a function argument of type TYPE and mode MODE
8716 is to be passed in a floating-point register, if available. */
8717
8718 static bool
8719 s390_function_arg_float (enum machine_mode mode, const_tree type)
8720 {
8721 int size = s390_function_arg_size (mode, type);
8722 if (size > 8)
8723 return false;
8724
8725 /* Soft-float changes the ABI: no floating-point registers are used. */
8726 if (TARGET_SOFT_FLOAT)
8727 return false;
8728
8729 /* No type info available for some library calls ... */
8730 if (!type)
8731 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
8732
8733 /* The ABI says that record types with a single member are treated
8734 just like that member would be. */
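/* E.g. (illustrative types only): struct wrap { double d; } and
   struct nest { struct wrap w; } are both treated like a plain double
   here, since the loop below keeps unwrapping single-member records.  */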
8735 while (TREE_CODE (type) == RECORD_TYPE)
8736 {
8737 tree field, single = NULL_TREE;
8738
8739 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8740 {
8741 if (TREE_CODE (field) != FIELD_DECL)
8742 continue;
8743
8744 if (single == NULL_TREE)
8745 single = TREE_TYPE (field);
8746 else
8747 return false;
8748 }
8749
8750 if (single == NULL_TREE)
8751 return false;
8752 else
8753 type = single;
8754 }
8755
8756 return TREE_CODE (type) == REAL_TYPE;
8757 }
8758
8759 /* Return true if a function argument of type TYPE and mode MODE
8760 is to be passed in an integer register, or a pair of integer
8761 registers, if available. */
8762
8763 static bool
8764 s390_function_arg_integer (enum machine_mode mode, const_tree type)
8765 {
8766 int size = s390_function_arg_size (mode, type);
8767 if (size > 8)
8768 return false;
8769
8770 /* No type info available for some library calls ... */
8771 if (!type)
8772 return GET_MODE_CLASS (mode) == MODE_INT
8773 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
8774
8775 /* We accept small integral (and similar) types. */
8776 if (INTEGRAL_TYPE_P (type)
8777 || POINTER_TYPE_P (type)
8778 || TREE_CODE (type) == NULLPTR_TYPE
8779 || TREE_CODE (type) == OFFSET_TYPE
8780 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
8781 return true;
8782
8783 /* We also accept structs of size 1, 2, 4, 8 that are not
8784 passed in floating-point registers. */
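/* E.g. (illustrative only): struct { short a, b; } (size 4) is accepted
   and passed in a GPR, whereas struct { char c[3]; } (size 3, not a
   power of two) fails the exact_log2 test and ends up being passed by
   reference instead.  */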
8785 if (AGGREGATE_TYPE_P (type)
8786 && exact_log2 (size) >= 0
8787 && !s390_function_arg_float (mode, type))
8788 return true;
8789
8790 return false;
8791 }
8792
8793 /* Return 1 if a function argument of type TYPE and mode MODE
8794 is to be passed by reference. The ABI specifies that only
8795 structures of size 1, 2, 4, or 8 bytes are passed by value,
8796 all other structures (and complex numbers) are passed by
8797 reference. */
8798
8799 static bool
8800 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
8801 enum machine_mode mode, const_tree type,
8802 bool named ATTRIBUTE_UNUSED)
8803 {
8804 int size = s390_function_arg_size (mode, type);
8805 if (size > 8)
8806 return true;
8807
8808 if (type)
8809 {
8810 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
8811 return 1;
8812
8813 if (TREE_CODE (type) == COMPLEX_TYPE
8814 || TREE_CODE (type) == VECTOR_TYPE)
8815 return 1;
8816 }
8817
8818 return 0;
8819 }
8820
8821 /* Update the data in CUM to advance over an argument of mode MODE and
8822 data type TYPE. (TYPE is null for libcalls where that information
8823 may not be available.) The boolean NAMED specifies whether the
8824 argument is a named argument (as opposed to an unnamed argument
8825 matching an ellipsis). */
8826
8827 static void
8828 s390_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
8829 const_tree type, bool named ATTRIBUTE_UNUSED)
8830 {
8831 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8832
8833 if (s390_function_arg_float (mode, type))
8834 {
8835 cum->fprs += 1;
8836 }
8837 else if (s390_function_arg_integer (mode, type))
8838 {
8839 int size = s390_function_arg_size (mode, type);
8840 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
8841 }
8842 else
8843 gcc_unreachable ();
8844 }
8845
8846 /* Define where to put the arguments to a function.
8847 Value is zero to push the argument on the stack,
8848 or a hard register in which to store the argument.
8849
8850 MODE is the argument's machine mode.
8851 TYPE is the data type of the argument (as a tree).
8852 This is null for libcalls where that information may
8853 not be available.
8854 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8855 the preceding args and about the function being called.
8856 NAMED is nonzero if this argument is a named parameter
8857 (otherwise it is an extra parameter matching an ellipsis).
8858
8859 On S/390, we use general purpose registers 2 through 6 to
8860 pass integer, pointer, and certain structure arguments, and
8861 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
8862 to pass floating point arguments. All remaining arguments
8863 are pushed to the stack. */
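/* Illustrative example (hypothetical function foo, 64-bit ABI): for
   foo (int a, double b, long c), A is passed in GPR 2, B in FPR 0 and
   C in GPR 3; a sixth integer argument would no longer fit into
   GPRs 2-6 and would be passed on the stack.  */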
8864
8865 static rtx
8866 s390_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
8867 const_tree type, bool named ATTRIBUTE_UNUSED)
8868 {
8869 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8870
8871 if (s390_function_arg_float (mode, type))
8872 {
8873 if (cum->fprs + 1 > FP_ARG_NUM_REG)
8874 return 0;
8875 else
8876 return gen_rtx_REG (mode, cum->fprs + 16);
8877 }
8878 else if (s390_function_arg_integer (mode, type))
8879 {
8880 int size = s390_function_arg_size (mode, type);
8881 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
8882
8883 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
8884 return 0;
8885 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
8886 return gen_rtx_REG (mode, cum->gprs + 2);
8887 else if (n_gprs == 2)
8888 {
8889 rtvec p = rtvec_alloc (2);
8890
8891 RTVEC_ELT (p, 0)
8892 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
8893 const0_rtx);
8894 RTVEC_ELT (p, 1)
8895 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
8896 GEN_INT (4));
8897
8898 return gen_rtx_PARALLEL (mode, p);
8899 }
8900 }
8901
8902 /* After the real arguments, expand_call calls us once again
8903 with a void_type_node type. Whatever we return here is
8904 passed as operand 2 to the call expanders.
8905
8906 We don't need this feature ... */
8907 else if (type == void_type_node)
8908 return const0_rtx;
8909
8910 gcc_unreachable ();
8911 }
8912
8913 /* Return true if return values of type TYPE should be returned
8914 in a memory buffer whose address is passed by the caller as
8915 hidden first argument. */
8916
8917 static bool
8918 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
8919 {
8920 /* We accept small integral (and similar) types. */
8921 if (INTEGRAL_TYPE_P (type)
8922 || POINTER_TYPE_P (type)
8923 || TREE_CODE (type) == OFFSET_TYPE
8924 || TREE_CODE (type) == REAL_TYPE)
8925 return int_size_in_bytes (type) > 8;
8926
8927 /* Aggregates and similar constructs are always returned
8928 in memory. */
8929 if (AGGREGATE_TYPE_P (type)
8930 || TREE_CODE (type) == COMPLEX_TYPE
8931 || TREE_CODE (type) == VECTOR_TYPE)
8932 return true;
8933
8934 /* ??? We get called on all sorts of random stuff from
8935 aggregate_value_p. We can't abort, but it's not clear
8936 what's safe to return. Pretend it's a struct I guess. */
8937 return true;
8938 }
8939
8940 /* Function arguments and return values are promoted to word size. */
8941
8942 static enum machine_mode
8943 s390_promote_function_mode (const_tree type, enum machine_mode mode,
8944 int *punsignedp,
8945 const_tree fntype ATTRIBUTE_UNUSED,
8946 int for_return ATTRIBUTE_UNUSED)
8947 {
8948 if (INTEGRAL_MODE_P (mode)
8949 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
8950 {
8951 if (type != NULL_TREE && POINTER_TYPE_P (type))
8952 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8953 return Pmode;
8954 }
8955
8956 return mode;
8957 }
8958
8959 /* Define where to return a (scalar) value of type RET_TYPE.
8960 If RET_TYPE is null, define where to return a (scalar)
8961 value of mode MODE from a libcall. */
8962
8963 static rtx
8964 s390_function_and_libcall_value (enum machine_mode mode,
8965 const_tree ret_type,
8966 const_tree fntype_or_decl,
8967 bool outgoing ATTRIBUTE_UNUSED)
8968 {
8969 /* For normal functions perform the promotion as
8970 promote_function_mode would do. */
8971 if (ret_type)
8972 {
8973 int unsignedp = TYPE_UNSIGNED (ret_type);
8974 mode = promote_function_mode (ret_type, mode, &unsignedp,
8975 fntype_or_decl, 1);
8976 }
8977
8978 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode));
8979 gcc_assert (GET_MODE_SIZE (mode) <= 8);
8980
8981 if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
8982 return gen_rtx_REG (mode, 16);
8983 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
8984 || UNITS_PER_LONG == UNITS_PER_WORD)
8985 return gen_rtx_REG (mode, 2);
8986 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
8987 {
8988 /* This case is triggered when returning a 64 bit value with
8989 -m31 -mzarch. Although the value would fit into a single
8990 register it has to be forced into a 32 bit register pair in
8991 order to match the ABI. */
8992 rtvec p = rtvec_alloc (2);
8993
8994 RTVEC_ELT (p, 0)
8995 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
8996 RTVEC_ELT (p, 1)
8997 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
8998
8999 return gen_rtx_PARALLEL (mode, p);
9000 }
9001
9002 gcc_unreachable ();
9003 }
9004
9005 /* Define where to return a scalar return value of type RET_TYPE. */
9006
9007 static rtx
9008 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
9009 bool outgoing)
9010 {
9011 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
9012 fn_decl_or_type, outgoing);
9013 }
9014
9015 /* Define where to return a scalar libcall return value of mode
9016 MODE. */
9017
9018 static rtx
9019 s390_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9020 {
9021 return s390_function_and_libcall_value (mode, NULL_TREE,
9022 NULL_TREE, true);
9023 }
9024
9025
9026 /* Create and return the va_list datatype.
9027
9028 On S/390, va_list is an array type equivalent to
9029
9030 typedef struct __va_list_tag
9031 {
9032 long __gpr;
9033 long __fpr;
9034 void *__overflow_arg_area;
9035 void *__reg_save_area;
9036 } va_list[1];
9037
9038 where __gpr and __fpr hold the number of general purpose
9039 or floating point arguments used up to now, respectively,
9040 __overflow_arg_area points to the stack location of the
9041 next argument passed on the stack, and __reg_save_area
9042 always points to the start of the register area in the
9043 call frame of the current function. The function prologue
9044 saves all registers used for argument passing into this
9045 area if the function uses variable arguments. */
9046
9047 static tree
9048 s390_build_builtin_va_list (void)
9049 {
9050 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9051
9052 record = lang_hooks.types.make_type (RECORD_TYPE);
9053
9054 type_decl =
9055 build_decl (BUILTINS_LOCATION,
9056 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9057
9058 f_gpr = build_decl (BUILTINS_LOCATION,
9059 FIELD_DECL, get_identifier ("__gpr"),
9060 long_integer_type_node);
9061 f_fpr = build_decl (BUILTINS_LOCATION,
9062 FIELD_DECL, get_identifier ("__fpr"),
9063 long_integer_type_node);
9064 f_ovf = build_decl (BUILTINS_LOCATION,
9065 FIELD_DECL, get_identifier ("__overflow_arg_area"),
9066 ptr_type_node);
9067 f_sav = build_decl (BUILTINS_LOCATION,
9068 FIELD_DECL, get_identifier ("__reg_save_area"),
9069 ptr_type_node);
9070
9071 va_list_gpr_counter_field = f_gpr;
9072 va_list_fpr_counter_field = f_fpr;
9073
9074 DECL_FIELD_CONTEXT (f_gpr) = record;
9075 DECL_FIELD_CONTEXT (f_fpr) = record;
9076 DECL_FIELD_CONTEXT (f_ovf) = record;
9077 DECL_FIELD_CONTEXT (f_sav) = record;
9078
9079 TYPE_STUB_DECL (record) = type_decl;
9080 TYPE_NAME (record) = type_decl;
9081 TYPE_FIELDS (record) = f_gpr;
9082 DECL_CHAIN (f_gpr) = f_fpr;
9083 DECL_CHAIN (f_fpr) = f_ovf;
9084 DECL_CHAIN (f_ovf) = f_sav;
9085
9086 layout_type (record);
9087
9088 /* The correct type is an array type of one element. */
9089 return build_array_type (record, build_index_type (size_zero_node));
9090 }
9091
9092 /* Implement va_start by filling the va_list structure VALIST.
9093 STDARG_P is always true, and ignored.
9094 NEXTARG points to the first anonymous stack argument.
9095
9096 The following global variables are used to initialize
9097 the va_list structure:
9098
9099 crtl->args.info:
9100 holds number of gprs and fprs used for named arguments.
9101 crtl->args.arg_offset_rtx:
9102 holds the offset of the first anonymous stack argument
9103 (relative to the virtual arg pointer). */
9104
9105 static void
9106 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
9107 {
9108 HOST_WIDE_INT n_gpr, n_fpr;
9109 int off;
9110 tree f_gpr, f_fpr, f_ovf, f_sav;
9111 tree gpr, fpr, ovf, sav, t;
9112
9113 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9114 f_fpr = DECL_CHAIN (f_gpr);
9115 f_ovf = DECL_CHAIN (f_fpr);
9116 f_sav = DECL_CHAIN (f_ovf);
9117
9118 valist = build_simple_mem_ref (valist);
9119 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9120 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9121 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9122 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9123
9124 /* Count number of gp and fp argument registers used. */
9125
9126 n_gpr = crtl->args.info.gprs;
9127 n_fpr = crtl->args.info.fprs;
9128
9129 if (cfun->va_list_gpr_size)
9130 {
9131 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
9132 build_int_cst (NULL_TREE, n_gpr));
9133 TREE_SIDE_EFFECTS (t) = 1;
9134 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9135 }
9136
9137 if (cfun->va_list_fpr_size)
9138 {
9139 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
9140 build_int_cst (NULL_TREE, n_fpr));
9141 TREE_SIDE_EFFECTS (t) = 1;
9142 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9143 }
9144
9145 /* Find the overflow area. */
9146 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
9147 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG)
9148 {
9149 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
9150
9151 off = INTVAL (crtl->args.arg_offset_rtx);
9152 off = off < 0 ? 0 : off;
9153 if (TARGET_DEBUG_ARG)
9154 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
9155 (int)n_gpr, (int)n_fpr, off);
9156
9157 t = fold_build_pointer_plus_hwi (t, off);
9158
9159 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
9160 TREE_SIDE_EFFECTS (t) = 1;
9161 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9162 }
9163
9164 /* Find the register save area. */
9165 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
9166 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
9167 {
9168 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
9169 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
9170
9171 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
9172 TREE_SIDE_EFFECTS (t) = 1;
9173 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9174 }
9175 }
9176
9177 /* Implement va_arg by updating the va_list structure
9178 VALIST as required to retrieve an argument of type
9179 TYPE, and returning that argument.
9180
9181 Generates code equivalent to:
9182
9183 if (integral value) {
9184 if (size <= 4 && args.gpr < 5 ||
9185 size > 4 && args.gpr < 4 )
9186 ret = args.reg_save_area[args.gpr+8]
9187 else
9188 ret = *args.overflow_arg_area++;
9189 } else if (float value) {
9190 if (args.fgpr < 2)
9191 ret = args.reg_save_area[args.fpr+64]
9192 else
9193 ret = *args.overflow_arg_area++;
9194 } else if (aggregate value) {
9195 if (args.gpr < 5)
9196 ret = *args.reg_save_area[args.gpr]
9197 else
9198 ret = **args.overflow_arg_area++;
9199 } */
9200
9201 static tree
9202 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9203 gimple_seq *post_p ATTRIBUTE_UNUSED)
9204 {
9205 tree f_gpr, f_fpr, f_ovf, f_sav;
9206 tree gpr, fpr, ovf, sav, reg, t, u;
9207 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
9208 tree lab_false, lab_over, addr;
9209
9210 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9211 f_fpr = DECL_CHAIN (f_gpr);
9212 f_ovf = DECL_CHAIN (f_fpr);
9213 f_sav = DECL_CHAIN (f_ovf);
9214
9215 valist = build_va_arg_indirect_ref (valist);
9216 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9217 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9218 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9219
9220 /* The tree for args* cannot be shared between gpr/fpr and ovf since
9221 both appear on a lhs. */
9222 valist = unshare_expr (valist);
9223 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9224
9225 size = int_size_in_bytes (type);
9226
9227 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
9228 {
9229 if (TARGET_DEBUG_ARG)
9230 {
9231 fprintf (stderr, "va_arg: aggregate type");
9232 debug_tree (type);
9233 }
9234
9235 /* Aggregates are passed by reference. */
9236 indirect_p = 1;
9237 reg = gpr;
9238 n_reg = 1;
9239
9240 /* Kernel stack layout on 31 bit: it is assumed here that no padding
9241 will be added by s390_frame_info because for va_args an even number
9242 of GPRs always has to be saved (r15-r2 = 14 regs). */
9243 sav_ofs = 2 * UNITS_PER_LONG;
9244 sav_scale = UNITS_PER_LONG;
9245 size = UNITS_PER_LONG;
9246 max_reg = GP_ARG_NUM_REG - n_reg;
9247 }
9248 else if (s390_function_arg_float (TYPE_MODE (type), type))
9249 {
9250 if (TARGET_DEBUG_ARG)
9251 {
9252 fprintf (stderr, "va_arg: float type");
9253 debug_tree (type);
9254 }
9255
9256 /* FP args go in FP registers, if present. */
9257 indirect_p = 0;
9258 reg = fpr;
9259 n_reg = 1;
9260 sav_ofs = 16 * UNITS_PER_LONG;
9261 sav_scale = 8;
9262 max_reg = FP_ARG_NUM_REG - n_reg;
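/* Illustrative example: under 64 bit (UNITS_PER_LONG == 8) the first
   FP argument is fetched from __reg_save_area + 16*8 + 0*8, i.e. past
   the 16 GPR-sized slots, with each further FP slot 8 bytes wide.  */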
9263 }
9264 else
9265 {
9266 if (TARGET_DEBUG_ARG)
9267 {
9268 fprintf (stderr, "va_arg: other type");
9269 debug_tree (type);
9270 }
9271
9272 /* Otherwise into GP registers. */
9273 indirect_p = 0;
9274 reg = gpr;
9275 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
9276
9277 /* Kernel stack layout on 31 bit: it is assumed here that no padding
9278 will be added by s390_frame_info because for va_args an even number
9279 of GPRs always has to be saved (r15-r2 = 14 regs). */
9280 sav_ofs = 2 * UNITS_PER_LONG;
9281
9282 if (size < UNITS_PER_LONG)
9283 sav_ofs += UNITS_PER_LONG - size;
9284
9285 sav_scale = UNITS_PER_LONG;
9286 max_reg = GP_ARG_NUM_REG - n_reg;
9287 }
9288
9289 /* Pull the value out of the saved registers ... */
9290
9291 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9292 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9293 addr = create_tmp_var (ptr_type_node, "addr");
9294
9295 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
9296 t = build2 (GT_EXPR, boolean_type_node, reg, t);
9297 u = build1 (GOTO_EXPR, void_type_node, lab_false);
9298 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
9299 gimplify_and_add (t, pre_p);
9300
9301 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
9302 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
9303 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
9304 t = fold_build_pointer_plus (t, u);
9305
9306 gimplify_assign (addr, t, pre_p);
9307
9308 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9309
9310 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9311
9312
9313 /* ... Otherwise out of the overflow area. */
9314
9315 t = ovf;
9316 if (size < UNITS_PER_LONG)
9317 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
9318
9319 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9320
9321 gimplify_assign (addr, t, pre_p);
9322
9323 t = fold_build_pointer_plus_hwi (t, size);
9324 gimplify_assign (ovf, t, pre_p);
9325
9326 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9327
9328
9329 /* Increment register save count. */
9330
9331 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
9332 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
9333 gimplify_and_add (u, pre_p);
9334
9335 if (indirect_p)
9336 {
9337 t = build_pointer_type_for_mode (build_pointer_type (type),
9338 ptr_mode, true);
9339 addr = fold_convert (t, addr);
9340 addr = build_va_arg_indirect_ref (addr);
9341 }
9342 else
9343 {
9344 t = build_pointer_type_for_mode (type, ptr_mode, true);
9345 addr = fold_convert (t, addr);
9346 }
9347
9348 return build_va_arg_indirect_ref (addr);
9349 }
9350
9351 /* Output assembly code for the trampoline template to
9352 stdio stream FILE.
9353
9354 On S/390, we use gpr 1 internally in the trampoline code;
9355 gpr 0 is used to hold the static chain. */
9356
9357 static void
9358 s390_asm_trampoline_template (FILE *file)
9359 {
9360 rtx op[2];
9361 op[0] = gen_rtx_REG (Pmode, 0);
9362 op[1] = gen_rtx_REG (Pmode, 1);
9363
9364 if (TARGET_64BIT)
9365 {
9366 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
9367 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
9368 output_asm_insn ("br\t%1", op); /* 2 byte */
9369 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
9370 }
9371 else
9372 {
9373 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
9374 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
9375 output_asm_insn ("br\t%1", op); /* 2 byte */
9376 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
9377 }
9378 }
9379
9380 /* Emit RTL insns to initialize the variable parts of a trampoline.
9381 FNADDR is an RTX for the address of the function's pure code.
9382 CXT is an RTX for the static chain value for the function. */
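/* Illustrative layout (64 bit, UNITS_PER_LONG == 8): bytes 0-15 hold the
   code emitted by s390_asm_trampoline_template, bytes 16-23 receive the
   static chain value CXT and bytes 24-31 the function address, matching
   the lmg at displacement 14 in the template that loads them into
   %r0 and %r1.  */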
9383
9384 static void
9385 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9386 {
9387 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
9388 rtx mem;
9389
9390 emit_block_move (m_tramp, assemble_trampoline_template (),
9391 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
9392
9393 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
9394 emit_move_insn (mem, cxt);
9395 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
9396 emit_move_insn (mem, fnaddr);
9397 }
9398
9399 /* Output assembler code to FILE to increment profiler label # LABELNO
9400 for profiling a function entry. */
9401
9402 void
9403 s390_function_profiler (FILE *file, int labelno)
9404 {
9405 rtx op[7];
9406
9407 char label[128];
9408 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
9409
9410 fprintf (file, "# function profiler \n");
9411
9412 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
9413 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
9414 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
9415
9416 op[2] = gen_rtx_REG (Pmode, 1);
9417 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
9418 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
9419
9420 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
9421 if (flag_pic)
9422 {
9423 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
9424 op[4] = gen_rtx_CONST (Pmode, op[4]);
9425 }
9426
9427 if (TARGET_64BIT)
9428 {
9429 output_asm_insn ("stg\t%0,%1", op);
9430 output_asm_insn ("larl\t%2,%3", op);
9431 output_asm_insn ("brasl\t%0,%4", op);
9432 output_asm_insn ("lg\t%0,%1", op);
9433 }
9434 else if (!flag_pic)
9435 {
9436 op[6] = gen_label_rtx ();
9437
9438 output_asm_insn ("st\t%0,%1", op);
9439 output_asm_insn ("bras\t%2,%l6", op);
9440 output_asm_insn (".long\t%4", op);
9441 output_asm_insn (".long\t%3", op);
9442 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
9443 output_asm_insn ("l\t%0,0(%2)", op);
9444 output_asm_insn ("l\t%2,4(%2)", op);
9445 output_asm_insn ("basr\t%0,%0", op);
9446 output_asm_insn ("l\t%0,%1", op);
9447 }
9448 else
9449 {
9450 op[5] = gen_label_rtx ();
9451 op[6] = gen_label_rtx ();
9452
9453 output_asm_insn ("st\t%0,%1", op);
9454 output_asm_insn ("bras\t%2,%l6", op);
9455 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
9456 output_asm_insn (".long\t%4-%l5", op);
9457 output_asm_insn (".long\t%3-%l5", op);
9458 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
9459 output_asm_insn ("lr\t%0,%2", op);
9460 output_asm_insn ("a\t%0,0(%2)", op);
9461 output_asm_insn ("a\t%2,4(%2)", op);
9462 output_asm_insn ("basr\t%0,%0", op);
9463 output_asm_insn ("l\t%0,%1", op);
9464 }
9465 }
9466
9467 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
9468 into its SYMBOL_REF_FLAGS. */
9469
9470 static void
9471 s390_encode_section_info (tree decl, rtx rtl, int first)
9472 {
9473 default_encode_section_info (decl, rtl, first);
9474
9475 if (TREE_CODE (decl) == VAR_DECL)
9476 {
9477 /* If a variable has a forced alignment to < 2 bytes, mark it
9478 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as a LARL
9479 operand. */
9480 if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
9481 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
9482 if (!DECL_SIZE (decl)
9483 || !DECL_ALIGN (decl)
9484 || !host_integerp (DECL_SIZE (decl), 0)
9485 || (DECL_ALIGN (decl) <= 64
9486 && DECL_ALIGN (decl) != tree_low_cst (DECL_SIZE (decl), 0)))
9487 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
9488 }
9489
9490 /* Literal pool references don't have a decl so they are handled
9491 differently here. We rely on the information in the MEM_ALIGN
9492 entry to decide upon natural alignment. */
9493 if (MEM_P (rtl)
9494 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
9495 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
9496 && (MEM_ALIGN (rtl) == 0
9497 || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
9498 || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
9499 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
9500 }
9501
9502 /* Output thunk to FILE that implements a C++ virtual function call (with
9503 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
9504 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
9505 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
9506 relative to the resulting this pointer. */
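/* In C-like pseudo code (a sketch only; the real work is done by the
   assembly templates emitted below):

     this += delta;
     if (vcall_offset)
       this += *(long *) (*(void **) this + vcall_offset);
     goto *function;
*/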
9507
9508 static void
9509 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
9510 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9511 tree function)
9512 {
9513 rtx op[10];
9514 int nonlocal = 0;
9515
9516 /* Make sure unwind info is emitted for the thunk if needed. */
9517 final_start_function (emit_barrier (), file, 1);
9518
9519 /* Operand 0 is the target function. */
9520 op[0] = XEXP (DECL_RTL (function), 0);
9521 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
9522 {
9523 nonlocal = 1;
9524 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
9525 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
9526 op[0] = gen_rtx_CONST (Pmode, op[0]);
9527 }
9528
9529 /* Operand 1 is the 'this' pointer. */
9530 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9531 op[1] = gen_rtx_REG (Pmode, 3);
9532 else
9533 op[1] = gen_rtx_REG (Pmode, 2);
9534
9535 /* Operand 2 is the delta. */
9536 op[2] = GEN_INT (delta);
9537
9538 /* Operand 3 is the vcall_offset. */
9539 op[3] = GEN_INT (vcall_offset);
9540
9541 /* Operand 4 is the temporary register. */
9542 op[4] = gen_rtx_REG (Pmode, 1);
9543
9544 /* Operands 5 to 8 can be used as labels. */
9545 op[5] = NULL_RTX;
9546 op[6] = NULL_RTX;
9547 op[7] = NULL_RTX;
9548 op[8] = NULL_RTX;
9549
9550 /* Operand 9 can be used for temporary register. */
9551 op[9] = NULL_RTX;
9552
9553 /* Generate code. */
9554 if (TARGET_64BIT)
9555 {
9556 /* Setup literal pool pointer if required. */
9557 if ((!DISP_IN_RANGE (delta)
9558 && !CONST_OK_FOR_K (delta)
9559 && !CONST_OK_FOR_Os (delta))
9560 || (!DISP_IN_RANGE (vcall_offset)
9561 && !CONST_OK_FOR_K (vcall_offset)
9562 && !CONST_OK_FOR_Os (vcall_offset)))
9563 {
9564 op[5] = gen_label_rtx ();
9565 output_asm_insn ("larl\t%4,%5", op);
9566 }
9567
9568 /* Add DELTA to this pointer. */
9569 if (delta)
9570 {
9571 if (CONST_OK_FOR_J (delta))
9572 output_asm_insn ("la\t%1,%2(%1)", op);
9573 else if (DISP_IN_RANGE (delta))
9574 output_asm_insn ("lay\t%1,%2(%1)", op);
9575 else if (CONST_OK_FOR_K (delta))
9576 output_asm_insn ("aghi\t%1,%2", op);
9577 else if (CONST_OK_FOR_Os (delta))
9578 output_asm_insn ("agfi\t%1,%2", op);
9579 else
9580 {
9581 op[6] = gen_label_rtx ();
9582 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
9583 }
9584 }
9585
9586 /* Perform vcall adjustment. */
9587 if (vcall_offset)
9588 {
9589 if (DISP_IN_RANGE (vcall_offset))
9590 {
9591 output_asm_insn ("lg\t%4,0(%1)", op);
9592 output_asm_insn ("ag\t%1,%3(%4)", op);
9593 }
9594 else if (CONST_OK_FOR_K (vcall_offset))
9595 {
9596 output_asm_insn ("lghi\t%4,%3", op);
9597 output_asm_insn ("ag\t%4,0(%1)", op);
9598 output_asm_insn ("ag\t%1,0(%4)", op);
9599 }
9600 else if (CONST_OK_FOR_Os (vcall_offset))
9601 {
9602 output_asm_insn ("lgfi\t%4,%3", op);
9603 output_asm_insn ("ag\t%4,0(%1)", op);
9604 output_asm_insn ("ag\t%1,0(%4)", op);
9605 }
9606 else
9607 {
9608 op[7] = gen_label_rtx ();
9609 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
9610 output_asm_insn ("ag\t%4,0(%1)", op);
9611 output_asm_insn ("ag\t%1,0(%4)", op);
9612 }
9613 }
9614
9615 /* Jump to target. */
9616 output_asm_insn ("jg\t%0", op);
9617
9618 /* Output literal pool if required. */
9619 if (op[5])
9620 {
9621 output_asm_insn (".align\t4", op);
9622 targetm.asm_out.internal_label (file, "L",
9623 CODE_LABEL_NUMBER (op[5]));
9624 }
9625 if (op[6])
9626 {
9627 targetm.asm_out.internal_label (file, "L",
9628 CODE_LABEL_NUMBER (op[6]));
9629 output_asm_insn (".long\t%2", op);
9630 }
9631 if (op[7])
9632 {
9633 targetm.asm_out.internal_label (file, "L",
9634 CODE_LABEL_NUMBER (op[7]));
9635 output_asm_insn (".long\t%3", op);
9636 }
9637 }
9638 else
9639 {
9640 /* Setup base pointer if required. */
9641 if (!vcall_offset
9642 || (!DISP_IN_RANGE (delta)
9643 && !CONST_OK_FOR_K (delta)
9644 && !CONST_OK_FOR_Os (delta))
9645 || (!DISP_IN_RANGE (vcall_offset)
9646 && !CONST_OK_FOR_K (vcall_offset)
9647 && !CONST_OK_FOR_Os (vcall_offset)))
9648 {
9649 op[5] = gen_label_rtx ();
9650 output_asm_insn ("basr\t%4,0", op);
9651 targetm.asm_out.internal_label (file, "L",
9652 CODE_LABEL_NUMBER (op[5]));
9653 }
9654
9655 /* Add DELTA to this pointer. */
9656 if (delta)
9657 {
9658 if (CONST_OK_FOR_J (delta))
9659 output_asm_insn ("la\t%1,%2(%1)", op);
9660 else if (DISP_IN_RANGE (delta))
9661 output_asm_insn ("lay\t%1,%2(%1)", op);
9662 else if (CONST_OK_FOR_K (delta))
9663 output_asm_insn ("ahi\t%1,%2", op);
9664 else if (CONST_OK_FOR_Os (delta))
9665 output_asm_insn ("afi\t%1,%2", op);
9666 else
9667 {
9668 op[6] = gen_label_rtx ();
9669 output_asm_insn ("a\t%1,%6-%5(%4)", op);
9670 }
9671 }
9672
9673 /* Perform vcall adjustment. */
9674 if (vcall_offset)
9675 {
9676 if (CONST_OK_FOR_J (vcall_offset))
9677 {
9678 output_asm_insn ("l\t%4,0(%1)", op);
9679 output_asm_insn ("a\t%1,%3(%4)", op);
9680 }
9681 else if (DISP_IN_RANGE (vcall_offset))
9682 {
9683 output_asm_insn ("l\t%4,0(%1)", op);
9684 output_asm_insn ("ay\t%1,%3(%4)", op);
9685 }
9686 else if (CONST_OK_FOR_K (vcall_offset))
9687 {
9688 output_asm_insn ("lhi\t%4,%3", op);
9689 output_asm_insn ("a\t%4,0(%1)", op);
9690 output_asm_insn ("a\t%1,0(%4)", op);
9691 }
9692 else if (CONST_OK_FOR_Os (vcall_offset))
9693 {
9694 output_asm_insn ("iilf\t%4,%3", op);
9695 output_asm_insn ("a\t%4,0(%1)", op);
9696 output_asm_insn ("a\t%1,0(%4)", op);
9697 }
9698 else
9699 {
9700 op[7] = gen_label_rtx ();
9701 output_asm_insn ("l\t%4,%7-%5(%4)", op);
9702 output_asm_insn ("a\t%4,0(%1)", op);
9703 output_asm_insn ("a\t%1,0(%4)", op);
9704 }
9705
9706 /* We had to clobber the base pointer register.
9707 Re-setup the base pointer (with a different base). */
9708 op[5] = gen_label_rtx ();
9709 output_asm_insn ("basr\t%4,0", op);
9710 targetm.asm_out.internal_label (file, "L",
9711 CODE_LABEL_NUMBER (op[5]));
9712 }
9713
9714 /* Jump to target. */
9715 op[8] = gen_label_rtx ();
9716
9717 if (!flag_pic)
9718 output_asm_insn ("l\t%4,%8-%5(%4)", op);
9719 else if (!nonlocal)
9720 output_asm_insn ("a\t%4,%8-%5(%4)", op);
9721 /* We cannot call through .plt, since .plt requires %r12 loaded. */
9722 else if (flag_pic == 1)
9723 {
9724 output_asm_insn ("a\t%4,%8-%5(%4)", op);
9725 output_asm_insn ("l\t%4,%0(%4)", op);
9726 }
9727 else if (flag_pic == 2)
9728 {
9729 op[9] = gen_rtx_REG (Pmode, 0);
9730 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
9731 output_asm_insn ("a\t%4,%8-%5(%4)", op);
9732 output_asm_insn ("ar\t%4,%9", op);
9733 output_asm_insn ("l\t%4,0(%4)", op);
9734 }
9735
9736 output_asm_insn ("br\t%4", op);
9737
9738 /* Output literal pool. */
9739 output_asm_insn (".align\t4", op);
9740
9741 if (nonlocal && flag_pic == 2)
9742 output_asm_insn (".long\t%0", op);
9743 if (nonlocal)
9744 {
9745 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9746 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
9747 }
9748
9749 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
9750 if (!flag_pic)
9751 output_asm_insn (".long\t%0", op);
9752 else
9753 output_asm_insn (".long\t%0-%5", op);
9754
9755 if (op[6])
9756 {
9757 targetm.asm_out.internal_label (file, "L",
9758 CODE_LABEL_NUMBER (op[6]));
9759 output_asm_insn (".long\t%2", op);
9760 }
9761 if (op[7])
9762 {
9763 targetm.asm_out.internal_label (file, "L",
9764 CODE_LABEL_NUMBER (op[7]));
9765 output_asm_insn (".long\t%3", op);
9766 }
9767 }
9768 final_end_function ();
9769 }
9770
9771 static bool
9772 s390_valid_pointer_mode (enum machine_mode mode)
9773 {
9774 return (mode == SImode || (TARGET_64BIT && mode == DImode));
9775 }
9776
9777 /* Checks whether the given CALL_EXPR would use a call-saved
9778 register. This is used to decide whether sibling call
9779 optimization could be performed on the respective function
9780 call. */
9781
9782 static bool
9783 s390_call_saved_register_used (tree call_expr)
9784 {
9785 CUMULATIVE_ARGS cum_v;
9786 cumulative_args_t cum;
9787 tree parameter;
9788 enum machine_mode mode;
9789 tree type;
9790 rtx parm_rtx;
9791 int reg, i;
9792
9793 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
9794 cum = pack_cumulative_args (&cum_v);
9795
9796 for (i = 0; i < call_expr_nargs (call_expr); i++)
9797 {
9798 parameter = CALL_EXPR_ARG (call_expr, i);
9799 gcc_assert (parameter);
9800
9801 /* For an undeclared variable passed as parameter we will get
9802 an ERROR_MARK node here. */
9803 if (TREE_CODE (parameter) == ERROR_MARK)
9804 return true;
9805
9806 type = TREE_TYPE (parameter);
9807 gcc_assert (type);
9808
9809 mode = TYPE_MODE (type);
9810 gcc_assert (mode);
9811
9812 if (pass_by_reference (&cum_v, mode, type, true))
9813 {
9814 mode = Pmode;
9815 type = build_pointer_type (type);
9816 }
9817
9818 parm_rtx = s390_function_arg (cum, mode, type, 0);
9819
9820 s390_function_arg_advance (cum, mode, type, 0);
9821
9822 if (!parm_rtx)
9823 continue;
9824
9825 if (REG_P (parm_rtx))
9826 {
9827 for (reg = 0;
9828 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
9829 reg++)
9830 if (!call_used_regs[reg + REGNO (parm_rtx)])
9831 return true;
9832 }
9833
9834 if (GET_CODE (parm_rtx) == PARALLEL)
9835 {
9836 int i;
9837
9838 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
9839 {
9840 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
9841
9842 gcc_assert (REG_P (r));
9843
9844 for (reg = 0;
9845 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
9846 reg++)
9847 if (!call_used_regs[reg + REGNO (r)])
9848 return true;
9849 }
9850 }
9851
9852 }
9853 return false;
9854 }
9855
9856 /* Return true if the given call expression can be
9857 turned into a sibling call.
9858 DECL holds the declaration of the function to be called whereas
9859 EXP is the call expression itself. */
9860
9861 static bool
9862 s390_function_ok_for_sibcall (tree decl, tree exp)
9863 {
9864 /* The TPF epilogue uses register 1. */
9865 if (TARGET_TPF_PROFILING)
9866 return false;
9867
9868 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
9869 which would have to be restored before the sibcall. */
9870 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
9871 return false;
9872
9873 /* Register 6 on s390 is available as an argument register but is
9874 unfortunately call-saved. This makes functions needing this register for
9875 arguments not suitable for sibcalls. */
9876 return !s390_call_saved_register_used (exp);
9877 }
9878
9879 /* Return the fixed registers used for condition codes. */
9880
9881 static bool
9882 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9883 {
9884 *p1 = CC_REGNUM;
9885 *p2 = INVALID_REGNUM;
9886
9887 return true;
9888 }
9889
9890 /* This function is used by the call expanders of the machine description.
9891 It emits the call insn itself together with the necessary operations
9892 to adjust the target address and returns the emitted insn.
9893 ADDR_LOCATION is the target address rtx
9894 TLS_CALL the location of the thread-local symbol
9895 RESULT_REG the register where the result of the call should be stored
9896 RETADDR_REG the register where the return address should be stored
9897 If this parameter is NULL_RTX the call is considered
9898 to be a sibling call. */
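/* For example (cf. s390_emit_tpf_eh_return below), a regular call can be
   emitted as

       s390_emit_call (addr, NULL_RTX, result_reg,
                       gen_rtx_REG (Pmode, RETURN_REGNUM));

   whereas passing NULL_RTX for RETADDR_REG requests a sibling call.  */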
9899
9900 rtx
9901 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
9902 rtx retaddr_reg)
9903 {
9904 bool plt_call = false;
9905 rtx insn;
9906 rtx call;
9907 rtx clobber;
9908 rtvec vec;
9909
9910 /* Direct function calls need special treatment. */
9911 if (GET_CODE (addr_location) == SYMBOL_REF)
9912 {
9913 /* When calling a global routine in PIC mode, we must
9914 replace the symbol itself with the PLT stub. */
9915 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
9916 {
9917 if (retaddr_reg != NULL_RTX)
9918 {
9919 addr_location = gen_rtx_UNSPEC (Pmode,
9920 gen_rtvec (1, addr_location),
9921 UNSPEC_PLT);
9922 addr_location = gen_rtx_CONST (Pmode, addr_location);
9923 plt_call = true;
9924 }
9925 else
9926 /* For -fpic code the PLT entries might use r12 which is
9927 call-saved. Therefore we cannot do a sibcall when
9928 calling directly using a symbol ref. When reaching
9929 this point we decided (in s390_function_ok_for_sibcall)
9930 to do a sibcall for a function pointer but one of the
9931 optimizers was able to get rid of the function pointer
9932 by propagating the symbol ref into the call. This
9933 optimization is illegal for S/390 so we turn the direct
9934 call into an indirect call again. */
9935 addr_location = force_reg (Pmode, addr_location);
9936 }
9937
9938 /* Unless we can use the bras(l) insn, force the
9939 routine address into a register. */
9940 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
9941 {
9942 if (flag_pic)
9943 addr_location = legitimize_pic_address (addr_location, 0);
9944 else
9945 addr_location = force_reg (Pmode, addr_location);
9946 }
9947 }
9948
9949 /* If it is already an indirect call or the code above moved the
9950 SYMBOL_REF to somewhere else, make sure the address can be found in
9951 register 1. */
9952 if (retaddr_reg == NULL_RTX
9953 && GET_CODE (addr_location) != SYMBOL_REF
9954 && !plt_call)
9955 {
9956 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
9957 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
9958 }
9959
9960 addr_location = gen_rtx_MEM (QImode, addr_location);
9961 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
9962
9963 if (result_reg != NULL_RTX)
9964 call = gen_rtx_SET (VOIDmode, result_reg, call);
9965
9966 if (retaddr_reg != NULL_RTX)
9967 {
9968 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
9969
9970 if (tls_call != NULL_RTX)
9971 vec = gen_rtvec (3, call, clobber,
9972 gen_rtx_USE (VOIDmode, tls_call));
9973 else
9974 vec = gen_rtvec (2, call, clobber);
9975
9976 call = gen_rtx_PARALLEL (VOIDmode, vec);
9977 }
9978
9979 insn = emit_call_insn (call);
9980
9981 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
9982 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
9983 {
9984 /* s390_function_ok_for_sibcall should
9985 have denied sibcalls in this case. */
9986 gcc_assert (retaddr_reg != NULL_RTX);
9987 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
9988 }
9989 return insn;
9990 }
9991
9992 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
9993
9994 static void
9995 s390_conditional_register_usage (void)
9996 {
9997 int i;
9998
9999 if (flag_pic)
10000 {
10001 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10002 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10003 }
10004 if (TARGET_CPU_ZARCH)
10005 {
10006 fixed_regs[BASE_REGNUM] = 0;
10007 call_used_regs[BASE_REGNUM] = 0;
10008 fixed_regs[RETURN_REGNUM] = 0;
10009 call_used_regs[RETURN_REGNUM] = 0;
10010 }
10011 if (TARGET_64BIT)
10012 {
10013 for (i = 24; i < 32; i++)
10014 call_used_regs[i] = call_really_used_regs[i] = 0;
10015 }
10016 else
10017 {
10018 for (i = 18; i < 20; i++)
10019 call_used_regs[i] = call_really_used_regs[i] = 0;
10020 }
10021
10022 if (TARGET_SOFT_FLOAT)
10023 {
10024 for (i = 16; i < 32; i++)
10025 call_used_regs[i] = fixed_regs[i] = 1;
10026 }
10027 }
10028
10029 /* Corresponding function to eh_return expander. */
10030
10031 static GTY(()) rtx s390_tpf_eh_return_symbol;
10032 void
10033 s390_emit_tpf_eh_return (rtx target)
10034 {
10035 rtx insn, reg;
10036
10037 if (!s390_tpf_eh_return_symbol)
10038 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
10039
10040 reg = gen_rtx_REG (Pmode, 2);
10041
10042 emit_move_insn (reg, target);
10043 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
10044 gen_rtx_REG (Pmode, RETURN_REGNUM));
10045 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
10046
10047 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
10048 }
10049
10050 /* Rework the prologue/epilogue to avoid saving/restoring
10051 registers unnecessarily. */
10052
10053 static void
10054 s390_optimize_prologue (void)
10055 {
10056 rtx insn, new_insn, next_insn;
10057
10058 /* Do a final recompute of the frame-related data. */
10059
10060 s390_update_frame_layout ();
10061
10062 /* If all special registers are in fact used, there's nothing we
10063 can do, so no point in walking the insn list. */
10064
10065 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
10066 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
10067 && (TARGET_CPU_ZARCH
10068 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
10069 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
10070 return;
10071
10072 /* Search for prologue/epilogue insns and replace them. */
10073
10074 for (insn = get_insns (); insn; insn = next_insn)
10075 {
10076 int first, last, off;
10077 rtx set, base, offset;
10078
10079 next_insn = NEXT_INSN (insn);
10080
10081 if (! NONJUMP_INSN_P (insn))
10082 continue;
10083
10084 if (GET_CODE (PATTERN (insn)) == PARALLEL
10085 && store_multiple_operation (PATTERN (insn), VOIDmode))
10086 {
10087 set = XVECEXP (PATTERN (insn), 0, 0);
10088 first = REGNO (SET_SRC (set));
10089 last = first + XVECLEN (PATTERN (insn), 0) - 1;
10090 offset = const0_rtx;
10091 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10092 off = INTVAL (offset);
10093
10094 if (GET_CODE (base) != REG || off < 0)
10095 continue;
10096 if (cfun_frame_layout.first_save_gpr != -1
10097 && (cfun_frame_layout.first_save_gpr < first
10098 || cfun_frame_layout.last_save_gpr > last))
10099 continue;
10100 if (REGNO (base) != STACK_POINTER_REGNUM
10101 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10102 continue;
10103 if (first > BASE_REGNUM || last < BASE_REGNUM)
10104 continue;
10105
10106 if (cfun_frame_layout.first_save_gpr != -1)
10107 {
10108 new_insn = save_gprs (base,
10109 off + (cfun_frame_layout.first_save_gpr
10110 - first) * UNITS_PER_LONG,
10111 cfun_frame_layout.first_save_gpr,
10112 cfun_frame_layout.last_save_gpr);
10113 new_insn = emit_insn_before (new_insn, insn);
10114 INSN_ADDRESSES_NEW (new_insn, -1);
10115 }
10116
10117 remove_insn (insn);
10118 continue;
10119 }
10120
10121 if (cfun_frame_layout.first_save_gpr == -1
10122 && GET_CODE (PATTERN (insn)) == SET
10123 && GET_CODE (SET_SRC (PATTERN (insn))) == REG
10124 && (REGNO (SET_SRC (PATTERN (insn))) == BASE_REGNUM
10125 || (!TARGET_CPU_ZARCH
10126 && REGNO (SET_SRC (PATTERN (insn))) == RETURN_REGNUM))
10127 && GET_CODE (SET_DEST (PATTERN (insn))) == MEM)
10128 {
10129 set = PATTERN (insn);
10130 first = REGNO (SET_SRC (set));
10131 offset = const0_rtx;
10132 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10133 off = INTVAL (offset);
10134
10135 if (GET_CODE (base) != REG || off < 0)
10136 continue;
10137 if (REGNO (base) != STACK_POINTER_REGNUM
10138 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10139 continue;
10140
10141 remove_insn (insn);
10142 continue;
10143 }
10144
10145 if (GET_CODE (PATTERN (insn)) == PARALLEL
10146 && load_multiple_operation (PATTERN (insn), VOIDmode))
10147 {
10148 set = XVECEXP (PATTERN (insn), 0, 0);
10149 first = REGNO (SET_DEST (set));
10150 last = first + XVECLEN (PATTERN (insn), 0) - 1;
10151 offset = const0_rtx;
10152 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
10153 off = INTVAL (offset);
10154
10155 if (GET_CODE (base) != REG || off < 0)
10156 continue;
10157 if (cfun_frame_layout.first_restore_gpr != -1
10158 && (cfun_frame_layout.first_restore_gpr < first
10159 || cfun_frame_layout.last_restore_gpr > last))
10160 continue;
10161 if (REGNO (base) != STACK_POINTER_REGNUM
10162 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10163 continue;
10164 if (first > BASE_REGNUM || last < BASE_REGNUM)
10165 continue;
10166
10167 if (cfun_frame_layout.first_restore_gpr != -1)
10168 {
10169 new_insn = restore_gprs (base,
10170 off + (cfun_frame_layout.first_restore_gpr
10171 - first) * UNITS_PER_LONG,
10172 cfun_frame_layout.first_restore_gpr,
10173 cfun_frame_layout.last_restore_gpr);
10174 new_insn = emit_insn_before (new_insn, insn);
10175 INSN_ADDRESSES_NEW (new_insn, -1);
10176 }
10177
10178 remove_insn (insn);
10179 continue;
10180 }
10181
10182 if (cfun_frame_layout.first_restore_gpr == -1
10183 && GET_CODE (PATTERN (insn)) == SET
10184 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
10185 && (REGNO (SET_DEST (PATTERN (insn))) == BASE_REGNUM
10186 || (!TARGET_CPU_ZARCH
10187 && REGNO (SET_DEST (PATTERN (insn))) == RETURN_REGNUM))
10188 && GET_CODE (SET_SRC (PATTERN (insn))) == MEM)
10189 {
10190 set = PATTERN (insn);
10191 first = REGNO (SET_DEST (set));
10192 offset = const0_rtx;
10193 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
10194 off = INTVAL (offset);
10195
10196 if (GET_CODE (base) != REG || off < 0)
10197 continue;
10198 if (REGNO (base) != STACK_POINTER_REGNUM
10199 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10200 continue;
10201
10202 remove_insn (insn);
10203 continue;
10204 }
10205 }
10206 }
10207
10208 /* On z10 and later the dynamic branch prediction must see the
10209 backward jump within a certain window. If not, it falls back to
10210 the static prediction. This function rearranges the loop backward
10211 branch in a way which makes the static prediction always correct.
10212 The function returns true if it added an instruction. */
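/* Schematically (a sketch, not the exact rtl), a loop ending in

       jCC   .Lloop_start       # conditional backward branch, far from target

   is rewritten into

       j!CC  .Lskip             # inverted condition, short forward branch
       j     .Lloop_start       # unconditional backward branch
   .Lskip:

   so that the backward branch is unconditional and thus always predicted
   correctly by the static predictor.  */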
10213 static bool
10214 s390_fix_long_loop_prediction (rtx insn)
10215 {
10216 rtx set = single_set (insn);
10217 rtx code_label, label_ref, new_label;
10218 rtx uncond_jump;
10219 rtx cur_insn;
10220 rtx tmp;
10221 int distance;
10222
10223 /* This will exclude branch on count and branch on index patterns
10224 since these are correctly statically predicted. */
10225 if (!set
10226 || SET_DEST (set) != pc_rtx
10227 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
10228 return false;
10229
10230 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
10231 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
10232
10233 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
10234
10235 code_label = XEXP (label_ref, 0);
10236
10237 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
10238 || INSN_ADDRESSES (INSN_UID (insn)) == -1
10239 || (INSN_ADDRESSES (INSN_UID (insn))
10240 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
10241 return false;
10242
10243 for (distance = 0, cur_insn = PREV_INSN (insn);
10244 distance < PREDICT_DISTANCE - 6;
10245 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
10246 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
10247 return false;
10248
10249 new_label = gen_label_rtx ();
10250 uncond_jump = emit_jump_insn_after (
10251 gen_rtx_SET (VOIDmode, pc_rtx,
10252 gen_rtx_LABEL_REF (VOIDmode, code_label)),
10253 insn);
10254 emit_label_after (new_label, uncond_jump);
10255
10256 tmp = XEXP (SET_SRC (set), 1);
10257 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
10258 XEXP (SET_SRC (set), 2) = tmp;
10259 INSN_CODE (insn) = -1;
10260
10261 XEXP (label_ref, 0) = new_label;
10262 JUMP_LABEL (insn) = new_label;
10263 JUMP_LABEL (uncond_jump) = code_label;
10264
10265 return true;
10266 }
10267
10268 /* Returns 1 if INSN reads the value of REG for purposes not related
10269 to addressing of memory, and 0 otherwise. */
10270 static int
10271 s390_non_addr_reg_read_p (rtx reg, rtx insn)
10272 {
10273 return reg_referenced_p (reg, PATTERN (insn))
10274 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
10275 }
10276
10277 /* Starting from INSN find_cond_jump looks downwards in the insn
10278 stream for a single jump insn which is the last user of the
10279 condition code set in INSN. */
10280 static rtx
10281 find_cond_jump (rtx insn)
10282 {
10283 for (; insn; insn = NEXT_INSN (insn))
10284 {
10285 rtx ite, cc;
10286
10287 if (LABEL_P (insn))
10288 break;
10289
10290 if (!JUMP_P (insn))
10291 {
10292 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
10293 break;
10294 continue;
10295 }
10296
10297 /* This will be triggered by a return. */
10298 if (GET_CODE (PATTERN (insn)) != SET)
10299 break;
10300
10301 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
10302 ite = SET_SRC (PATTERN (insn));
10303
10304 if (GET_CODE (ite) != IF_THEN_ELSE)
10305 break;
10306
10307 cc = XEXP (XEXP (ite, 0), 0);
10308 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
10309 break;
10310
10311 if (find_reg_note (insn, REG_DEAD, cc))
10312 return insn;
10313 break;
10314 }
10315
10316 return NULL_RTX;
10317 }
10318
10319 /* Swap the condition in COND and the operands in OP0 and OP1 so that
10320 the semantics does not change. If NULL_RTX is passed as COND the
10321 function tries to find the conditional jump starting with INSN. */
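/* For example, a (compare a b) feeding a branch on GT becomes
   (compare b a) branching on LT, which is equivalent.  */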
10322 static void
10323 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
10324 {
10325 rtx tmp = *op0;
10326
10327 if (cond == NULL_RTX)
10328 {
10329 rtx jump = find_cond_jump (NEXT_INSN (insn));
10330 jump = jump ? single_set (jump) : NULL_RTX;
10331
10332 if (jump == NULL_RTX)
10333 return;
10334
10335 cond = XEXP (XEXP (jump, 1), 0);
10336 }
10337
10338 *op0 = *op1;
10339 *op1 = tmp;
10340 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
10341 }
10342
10343 /* On z10, instructions of the compare-and-branch family have the
10344 property of accessing the register occurring as the second operand with
10345 its bits complemented. If such a compare is grouped with a second
10346 instruction that accesses the same register non-complemented, and
10347 if that register's value is delivered via a bypass, then the
10348 pipeline recycles, thereby causing significant performance decline.
10349 This function locates such situations and exchanges the two
10350 operands of the compare. The function returns true whenever it
10351 added an insn. */
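/* As a sketch (mnemonics for illustration only): in a group such as

       crj  %r1,%r2,...         # second operand %r2 accessed complemented
       ar   %r3,%r2             # %r2 also used non-complemented

   the compare operands (and the branch condition) are swapped to
   crj %r2,%r1,... where possible; if swapping would merely move the
   conflict to the previous insn, a NOP is emitted after the compare
   instead.  */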
10352 static bool
10353 s390_z10_optimize_cmp (rtx insn)
10354 {
10355 rtx prev_insn, next_insn;
10356 bool insn_added_p = false;
10357 rtx cond, *op0, *op1;
10358
10359 if (GET_CODE (PATTERN (insn)) == PARALLEL)
10360 {
10361 /* Handle compare and branch and branch on count
10362 instructions. */
10363 rtx pattern = single_set (insn);
10364
10365 if (!pattern
10366 || SET_DEST (pattern) != pc_rtx
10367 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
10368 return false;
10369
10370 cond = XEXP (SET_SRC (pattern), 0);
10371 op0 = &XEXP (cond, 0);
10372 op1 = &XEXP (cond, 1);
10373 }
10374 else if (GET_CODE (PATTERN (insn)) == SET)
10375 {
10376 rtx src, dest;
10377
10378 /* Handle normal compare instructions. */
10379 src = SET_SRC (PATTERN (insn));
10380 dest = SET_DEST (PATTERN (insn));
10381
10382 if (!REG_P (dest)
10383 || !CC_REGNO_P (REGNO (dest))
10384 || GET_CODE (src) != COMPARE)
10385 return false;
10386
10387 /* s390_swap_cmp will try to find the conditional
10388 jump when passing NULL_RTX as condition. */
10389 cond = NULL_RTX;
10390 op0 = &XEXP (src, 0);
10391 op1 = &XEXP (src, 1);
10392 }
10393 else
10394 return false;
10395
10396 if (!REG_P (*op0) || !REG_P (*op1))
10397 return false;
10398
10399 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
10400 return false;
10401
10402 /* Swap the COMPARE arguments and its mask if there is a
10403 conflicting access in the previous insn. */
10404 prev_insn = prev_active_insn (insn);
10405 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
10406 && reg_referenced_p (*op1, PATTERN (prev_insn)))
10407 s390_swap_cmp (cond, op0, op1, insn);
10408
10409 /* Check if there is a conflict with the next insn. If there
10410 was no conflict with the previous insn, then swap the
10411 COMPARE arguments and its mask. If we already swapped
10412 the operands, or if swapping them would cause a conflict
10413 with the previous insn, issue a NOP after the COMPARE in
10414 order to separate the two instructions. */
10415 next_insn = next_active_insn (insn);
10416 if (next_insn != NULL_RTX && INSN_P (next_insn)
10417 && s390_non_addr_reg_read_p (*op1, next_insn))
10418 {
10419 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
10420 && s390_non_addr_reg_read_p (*op0, prev_insn))
10421 {
10422 if (REGNO (*op1) == 0)
10423 emit_insn_after (gen_nop1 (), insn);
10424 else
10425 emit_insn_after (gen_nop (), insn);
10426 insn_added_p = true;
10427 }
10428 else
10429 s390_swap_cmp (cond, op0, op1, insn);
10430 }
10431 return insn_added_p;
10432 }
10433
10434 /* Perform machine-dependent processing. */
10435
10436 static void
10437 s390_reorg (void)
10438 {
10439 bool pool_overflow = false;
10440
10441 /* Make sure all splits have been performed; splits after
10442 machine_dependent_reorg might confuse insn length counts. */
10443 split_all_insns_noflow ();
10444
10445 /* Install the main literal pool and the associated base
10446 register load insns.
10447
10448 In addition, there are two problematic situations we need
10449 to correct:
10450
10451 - the literal pool might be > 4096 bytes in size, so that
10452 some of its elements cannot be directly accessed
10453
10454 - a branch target might be > 64K away from the branch, so that
10455 it is not possible to use a PC-relative instruction.
10456
10457 To fix those, we split the single literal pool into multiple
10458 pool chunks, reloading the pool base register at various
10459 points throughout the function to ensure it always points to
10460 the pool chunk the following code expects, and / or replace
10461 PC-relative branches by absolute branches.
10462
10463 However, the two problems are interdependent: splitting the
10464 literal pool can move a branch further away from its target,
10465 causing the 64K limit to overflow, and on the other hand,
10466 replacing a PC-relative branch by an absolute branch means
10467 we need to put the branch target address into the literal
10468 pool, possibly causing it to overflow.
10469
10470 So, we loop trying to fix up both problems until we manage
10471 to satisfy both conditions at the same time. Note that the
10472 loop is guaranteed to terminate as every pass of the loop
10473 strictly decreases the total number of PC-relative branches
10474 in the function. (This is not completely true as there
10475 might be branch-over-pool insns introduced by chunkify_start.
10476 Those never need to be split however.) */
10477
10478 for (;;)
10479 {
10480 struct constant_pool *pool = NULL;
10481
10482 /* Collect the literal pool. */
10483 if (!pool_overflow)
10484 {
10485 pool = s390_mainpool_start ();
10486 if (!pool)
10487 pool_overflow = true;
10488 }
10489
10490 /* If literal pool overflowed, start to chunkify it. */
10491 if (pool_overflow)
10492 pool = s390_chunkify_start ();
10493
10494 /* Split out-of-range branches. If this has created new
10495 literal pool entries, cancel current chunk list and
10496 recompute it. zSeries machines have large branch
10497 instructions, so we never need to split a branch. */
10498 if (!TARGET_CPU_ZARCH && s390_split_branches ())
10499 {
10500 if (pool_overflow)
10501 s390_chunkify_cancel (pool);
10502 else
10503 s390_mainpool_cancel (pool);
10504
10505 continue;
10506 }
10507
10508 /* If we made it up to here, both conditions are satisfied.
10509 Finish up literal pool related changes. */
10510 if (pool_overflow)
10511 s390_chunkify_finish (pool);
10512 else
10513 s390_mainpool_finish (pool);
10514
10515 /* We're done splitting branches. */
10516 cfun->machine->split_branches_pending_p = false;
10517 break;
10518 }
10519
10520 /* Generate out-of-pool execute target insns. */
10521 if (TARGET_CPU_ZARCH)
10522 {
10523 rtx insn, label, target;
10524
10525 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10526 {
10527 label = s390_execute_label (insn);
10528 if (!label)
10529 continue;
10530
10531 gcc_assert (label != const0_rtx);
10532
10533 target = emit_label (XEXP (label, 0));
10534 INSN_ADDRESSES_NEW (target, -1);
10535
10536 target = emit_insn (s390_execute_target (insn));
10537 INSN_ADDRESSES_NEW (target, -1);
10538 }
10539 }
10540
10541 /* Try to optimize prologue and epilogue further. */
10542 s390_optimize_prologue ();
10543
10544 /* Walk over the insns and do some >=z10 specific changes. */
10545 if (s390_tune == PROCESSOR_2097_Z10
10546 || s390_tune == PROCESSOR_2817_Z196
10547 || s390_tune == PROCESSOR_2827_ZEC12)
10548 {
10549 rtx insn;
10550 bool insn_added_p = false;
10551
10552 /* The insn lengths and addresses have to be up to date for the
10553 following manipulations. */
10554 shorten_branches (get_insns ());
10555
10556 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10557 {
10558 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10559 continue;
10560
10561 if (JUMP_P (insn))
10562 insn_added_p |= s390_fix_long_loop_prediction (insn);
10563
10564 if ((GET_CODE (PATTERN (insn)) == PARALLEL
10565 || GET_CODE (PATTERN (insn)) == SET)
10566 && s390_tune == PROCESSOR_2097_Z10)
10567 insn_added_p |= s390_z10_optimize_cmp (insn);
10568 }
10569
10570 /* Adjust branches if we added new instructions. */
10571 if (insn_added_p)
10572 shorten_branches (get_insns ());
10573 }
10574 }
10575
10576 /* Return true if INSN is an fp load insn writing register REGNO. */
10577 static inline bool
10578 s390_fpload_toreg (rtx insn, unsigned int regno)
10579 {
10580 rtx set;
10581 enum attr_type flag = s390_safe_attr_type (insn);
10582
10583 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
10584 return false;
10585
10586 set = single_set (insn);
10587
10588 if (set == NULL_RTX)
10589 return false;
10590
10591 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
10592 return false;
10593
10594 if (REGNO (SET_DEST (set)) != regno)
10595 return false;
10596
10597 return true;
10598 }
10599
10600 /* This value describes the distance to be avoided between an
10601 arithmetic fp instruction and an fp load writing the same register.
10602 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
10603 fine, but the exact value has to be avoided. Otherwise the FP
10604 pipeline will throw an exception causing a major penalty. */
10605 #define Z10_EARLYLOAD_DISTANCE 7
10606
10607 /* Rearrange the ready list in order to avoid the situation described
10608 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
10609 moved to the very end of the ready list. */
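/* Note that the scheduler issues the insn at the highest index of READY
   next (see s390_sched_reorder below), so moving the problematic load to
   ready[0] defers it as long as possible.  */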
10610 static void
10611 s390_z10_prevent_earlyload_conflicts (rtx *ready, int *nready_p)
10612 {
10613 unsigned int regno;
10614 int nready = *nready_p;
10615 rtx tmp;
10616 int i;
10617 rtx insn;
10618 rtx set;
10619 enum attr_type flag;
10620 int distance;
10621
10622 /* Skip DISTANCE - 1 active insns. */
10623 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
10624 distance > 0 && insn != NULL_RTX;
10625 distance--, insn = prev_active_insn (insn))
10626 if (CALL_P (insn) || JUMP_P (insn))
10627 return;
10628
10629 if (insn == NULL_RTX)
10630 return;
10631
10632 set = single_set (insn);
10633
10634 if (set == NULL_RTX || !REG_P (SET_DEST (set))
10635 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
10636 return;
10637
10638 flag = s390_safe_attr_type (insn);
10639
10640 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
10641 return;
10642
10643 regno = REGNO (SET_DEST (set));
10644 i = nready - 1;
10645
10646 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
10647 i--;
10648
10649 if (!i)
10650 return;
10651
10652 tmp = ready[i];
10653 memmove (&ready[1], &ready[0], sizeof (rtx) * i);
10654 ready[0] = tmp;
10655 }
10656
10657
10658 /* The s390_sched_state variable tracks the state of the current or
10659 the last instruction group.
10660
10661 0,1,2 number of instructions scheduled in the current group
10662 3 the last group is complete - normal insns
10663 4 the last group was a cracked/expanded insn */
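/* States 3 and 4 correspond to S390_OOO_SCHED_STATE_NORMAL and
   S390_OOO_SCHED_STATE_CRACKED defined below.  */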
10664
10665 static int s390_sched_state;
10666
10667 #define S390_OOO_SCHED_STATE_NORMAL 3
10668 #define S390_OOO_SCHED_STATE_CRACKED 4
10669
10670 #define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1
10671 #define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2
10672 #define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4
10673 #define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
10674
10675 static unsigned int
10676 s390_get_sched_attrmask (rtx insn)
10677 {
10678 unsigned int mask = 0;
10679
10680 if (get_attr_ooo_cracked (insn))
10681 mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
10682 if (get_attr_ooo_expanded (insn))
10683 mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
10684 if (get_attr_ooo_endgroup (insn))
10685 mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
10686 if (get_attr_ooo_groupalone (insn))
10687 mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
10688 return mask;
10689 }
10690
10691 /* Return the scheduling score for INSN. The higher the score the
10692 better. The score is calculated from the OOO scheduling attributes
10693 of INSN and the scheduling state s390_sched_state. */
10694 static int
10695 s390_sched_score (rtx insn)
10696 {
10697 unsigned int mask = s390_get_sched_attrmask (insn);
10698 int score = 0;
10699
10700 switch (s390_sched_state)
10701 {
10702 case 0:
10703 /* Try to put insns into the first slot which would otherwise
10704 break a group. */
10705 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
10706 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
10707 score += 5;
10708 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
10709 score += 10;
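/* Falls through to the case 1 checks below.  */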
10710 case 1:
10711 /* Prefer not cracked insns while trying to put together a
10712 group. */
10713 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
10714 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
10715 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
10716 score += 10;
10717 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
10718 score += 5;
10719 break;
10720 case 2:
10721 /* Prefer not cracked insns while trying to put together a
10722 group. */
10723 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
10724 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
10725 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
10726 score += 10;
10727 /* Prefer endgroup insns in the last slot. */
10728 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
10729 score += 10;
10730 break;
10731 case S390_OOO_SCHED_STATE_NORMAL:
10732 /* Prefer not cracked insns if the last was not cracked. */
10733 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
10734 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
10735 score += 5;
10736 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
10737 score += 10;
10738 break;
10739 case S390_OOO_SCHED_STATE_CRACKED:
10740 /* Try to keep cracked insns together to prevent them from
10741 interrupting groups. */
10742 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
10743 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
10744 score += 5;
10745 break;
10746 }
10747 return score;
10748 }
10749
10750 /* This function is called via hook TARGET_SCHED_REORDER before
10751 issuing one insn from the list READY which contains *NREADYP entries.
10752 For target z10 it reorders load instructions to avoid early load
10753 conflicts in the floating point pipeline. */
10754 static int
10755 s390_sched_reorder (FILE *file, int verbose,
10756 rtx *ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
10757 {
10758 if (s390_tune == PROCESSOR_2097_Z10)
10759 if (reload_completed && *nreadyp > 1)
10760 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
10761
10762 if (s390_tune == PROCESSOR_2827_ZEC12
10763 && reload_completed
10764 && *nreadyp > 1)
10765 {
10766 int i;
10767 int last_index = *nreadyp - 1;
10768 int max_index = -1;
10769 int max_score = -1;
10770 rtx tmp;
10771
10772 /* Just move the insn with the highest score to the top (the
10773 end) of the list. A full sort is not needed since a conflict
10774 in the hazard recognition cannot happen. So the top insn in
10775 the ready list will always be taken. */
10776 for (i = last_index; i >= 0; i--)
10777 {
10778 int score;
10779
10780 if (recog_memoized (ready[i]) < 0)
10781 continue;
10782
10783 score = s390_sched_score (ready[i]);
10784 if (score > max_score)
10785 {
10786 max_score = score;
10787 max_index = i;
10788 }
10789 }
10790
10791 if (max_index != -1)
10792 {
10793 if (max_index != last_index)
10794 {
10795 tmp = ready[max_index];
10796 ready[max_index] = ready[last_index];
10797 ready[last_index] = tmp;
10798
10799 if (verbose > 5)
10800 fprintf (file,
10801 "move insn %d to the top of list\n",
10802 INSN_UID (ready[last_index]));
10803 }
10804 else if (verbose > 5)
10805 fprintf (file,
10806 "best insn %d already on top\n",
10807 INSN_UID (ready[last_index]));
10808 }
10809
10810 if (verbose > 5)
10811 {
10812 fprintf (file, "ready list ooo attributes - sched state: %d\n",
10813 s390_sched_state);
10814
10815 for (i = last_index; i >= 0; i--)
10816 {
10817 if (recog_memoized (ready[i]) < 0)
10818 continue;
10819 fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
10820 s390_sched_score (ready[i]));
10821 #define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
10822 PRINT_OOO_ATTR (ooo_cracked);
10823 PRINT_OOO_ATTR (ooo_expanded);
10824 PRINT_OOO_ATTR (ooo_endgroup);
10825 PRINT_OOO_ATTR (ooo_groupalone);
10826 #undef PRINT_OOO_ATTR
10827 fprintf (file, "\n");
10828 }
10829 }
10830 }
10831
10832 return s390_issue_rate ();
10833 }
10834
10835
10836 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
10837 the scheduler has issued INSN. It stores the last issued insn into
10838 last_scheduled_insn in order to make it available for
10839 s390_sched_reorder. */
10840 static int
10841 s390_sched_variable_issue (FILE *file, int verbose, rtx insn, int more)
10842 {
10843 last_scheduled_insn = insn;
10844
10845 if (s390_tune == PROCESSOR_2827_ZEC12
10846 && reload_completed
10847 && recog_memoized (insn) >= 0)
10848 {
10849 unsigned int mask = s390_get_sched_attrmask (insn);
10850
10851 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
10852 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
10853 s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
10854 else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
10855 || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
10856 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
10857 else
10858 {
10859 /* Only normal insns are left (mask == 0). */
10860 switch (s390_sched_state)
10861 {
10862 case 0:
10863 case 1:
10864 case 2:
10865 case S390_OOO_SCHED_STATE_NORMAL:
10866 if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
10867 s390_sched_state = 1;
10868 else
10869 s390_sched_state++;
10870
10871 break;
10872 case S390_OOO_SCHED_STATE_CRACKED:
10873 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
10874 break;
10875 }
10876 }
10877 if (verbose > 5)
10878 {
10879 fprintf (file, "insn %d: ", INSN_UID (insn));
10880 #define PRINT_OOO_ATTR(ATTR) \
10881 fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
10882 PRINT_OOO_ATTR (ooo_cracked);
10883 PRINT_OOO_ATTR (ooo_expanded);
10884 PRINT_OOO_ATTR (ooo_endgroup);
10885 PRINT_OOO_ATTR (ooo_groupalone);
10886 #undef PRINT_OOO_ATTR
10887 fprintf (file, "\n");
10888 fprintf (file, "sched state: %d\n", s390_sched_state);
10889 }
10890 }
10891
10892 if (GET_CODE (PATTERN (insn)) != USE
10893 && GET_CODE (PATTERN (insn)) != CLOBBER)
10894 return more - 1;
10895 else
10896 return more;
10897 }
10898
10899 static void
10900 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
10901 int verbose ATTRIBUTE_UNUSED,
10902 int max_ready ATTRIBUTE_UNUSED)
10903 {
10904 last_scheduled_insn = NULL_RTX;
10905 s390_sched_state = 0;
10906 }
10907
10908 /* This function checks the whole of insn X for memory references. The
10909 function always returns zero because the framework it is called
10910 from would stop recursively analyzing the insn upon a return value
10911 other than zero. The real result of this function is updating the
10912 counter variable MEM_COUNT. */
10913 static int
10914 check_dpu (rtx *x, unsigned *mem_count)
10915 {
10916 if (*x != NULL_RTX && MEM_P (*x))
10917 (*mem_count)++;
10918 return 0;
10919 }
10920
10921 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
10922 a new number of times struct loop *loop should be unrolled if tuned for
10923 cpus with a built-in stride prefetcher.
10924 The loop is analyzed for memory accesses by calling check_dpu for
10925 each rtx of the loop. Depending on the loop_depth and the number of
10926 memory accesses, a new number <= nunroll is returned to improve the
10927 behaviour of the hardware prefetch unit. */
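/* For example, a loop of depth 1 containing four memory references would be
   limited to MIN (nunroll, 28 / 4), i.e. at most 7 unrolled copies.  */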
10928 static unsigned
10929 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
10930 {
10931 basic_block *bbs;
10932 rtx insn;
10933 unsigned i;
10934 unsigned mem_count = 0;
10935
10936 if (s390_tune != PROCESSOR_2097_Z10
10937 && s390_tune != PROCESSOR_2817_Z196
10938 && s390_tune != PROCESSOR_2827_ZEC12)
10939 return nunroll;
10940
10941 /* Count the number of memory references within the loop body. */
10942 bbs = get_loop_body (loop);
10943 for (i = 0; i < loop->num_nodes; i++)
10944 {
10945 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
10946 if (INSN_P (insn) && INSN_CODE (insn) != -1)
10947 for_each_rtx (&insn, (rtx_function) check_dpu, &mem_count);
10948 }
10949 free (bbs);
10950
10951 /* Prevent division by zero; nunroll does not need to be adjusted in this case. */
10952 if (mem_count == 0)
10953 return nunroll;
10954
10955 switch (loop_depth(loop))
10956 {
10957 case 1:
10958 return MIN (nunroll, 28 / mem_count);
10959 case 2:
10960 return MIN (nunroll, 22 / mem_count);
10961 default:
10962 return MIN (nunroll, 16 / mem_count);
10963 }
10964 }
10965
10966 /* Initialize GCC target structure. */
10967
10968 #undef TARGET_ASM_ALIGNED_HI_OP
10969 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
10970 #undef TARGET_ASM_ALIGNED_DI_OP
10971 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
10972 #undef TARGET_ASM_INTEGER
10973 #define TARGET_ASM_INTEGER s390_assemble_integer
10974
10975 #undef TARGET_ASM_OPEN_PAREN
10976 #define TARGET_ASM_OPEN_PAREN ""
10977
10978 #undef TARGET_ASM_CLOSE_PAREN
10979 #define TARGET_ASM_CLOSE_PAREN ""
10980
10981 #undef TARGET_OPTION_OVERRIDE
10982 #define TARGET_OPTION_OVERRIDE s390_option_override
10983
10984 #undef TARGET_ENCODE_SECTION_INFO
10985 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
10986
10987 #undef TARGET_SCALAR_MODE_SUPPORTED_P
10988 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
10989
10990 #ifdef HAVE_AS_TLS
10991 #undef TARGET_HAVE_TLS
10992 #define TARGET_HAVE_TLS true
10993 #endif
10994 #undef TARGET_CANNOT_FORCE_CONST_MEM
10995 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
10996
10997 #undef TARGET_DELEGITIMIZE_ADDRESS
10998 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
10999
11000 #undef TARGET_LEGITIMIZE_ADDRESS
11001 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
11002
11003 #undef TARGET_RETURN_IN_MEMORY
11004 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
11005
11006 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
11007 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
11008
11009 #undef TARGET_ASM_OUTPUT_MI_THUNK
11010 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
11011 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
11012 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
11013
11014 #undef TARGET_SCHED_ADJUST_PRIORITY
11015 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
11016 #undef TARGET_SCHED_ISSUE_RATE
11017 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
11018 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
11019 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
11020
11021 #undef TARGET_SCHED_VARIABLE_ISSUE
11022 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
11023 #undef TARGET_SCHED_REORDER
11024 #define TARGET_SCHED_REORDER s390_sched_reorder
11025 #undef TARGET_SCHED_INIT
11026 #define TARGET_SCHED_INIT s390_sched_init
11027
11028 #undef TARGET_CANNOT_COPY_INSN_P
11029 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
11030 #undef TARGET_RTX_COSTS
11031 #define TARGET_RTX_COSTS s390_rtx_costs
11032 #undef TARGET_ADDRESS_COST
11033 #define TARGET_ADDRESS_COST s390_address_cost
11034 #undef TARGET_REGISTER_MOVE_COST
11035 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
11036 #undef TARGET_MEMORY_MOVE_COST
11037 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
11038
11039 #undef TARGET_MACHINE_DEPENDENT_REORG
11040 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
11041
11042 #undef TARGET_VALID_POINTER_MODE
11043 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
11044
11045 #undef TARGET_BUILD_BUILTIN_VA_LIST
11046 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
11047 #undef TARGET_EXPAND_BUILTIN_VA_START
11048 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
11049 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
11050 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
11051
11052 #undef TARGET_PROMOTE_FUNCTION_MODE
11053 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
11054 #undef TARGET_PASS_BY_REFERENCE
11055 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
11056
11057 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
11058 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
11059 #undef TARGET_FUNCTION_ARG
11060 #define TARGET_FUNCTION_ARG s390_function_arg
11061 #undef TARGET_FUNCTION_ARG_ADVANCE
11062 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
11063 #undef TARGET_FUNCTION_VALUE
11064 #define TARGET_FUNCTION_VALUE s390_function_value
11065 #undef TARGET_LIBCALL_VALUE
11066 #define TARGET_LIBCALL_VALUE s390_libcall_value
11067
11068 #undef TARGET_FIXED_CONDITION_CODE_REGS
11069 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
11070
11071 #undef TARGET_CC_MODES_COMPATIBLE
11072 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
11073
11074 #undef TARGET_INVALID_WITHIN_DOLOOP
11075 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
11076
11077 #ifdef HAVE_AS_TLS
11078 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
11079 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
11080 #endif
11081
11082 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11083 #undef TARGET_MANGLE_TYPE
11084 #define TARGET_MANGLE_TYPE s390_mangle_type
11085 #endif
11086
11087 #undef TARGET_SCALAR_MODE_SUPPORTED_P
11088 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
11089
11090 #undef TARGET_PREFERRED_RELOAD_CLASS
11091 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
11092
11093 #undef TARGET_SECONDARY_RELOAD
11094 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
11095
11096 #undef TARGET_LIBGCC_CMP_RETURN_MODE
11097 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
11098
11099 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
11100 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
11101
11102 #undef TARGET_LEGITIMATE_ADDRESS_P
11103 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
11104
11105 #undef TARGET_LEGITIMATE_CONSTANT_P
11106 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
11107
11108 #undef TARGET_CAN_ELIMINATE
11109 #define TARGET_CAN_ELIMINATE s390_can_eliminate
11110
11111 #undef TARGET_CONDITIONAL_REGISTER_USAGE
11112 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
11113
11114 #undef TARGET_LOOP_UNROLL_ADJUST
11115 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
11116
11117 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11118 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
11119 #undef TARGET_TRAMPOLINE_INIT
11120 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
11121
11122 #undef TARGET_UNWIND_WORD_MODE
11123 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
11124
11125 #undef TARGET_CANONICALIZE_COMPARISON
11126 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
11127
11128 struct gcc_target targetm = TARGET_INITIALIZER;
11129
11130 #include "gt-s390.h"