1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2013 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "reload.h"
42 #include "diagnostic-core.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "debug.h"
48 #include "langhooks.h"
49 #include "optabs.h"
50 #include "gimple.h"
51 #include "df.h"
52 #include "params.h"
53 #include "cfgloop.h"
54 #include "opts.h"
55
56 /* Define the specific costs for a given cpu. */
57
58 struct processor_costs
59 {
60 /* multiplication */
61 const int m; /* cost of an M instruction. */
62 const int mghi; /* cost of an MGHI instruction. */
63 const int mh; /* cost of an MH instruction. */
64 const int mhi; /* cost of an MHI instruction. */
65 const int ml; /* cost of an ML instruction. */
66 const int mr; /* cost of an MR instruction. */
67 const int ms; /* cost of an MS instruction. */
68 const int msg; /* cost of an MSG instruction. */
69 const int msgf; /* cost of an MSGF instruction. */
70 const int msgfr; /* cost of an MSGFR instruction. */
71 const int msgr; /* cost of an MSGR instruction. */
72 const int msr; /* cost of an MSR instruction. */
73 const int mult_df; /* cost of multiplication in DFmode. */
74   const int mxbr;     /* cost of multiplication in TFmode.  */
75 /* square root */
76 const int sqxbr; /* cost of square root in TFmode. */
77 const int sqdbr; /* cost of square root in DFmode. */
78 const int sqebr; /* cost of square root in SFmode. */
79 /* multiply and add */
80 const int madbr; /* cost of multiply and add in DFmode. */
81 const int maebr; /* cost of multiply and add in SFmode. */
82 /* division */
83   const int dxbr;     /* cost of division in TFmode.  */
84   const int ddbr;     /* cost of division in DFmode.  */
85   const int debr;     /* cost of division in SFmode.  */
86   const int dlgr;     /* cost of a DLGR instruction.  */
87   const int dlr;      /* cost of a DLR instruction.  */
88   const int dr;       /* cost of a DR instruction.  */
89   const int dsgfr;    /* cost of a DSGFR instruction.  */
90   const int dsgr;     /* cost of a DSGR instruction.  */
91 };
92
93 const struct processor_costs *s390_cost;
94
95 static const
96 struct processor_costs z900_cost =
97 {
98 COSTS_N_INSNS (5), /* M */
99 COSTS_N_INSNS (10), /* MGHI */
100 COSTS_N_INSNS (5), /* MH */
101 COSTS_N_INSNS (4), /* MHI */
102 COSTS_N_INSNS (5), /* ML */
103 COSTS_N_INSNS (5), /* MR */
104 COSTS_N_INSNS (4), /* MS */
105 COSTS_N_INSNS (15), /* MSG */
106 COSTS_N_INSNS (7), /* MSGF */
107 COSTS_N_INSNS (7), /* MSGFR */
108 COSTS_N_INSNS (10), /* MSGR */
109 COSTS_N_INSNS (4), /* MSR */
110 COSTS_N_INSNS (7), /* multiplication in DFmode */
111 COSTS_N_INSNS (13), /* MXBR */
112 COSTS_N_INSNS (136), /* SQXBR */
113 COSTS_N_INSNS (44), /* SQDBR */
114 COSTS_N_INSNS (35), /* SQEBR */
115 COSTS_N_INSNS (18), /* MADBR */
116 COSTS_N_INSNS (13), /* MAEBR */
117 COSTS_N_INSNS (134), /* DXBR */
118 COSTS_N_INSNS (30), /* DDBR */
119 COSTS_N_INSNS (27), /* DEBR */
120 COSTS_N_INSNS (220), /* DLGR */
121 COSTS_N_INSNS (34), /* DLR */
122 COSTS_N_INSNS (34), /* DR */
123 COSTS_N_INSNS (32), /* DSGFR */
124 COSTS_N_INSNS (32), /* DSGR */
125 };
126
127 static const
128 struct processor_costs z990_cost =
129 {
130 COSTS_N_INSNS (4), /* M */
131 COSTS_N_INSNS (2), /* MGHI */
132 COSTS_N_INSNS (2), /* MH */
133 COSTS_N_INSNS (2), /* MHI */
134 COSTS_N_INSNS (4), /* ML */
135 COSTS_N_INSNS (4), /* MR */
136 COSTS_N_INSNS (5), /* MS */
137 COSTS_N_INSNS (6), /* MSG */
138 COSTS_N_INSNS (4), /* MSGF */
139 COSTS_N_INSNS (4), /* MSGFR */
140 COSTS_N_INSNS (4), /* MSGR */
141 COSTS_N_INSNS (4), /* MSR */
142 COSTS_N_INSNS (1), /* multiplication in DFmode */
143 COSTS_N_INSNS (28), /* MXBR */
144 COSTS_N_INSNS (130), /* SQXBR */
145 COSTS_N_INSNS (66), /* SQDBR */
146 COSTS_N_INSNS (38), /* SQEBR */
147 COSTS_N_INSNS (1), /* MADBR */
148 COSTS_N_INSNS (1), /* MAEBR */
149 COSTS_N_INSNS (60), /* DXBR */
150 COSTS_N_INSNS (40), /* DDBR */
151 COSTS_N_INSNS (26), /* DEBR */
152 COSTS_N_INSNS (176), /* DLGR */
153 COSTS_N_INSNS (31), /* DLR */
154 COSTS_N_INSNS (31), /* DR */
155 COSTS_N_INSNS (31), /* DSGFR */
156 COSTS_N_INSNS (31), /* DSGR */
157 };
158
159 static const
160 struct processor_costs z9_109_cost =
161 {
162 COSTS_N_INSNS (4), /* M */
163 COSTS_N_INSNS (2), /* MGHI */
164 COSTS_N_INSNS (2), /* MH */
165 COSTS_N_INSNS (2), /* MHI */
166 COSTS_N_INSNS (4), /* ML */
167 COSTS_N_INSNS (4), /* MR */
168 COSTS_N_INSNS (5), /* MS */
169 COSTS_N_INSNS (6), /* MSG */
170 COSTS_N_INSNS (4), /* MSGF */
171 COSTS_N_INSNS (4), /* MSGFR */
172 COSTS_N_INSNS (4), /* MSGR */
173 COSTS_N_INSNS (4), /* MSR */
174 COSTS_N_INSNS (1), /* multiplication in DFmode */
175 COSTS_N_INSNS (28), /* MXBR */
176 COSTS_N_INSNS (130), /* SQXBR */
177 COSTS_N_INSNS (66), /* SQDBR */
178 COSTS_N_INSNS (38), /* SQEBR */
179 COSTS_N_INSNS (1), /* MADBR */
180 COSTS_N_INSNS (1), /* MAEBR */
181 COSTS_N_INSNS (60), /* DXBR */
182 COSTS_N_INSNS (40), /* DDBR */
183 COSTS_N_INSNS (26), /* DEBR */
184 COSTS_N_INSNS (30), /* DLGR */
185 COSTS_N_INSNS (23), /* DLR */
186 COSTS_N_INSNS (23), /* DR */
187 COSTS_N_INSNS (24), /* DSGFR */
188 COSTS_N_INSNS (24), /* DSGR */
189 };
190
191 static const
192 struct processor_costs z10_cost =
193 {
194 COSTS_N_INSNS (10), /* M */
195 COSTS_N_INSNS (10), /* MGHI */
196 COSTS_N_INSNS (10), /* MH */
197 COSTS_N_INSNS (10), /* MHI */
198 COSTS_N_INSNS (10), /* ML */
199 COSTS_N_INSNS (10), /* MR */
200 COSTS_N_INSNS (10), /* MS */
201 COSTS_N_INSNS (10), /* MSG */
202 COSTS_N_INSNS (10), /* MSGF */
203 COSTS_N_INSNS (10), /* MSGFR */
204 COSTS_N_INSNS (10), /* MSGR */
205 COSTS_N_INSNS (10), /* MSR */
206 COSTS_N_INSNS (1) , /* multiplication in DFmode */
207 COSTS_N_INSNS (50), /* MXBR */
208 COSTS_N_INSNS (120), /* SQXBR */
209 COSTS_N_INSNS (52), /* SQDBR */
210 COSTS_N_INSNS (38), /* SQEBR */
211 COSTS_N_INSNS (1), /* MADBR */
212 COSTS_N_INSNS (1), /* MAEBR */
213 COSTS_N_INSNS (111), /* DXBR */
214 COSTS_N_INSNS (39), /* DDBR */
215 COSTS_N_INSNS (32), /* DEBR */
216 COSTS_N_INSNS (160), /* DLGR */
217 COSTS_N_INSNS (71), /* DLR */
218 COSTS_N_INSNS (71), /* DR */
219 COSTS_N_INSNS (71), /* DSGFR */
220 COSTS_N_INSNS (71), /* DSGR */
221 };
222
223 static const
224 struct processor_costs z196_cost =
225 {
226 COSTS_N_INSNS (7), /* M */
227 COSTS_N_INSNS (5), /* MGHI */
228 COSTS_N_INSNS (5), /* MH */
229 COSTS_N_INSNS (5), /* MHI */
230 COSTS_N_INSNS (7), /* ML */
231 COSTS_N_INSNS (7), /* MR */
232 COSTS_N_INSNS (6), /* MS */
233 COSTS_N_INSNS (8), /* MSG */
234 COSTS_N_INSNS (6), /* MSGF */
235 COSTS_N_INSNS (6), /* MSGFR */
236 COSTS_N_INSNS (8), /* MSGR */
237 COSTS_N_INSNS (6), /* MSR */
238 COSTS_N_INSNS (1) , /* multiplication in DFmode */
239 COSTS_N_INSNS (40), /* MXBR B+40 */
240 COSTS_N_INSNS (100), /* SQXBR B+100 */
241 COSTS_N_INSNS (42), /* SQDBR B+42 */
242 COSTS_N_INSNS (28), /* SQEBR B+28 */
243 COSTS_N_INSNS (1), /* MADBR B */
244 COSTS_N_INSNS (1), /* MAEBR B */
245 COSTS_N_INSNS (101), /* DXBR B+101 */
246 COSTS_N_INSNS (29), /* DDBR */
247 COSTS_N_INSNS (22), /* DEBR */
248 COSTS_N_INSNS (160), /* DLGR cracked */
249 COSTS_N_INSNS (160), /* DLR cracked */
250 COSTS_N_INSNS (160), /* DR expanded */
251 COSTS_N_INSNS (160), /* DSGFR cracked */
252 COSTS_N_INSNS (160), /* DSGR cracked */
253 };
254
255 static const
256 struct processor_costs zEC12_cost =
257 {
258 COSTS_N_INSNS (7), /* M */
259 COSTS_N_INSNS (5), /* MGHI */
260 COSTS_N_INSNS (5), /* MH */
261 COSTS_N_INSNS (5), /* MHI */
262 COSTS_N_INSNS (7), /* ML */
263 COSTS_N_INSNS (7), /* MR */
264 COSTS_N_INSNS (6), /* MS */
265 COSTS_N_INSNS (8), /* MSG */
266 COSTS_N_INSNS (6), /* MSGF */
267 COSTS_N_INSNS (6), /* MSGFR */
268 COSTS_N_INSNS (8), /* MSGR */
269 COSTS_N_INSNS (6), /* MSR */
270 COSTS_N_INSNS (1) , /* multiplication in DFmode */
271 COSTS_N_INSNS (40), /* MXBR B+40 */
272 COSTS_N_INSNS (100), /* SQXBR B+100 */
273 COSTS_N_INSNS (42), /* SQDBR B+42 */
274 COSTS_N_INSNS (28), /* SQEBR B+28 */
275 COSTS_N_INSNS (1), /* MADBR B */
276 COSTS_N_INSNS (1), /* MAEBR B */
277 COSTS_N_INSNS (131), /* DXBR B+131 */
278 COSTS_N_INSNS (29), /* DDBR */
279 COSTS_N_INSNS (22), /* DEBR */
280 COSTS_N_INSNS (160), /* DLGR cracked */
281 COSTS_N_INSNS (160), /* DLR cracked */
282 COSTS_N_INSNS (160), /* DR expanded */
283 COSTS_N_INSNS (160), /* DSGFR cracked */
284 COSTS_N_INSNS (160), /* DSGR cracked */
285 };
286
287 extern int reload_completed;
288
289 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
290 static rtx last_scheduled_insn;
291
292 /* Structure used to hold the components of an S/390 memory
293 address. A legitimate address on S/390 is of the general
294 form
295 base + index + displacement
296 where any of the components is optional.
297
298 base and index are registers of the class ADDR_REGS,
299 displacement is an unsigned 12-bit immediate constant. */
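/* For example, in the assembler operand 200(%r3,%r15) the base is
   %r15, the index is %r3, and the displacement is 200.  */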
300
301 struct s390_address
302 {
303 rtx base;
304 rtx indx;
305 rtx disp;
306 bool pointer;
307 bool literal_pool;
308 };
309
310 /* The following structure is embedded in the machine
311 specific part of struct function. */
312
313 struct GTY (()) s390_frame_layout
314 {
315 /* Offset within stack frame. */
316 HOST_WIDE_INT gprs_offset;
317 HOST_WIDE_INT f0_offset;
318 HOST_WIDE_INT f4_offset;
319 HOST_WIDE_INT f8_offset;
320 HOST_WIDE_INT backchain_offset;
321
322   /* Numbers of the first and last GPRs for which slots in the
323      register save area are reserved.  */
324 int first_save_gpr_slot;
325 int last_save_gpr_slot;
326
327   /* Numbers of the first and last GPRs to be saved and restored.  */
328 int first_save_gpr;
329 int first_restore_gpr;
330 int last_save_gpr;
331 int last_restore_gpr;
332
333   /* Bits standing for floating point registers.  Set if the
334 respective register has to be saved. Starting with reg 16 (f0)
335 at the rightmost bit.
336 Bit 15 - 8 7 6 5 4 3 2 1 0
337 fpr 15 - 8 7 5 3 1 6 4 2 0
338 reg 31 - 24 23 22 21 20 19 18 17 16 */
339 unsigned int fpr_bitmap;
340
341 /* Number of floating point registers f8-f15 which must be saved. */
342 int high_fprs;
343
344 /* Set if return address needs to be saved.
345 This flag is set by s390_return_addr_rtx if it could not use
346 the initial value of r14 and therefore depends on r14 saved
347 to the stack. */
348 bool save_return_addr_p;
349
350 /* Size of stack frame. */
351 HOST_WIDE_INT frame_size;
352 };
353
354 /* Define the structure for the machine field in struct function. */
355
356 struct GTY(()) machine_function
357 {
358 struct s390_frame_layout frame_layout;
359
360 /* Literal pool base register. */
361 rtx base_reg;
362
363 /* True if we may need to perform branch splitting. */
364 bool split_branches_pending_p;
365
366 /* Some local-dynamic TLS symbol name. */
367 const char *some_ld_name;
368
369 bool has_landing_pad_p;
370 };
371
372 /* A few accessor macros for struct cfun->machine->s390_frame_layout.  */
373
374 #define cfun_frame_layout (cfun->machine->frame_layout)
375 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
376 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
377 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
378 #define cfun_set_fpr_bit(BITNUM) (cfun->machine->frame_layout.fpr_bitmap |= \
379 (1 << (BITNUM)))
380 #define cfun_fpr_bit_p(BITNUM) (!!(cfun->machine->frame_layout.fpr_bitmap & \
381 (1 << (BITNUM))))
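/* Per the fpr_bitmap layout above, BITNUM is the hard register number
   minus 16; e.g. cfun_set_fpr_bit (4) marks f1 (hard reg 20) as needing
   to be saved.  */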
382
383 /* Number of GPRs and FPRs used for argument passing. */
384 #define GP_ARG_NUM_REG 5
385 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
386
387 /* A couple of shortcuts. */
388 #define CONST_OK_FOR_J(x) \
389 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
390 #define CONST_OK_FOR_K(x) \
391 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
392 #define CONST_OK_FOR_Os(x) \
393 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
394 #define CONST_OK_FOR_Op(x) \
395 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
396 #define CONST_OK_FOR_On(x) \
397 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
398
399 #define REGNO_PAIR_OK(REGNO, MODE) \
400 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
401
402 /* That's the read ahead of the dynamic branch prediction unit in
403 bytes on a z10 (or higher) CPU. */
404 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
405
406 /* Return the alignment for LABEL. We default to the -falign-labels
407 value except for the literal pool base label. */
408 int
409 s390_label_align (rtx label)
410 {
411 rtx prev_insn = prev_active_insn (label);
412
413 if (prev_insn == NULL_RTX)
414 goto old;
415
416 prev_insn = single_set (prev_insn);
417
418 if (prev_insn == NULL_RTX)
419 goto old;
420
421 prev_insn = SET_SRC (prev_insn);
422
423 /* Don't align literal pool base labels. */
424 if (GET_CODE (prev_insn) == UNSPEC
425 && XINT (prev_insn, 1) == UNSPEC_MAIN_BASE)
426 return 0;
427
428 old:
429 return align_labels_log;
430 }
431
432 static enum machine_mode
433 s390_libgcc_cmp_return_mode (void)
434 {
435 return TARGET_64BIT ? DImode : SImode;
436 }
437
438 static enum machine_mode
439 s390_libgcc_shift_count_mode (void)
440 {
441 return TARGET_64BIT ? DImode : SImode;
442 }
443
444 static enum machine_mode
445 s390_unwind_word_mode (void)
446 {
447 return TARGET_64BIT ? DImode : SImode;
448 }
449
450 /* Return true if the back end supports mode MODE. */
451 static bool
452 s390_scalar_mode_supported_p (enum machine_mode mode)
453 {
454   /* In contrast to the default implementation, reject TImode constants on 31-bit
455 TARGET_ZARCH for ABI compliance. */
456 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
457 return false;
458
459 if (DECIMAL_FLOAT_MODE_P (mode))
460 return default_decimal_float_supported_p ();
461
462 return default_scalar_mode_supported_p (mode);
463 }
464
465 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
466
467 void
468 s390_set_has_landing_pad_p (bool value)
469 {
470 cfun->machine->has_landing_pad_p = value;
471 }
472
473 /* If two condition code modes are compatible, return a condition code
474 mode which is compatible with both. Otherwise, return
475 VOIDmode. */
476
477 static enum machine_mode
478 s390_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
479 {
480 if (m1 == m2)
481 return m1;
482
483 switch (m1)
484 {
485 case CCZmode:
486 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
487 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
488 return m2;
489 return VOIDmode;
490
491 case CCSmode:
492 case CCUmode:
493 case CCTmode:
494 case CCSRmode:
495 case CCURmode:
496 case CCZ1mode:
497 if (m2 == CCZmode)
498 return m1;
499
500 return VOIDmode;
501
502 default:
503 return VOIDmode;
504 }
505 return VOIDmode;
506 }
507
508 /* Return true if SET either doesn't set the CC register, or else
509 the source and destination have matching CC modes and that
510 CC mode is at least as constrained as REQ_MODE. */
511
512 static bool
513 s390_match_ccmode_set (rtx set, enum machine_mode req_mode)
514 {
515 enum machine_mode set_mode;
516
517 gcc_assert (GET_CODE (set) == SET);
518
519 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
520 return 1;
521
522 set_mode = GET_MODE (SET_DEST (set));
523 switch (set_mode)
524 {
525 case CCSmode:
526 case CCSRmode:
527 case CCUmode:
528 case CCURmode:
529 case CCLmode:
530 case CCL1mode:
531 case CCL2mode:
532 case CCL3mode:
533 case CCT1mode:
534 case CCT2mode:
535 case CCT3mode:
536 if (req_mode != set_mode)
537 return 0;
538 break;
539
540 case CCZmode:
541 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
542 && req_mode != CCSRmode && req_mode != CCURmode)
543 return 0;
544 break;
545
546 case CCAPmode:
547 case CCANmode:
548 if (req_mode != CCAmode)
549 return 0;
550 break;
551
552 default:
553 gcc_unreachable ();
554 }
555
556 return (GET_MODE (SET_SRC (set)) == set_mode);
557 }
558
559 /* Return true if every SET in INSN that sets the CC register
560 has source and destination with matching CC modes and that
561 CC mode is at least as constrained as REQ_MODE.
562 If REQ_MODE is VOIDmode, always return false. */
563
564 bool
565 s390_match_ccmode (rtx insn, enum machine_mode req_mode)
566 {
567 int i;
568
569 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
570 if (req_mode == VOIDmode)
571 return false;
572
573 if (GET_CODE (PATTERN (insn)) == SET)
574 return s390_match_ccmode_set (PATTERN (insn), req_mode);
575
576 if (GET_CODE (PATTERN (insn)) == PARALLEL)
577 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
578 {
579 rtx set = XVECEXP (PATTERN (insn), 0, i);
580 if (GET_CODE (set) == SET)
581 if (!s390_match_ccmode_set (set, req_mode))
582 return false;
583 }
584
585 return true;
586 }
587
588 /* If a test-under-mask instruction can be used to implement
589 (compare (and ... OP1) OP2), return the CC mode required
590 to do that. Otherwise, return VOIDmode.
591 MIXED is true if the instruction can distinguish between
592 CC1 and CC2 for mixed selected bits (TMxx), it is false
593 if the instruction cannot (TM). */
594
595 enum machine_mode
596 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
597 {
598 int bit0, bit1;
599
600 /* ??? Fixme: should work on CONST_DOUBLE as well. */
601 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
602 return VOIDmode;
603
604 /* Selected bits all zero: CC0.
605 e.g.: int a; if ((a & (16 + 128)) == 0) */
606 if (INTVAL (op2) == 0)
607 return CCTmode;
608
609 /* Selected bits all one: CC3.
610 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
611 if (INTVAL (op2) == INTVAL (op1))
612 return CCT3mode;
613
614 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
615 int a;
616 if ((a & (16 + 128)) == 16) -> CCT1
617 if ((a & (16 + 128)) == 128) -> CCT2 */
618 if (mixed)
619 {
620 bit1 = exact_log2 (INTVAL (op2));
621 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
622 if (bit0 != -1 && bit1 != -1)
623 return bit0 > bit1 ? CCT1mode : CCT2mode;
624 }
625
626 return VOIDmode;
627 }
628
629 /* Given a comparison code OP (EQ, NE, etc.) and the operands
630 OP0 and OP1 of a COMPARE, return the mode to be used for the
631 comparison. */
632
633 enum machine_mode
634 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
635 {
636 switch (code)
637 {
638 case EQ:
639 case NE:
640 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
641 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
642 return CCAPmode;
643 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
644 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
645 return CCAPmode;
646 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
647 || GET_CODE (op1) == NEG)
648 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
649 return CCLmode;
650
651 if (GET_CODE (op0) == AND)
652 {
653 /* Check whether we can potentially do it via TM. */
654 enum machine_mode ccmode;
655 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
656 if (ccmode != VOIDmode)
657 {
658 /* Relax CCTmode to CCZmode to allow fall-back to AND
659 if that turns out to be beneficial. */
660 return ccmode == CCTmode ? CCZmode : ccmode;
661 }
662 }
663
664 if (register_operand (op0, HImode)
665 && GET_CODE (op1) == CONST_INT
666 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
667 return CCT3mode;
668 if (register_operand (op0, QImode)
669 && GET_CODE (op1) == CONST_INT
670 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
671 return CCT3mode;
672
673 return CCZmode;
674
675 case LE:
676 case LT:
677 case GE:
678 case GT:
679 /* The only overflow condition of NEG and ABS happens when
680 	 the operand is INT_MIN (-INT_MAX - 1): the result, which should
681 	 be positive, overflows and stays negative.
682 Using CCAP mode the resulting cc can be used for comparisons. */
683 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
684 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
685 return CCAPmode;
686
687 /* If constants are involved in an add instruction it is possible to use
688 the resulting cc for comparisons with zero. Knowing the sign of the
689 constant the overflow behavior gets predictable. e.g.:
690 int a, b; if ((b = a + c) > 0)
691 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
692 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
693 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
694 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
695 /* Avoid INT32_MIN on 32 bit. */
696 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
697 {
698 if (INTVAL (XEXP((op0), 1)) < 0)
699 return CCANmode;
700 else
701 return CCAPmode;
702 }
703 /* Fall through. */
704 case UNORDERED:
705 case ORDERED:
706 case UNEQ:
707 case UNLE:
708 case UNLT:
709 case UNGE:
710 case UNGT:
711 case LTGT:
712 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
713 && GET_CODE (op1) != CONST_INT)
714 return CCSRmode;
715 return CCSmode;
716
717 case LTU:
718 case GEU:
719 if (GET_CODE (op0) == PLUS
720 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
721 return CCL1mode;
722
723 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
724 && GET_CODE (op1) != CONST_INT)
725 return CCURmode;
726 return CCUmode;
727
728 case LEU:
729 case GTU:
730 if (GET_CODE (op0) == MINUS
731 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
732 return CCL2mode;
733
734 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
735 && GET_CODE (op1) != CONST_INT)
736 return CCURmode;
737 return CCUmode;
738
739 default:
740 gcc_unreachable ();
741 }
742 }
743
744 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
745 that we can implement more efficiently. */
746
747 static void
748 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
749 bool op0_preserve_value)
750 {
751 if (op0_preserve_value)
752 return;
753
754 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
755 if ((*code == EQ || *code == NE)
756 && *op1 == const0_rtx
757 && GET_CODE (*op0) == ZERO_EXTRACT
758 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
759 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
760 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
761 {
762 rtx inner = XEXP (*op0, 0);
763 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
764 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
765 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
766
767 if (len > 0 && len < modesize
768 && pos >= 0 && pos + len <= modesize
769 && modesize <= HOST_BITS_PER_WIDE_INT)
770 {
771 unsigned HOST_WIDE_INT block;
772 block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
773 block <<= modesize - pos - len;
774
775 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
776 gen_int_mode (block, GET_MODE (inner)));
777 }
778 }
779
780 /* Narrow AND of memory against immediate to enable TM. */
781 if ((*code == EQ || *code == NE)
782 && *op1 == const0_rtx
783 && GET_CODE (*op0) == AND
784 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
785 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
786 {
787 rtx inner = XEXP (*op0, 0);
788 rtx mask = XEXP (*op0, 1);
789
790 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
791 if (GET_CODE (inner) == SUBREG
792 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
793 && (GET_MODE_SIZE (GET_MODE (inner))
794 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
795 && ((INTVAL (mask)
796 & GET_MODE_MASK (GET_MODE (inner))
797 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
798 == 0))
799 inner = SUBREG_REG (inner);
800
801 /* Do not change volatile MEMs. */
802 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
803 {
804 int part = s390_single_part (XEXP (*op0, 1),
805 GET_MODE (inner), QImode, 0);
806 if (part >= 0)
807 {
808 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
809 inner = adjust_address_nv (inner, QImode, part);
810 *op0 = gen_rtx_AND (QImode, inner, mask);
811 }
812 }
813 }
814
815 /* Narrow comparisons against 0xffff to HImode if possible. */
816 if ((*code == EQ || *code == NE)
817 && GET_CODE (*op1) == CONST_INT
818 && INTVAL (*op1) == 0xffff
819 && SCALAR_INT_MODE_P (GET_MODE (*op0))
820 && (nonzero_bits (*op0, GET_MODE (*op0))
821 & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
822 {
823 *op0 = gen_lowpart (HImode, *op0);
824 *op1 = constm1_rtx;
825 }
826
827 /* Remove redundant UNSPEC_CCU_TO_INT conversions if possible. */
828 if (GET_CODE (*op0) == UNSPEC
829 && XINT (*op0, 1) == UNSPEC_CCU_TO_INT
830 && XVECLEN (*op0, 0) == 1
831 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
832 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
833 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
834 && *op1 == const0_rtx)
835 {
836 enum rtx_code new_code = UNKNOWN;
837 switch (*code)
838 {
839 case EQ: new_code = EQ; break;
840 case NE: new_code = NE; break;
841 case LT: new_code = GTU; break;
842 case GT: new_code = LTU; break;
843 case LE: new_code = GEU; break;
844 case GE: new_code = LEU; break;
845 default: break;
846 }
847
848 if (new_code != UNKNOWN)
849 {
850 *op0 = XVECEXP (*op0, 0, 0);
851 *code = new_code;
852 }
853 }
854
855 /* Remove redundant UNSPEC_CCZ_TO_INT conversions if possible. */
856 if (GET_CODE (*op0) == UNSPEC
857 && XINT (*op0, 1) == UNSPEC_CCZ_TO_INT
858 && XVECLEN (*op0, 0) == 1
859 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCZmode
860 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
861 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
862 && *op1 == const0_rtx)
863 {
864 enum rtx_code new_code = UNKNOWN;
865 switch (*code)
866 {
867 case EQ: new_code = EQ; break;
868 case NE: new_code = NE; break;
869 default: break;
870 }
871
872 if (new_code != UNKNOWN)
873 {
874 *op0 = XVECEXP (*op0, 0, 0);
875 *code = new_code;
876 }
877 }
878
879 /* Simplify cascaded EQ, NE with const0_rtx. */
880 if ((*code == NE || *code == EQ)
881 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
882 && GET_MODE (*op0) == SImode
883 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
884 && REG_P (XEXP (*op0, 0))
885 && XEXP (*op0, 1) == const0_rtx
886 && *op1 == const0_rtx)
887 {
888 if ((*code == EQ && GET_CODE (*op0) == NE)
889 || (*code == NE && GET_CODE (*op0) == EQ))
890 *code = EQ;
891 else
892 *code = NE;
893 *op0 = XEXP (*op0, 0);
894 }
895
896 /* Prefer register over memory as first operand. */
897 if (MEM_P (*op0) && REG_P (*op1))
898 {
899 rtx tem = *op0; *op0 = *op1; *op1 = tem;
900 *code = (int)swap_condition ((enum rtx_code)*code);
901 }
902 }
903
904 /* Emit a compare instruction suitable to implement the comparison
905 OP0 CODE OP1. Return the correct condition RTL to be placed in
906 the IF_THEN_ELSE of the conditional branch testing the result. */
907
908 rtx
909 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
910 {
911 enum machine_mode mode = s390_select_ccmode (code, op0, op1);
912 rtx cc;
913
914 /* Do not output a redundant compare instruction if a compare_and_swap
915 pattern already computed the result and the machine modes are compatible. */
916 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
917 {
918 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
919 == GET_MODE (op0));
920 cc = op0;
921 }
922 else
923 {
924 cc = gen_rtx_REG (mode, CC_REGNUM);
925 emit_insn (gen_rtx_SET (VOIDmode, cc, gen_rtx_COMPARE (mode, op0, op1)));
926 }
927
928 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
929 }
930
931 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
932 matches CMP.
933 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
934 conditional branch testing the result. */
935
936 static rtx
937 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
938 rtx cmp, rtx new_rtx)
939 {
940 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
941 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
942 const0_rtx);
943 }
944
945 /* Emit a jump instruction to TARGET. If COND is NULL_RTX, emit an
946 unconditional jump, else a conditional jump under condition COND. */
947
948 void
949 s390_emit_jump (rtx target, rtx cond)
950 {
951 rtx insn;
952
953 target = gen_rtx_LABEL_REF (VOIDmode, target);
954 if (cond)
955 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
956
957 insn = gen_rtx_SET (VOIDmode, pc_rtx, target);
958 emit_jump_insn (insn);
959 }
960
961 /* Return branch condition mask to implement a branch
962 specified by CODE. Return -1 for invalid comparisons. */
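/* The mask has one bit per condition code value, CC0 being the most
   significant of the four; e.g. CC0 | CC2 is the binary mask 1010.  */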
963
964 int
965 s390_branch_condition_mask (rtx code)
966 {
967 const int CC0 = 1 << 3;
968 const int CC1 = 1 << 2;
969 const int CC2 = 1 << 1;
970 const int CC3 = 1 << 0;
971
972 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
973 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
974 gcc_assert (XEXP (code, 1) == const0_rtx);
975
976 switch (GET_MODE (XEXP (code, 0)))
977 {
978 case CCZmode:
979 case CCZ1mode:
980 switch (GET_CODE (code))
981 {
982 case EQ: return CC0;
983 case NE: return CC1 | CC2 | CC3;
984 default: return -1;
985 }
986 break;
987
988 case CCT1mode:
989 switch (GET_CODE (code))
990 {
991 case EQ: return CC1;
992 case NE: return CC0 | CC2 | CC3;
993 default: return -1;
994 }
995 break;
996
997 case CCT2mode:
998 switch (GET_CODE (code))
999 {
1000 case EQ: return CC2;
1001 case NE: return CC0 | CC1 | CC3;
1002 default: return -1;
1003 }
1004 break;
1005
1006 case CCT3mode:
1007 switch (GET_CODE (code))
1008 {
1009 case EQ: return CC3;
1010 case NE: return CC0 | CC1 | CC2;
1011 default: return -1;
1012 }
1013 break;
1014
1015 case CCLmode:
1016 switch (GET_CODE (code))
1017 {
1018 case EQ: return CC0 | CC2;
1019 case NE: return CC1 | CC3;
1020 default: return -1;
1021 }
1022 break;
1023
1024 case CCL1mode:
1025 switch (GET_CODE (code))
1026 {
1027 case LTU: return CC2 | CC3; /* carry */
1028 case GEU: return CC0 | CC1; /* no carry */
1029 default: return -1;
1030 }
1031 break;
1032
1033 case CCL2mode:
1034 switch (GET_CODE (code))
1035 {
1036 case GTU: return CC0 | CC1; /* borrow */
1037 case LEU: return CC2 | CC3; /* no borrow */
1038 default: return -1;
1039 }
1040 break;
1041
1042 case CCL3mode:
1043 switch (GET_CODE (code))
1044 {
1045 case EQ: return CC0 | CC2;
1046 case NE: return CC1 | CC3;
1047 case LTU: return CC1;
1048 case GTU: return CC3;
1049 case LEU: return CC1 | CC2;
1050 case GEU: return CC2 | CC3;
1051 default: return -1;
1052 }
1053
1054 case CCUmode:
1055 switch (GET_CODE (code))
1056 {
1057 case EQ: return CC0;
1058 case NE: return CC1 | CC2 | CC3;
1059 case LTU: return CC1;
1060 case GTU: return CC2;
1061 case LEU: return CC0 | CC1;
1062 case GEU: return CC0 | CC2;
1063 default: return -1;
1064 }
1065 break;
1066
1067 case CCURmode:
1068 switch (GET_CODE (code))
1069 {
1070 case EQ: return CC0;
1071 case NE: return CC2 | CC1 | CC3;
1072 case LTU: return CC2;
1073 case GTU: return CC1;
1074 case LEU: return CC0 | CC2;
1075 case GEU: return CC0 | CC1;
1076 default: return -1;
1077 }
1078 break;
1079
1080 case CCAPmode:
1081 switch (GET_CODE (code))
1082 {
1083 case EQ: return CC0;
1084 case NE: return CC1 | CC2 | CC3;
1085 case LT: return CC1 | CC3;
1086 case GT: return CC2;
1087 case LE: return CC0 | CC1 | CC3;
1088 case GE: return CC0 | CC2;
1089 default: return -1;
1090 }
1091 break;
1092
1093 case CCANmode:
1094 switch (GET_CODE (code))
1095 {
1096 case EQ: return CC0;
1097 case NE: return CC1 | CC2 | CC3;
1098 case LT: return CC1;
1099 case GT: return CC2 | CC3;
1100 case LE: return CC0 | CC1;
1101 case GE: return CC0 | CC2 | CC3;
1102 default: return -1;
1103 }
1104 break;
1105
1106 case CCSmode:
1107 switch (GET_CODE (code))
1108 {
1109 case EQ: return CC0;
1110 case NE: return CC1 | CC2 | CC3;
1111 case LT: return CC1;
1112 case GT: return CC2;
1113 case LE: return CC0 | CC1;
1114 case GE: return CC0 | CC2;
1115 case UNORDERED: return CC3;
1116 case ORDERED: return CC0 | CC1 | CC2;
1117 case UNEQ: return CC0 | CC3;
1118 case UNLT: return CC1 | CC3;
1119 case UNGT: return CC2 | CC3;
1120 case UNLE: return CC0 | CC1 | CC3;
1121 case UNGE: return CC0 | CC2 | CC3;
1122 case LTGT: return CC1 | CC2;
1123 default: return -1;
1124 }
1125 break;
1126
1127 case CCSRmode:
1128 switch (GET_CODE (code))
1129 {
1130 case EQ: return CC0;
1131 case NE: return CC2 | CC1 | CC3;
1132 case LT: return CC2;
1133 case GT: return CC1;
1134 case LE: return CC0 | CC2;
1135 case GE: return CC0 | CC1;
1136 case UNORDERED: return CC3;
1137 case ORDERED: return CC0 | CC2 | CC1;
1138 case UNEQ: return CC0 | CC3;
1139 case UNLT: return CC2 | CC3;
1140 case UNGT: return CC1 | CC3;
1141 case UNLE: return CC0 | CC2 | CC3;
1142 case UNGE: return CC0 | CC1 | CC3;
1143 case LTGT: return CC2 | CC1;
1144 default: return -1;
1145 }
1146 break;
1147
1148 default:
1149 return -1;
1150 }
1151 }
1152
1153
1154 /* Return branch condition mask to implement a compare and branch
1155 specified by CODE. Return -1 for invalid comparisons. */
1156
1157 int
1158 s390_compare_and_branch_condition_mask (rtx code)
1159 {
1160 const int CC0 = 1 << 3;
1161 const int CC1 = 1 << 2;
1162 const int CC2 = 1 << 1;
1163
1164 switch (GET_CODE (code))
1165 {
1166 case EQ:
1167 return CC0;
1168 case NE:
1169 return CC1 | CC2;
1170 case LT:
1171 case LTU:
1172 return CC1;
1173 case GT:
1174 case GTU:
1175 return CC2;
1176 case LE:
1177 case LEU:
1178 return CC0 | CC1;
1179 case GE:
1180 case GEU:
1181 return CC0 | CC2;
1182 default:
1183 gcc_unreachable ();
1184 }
1185 return -1;
1186 }
1187
1188 /* If INV is false, return assembler mnemonic string to implement
1189 a branch specified by CODE. If INV is true, return mnemonic
1190 for the corresponding inverted branch. */
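/* For example, EQ in CCZmode yields mask CC0 == 8 and thus mnemonic "e";
   with INV set the mask becomes 7, giving "ne".  */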
1191
1192 static const char *
1193 s390_branch_condition_mnemonic (rtx code, int inv)
1194 {
1195 int mask;
1196
1197 static const char *const mnemonic[16] =
1198 {
1199 NULL, "o", "h", "nle",
1200 "l", "nhe", "lh", "ne",
1201 "e", "nlh", "he", "nl",
1202 "le", "nh", "no", NULL
1203 };
1204
1205 if (GET_CODE (XEXP (code, 0)) == REG
1206 && REGNO (XEXP (code, 0)) == CC_REGNUM
1207 && XEXP (code, 1) == const0_rtx)
1208 mask = s390_branch_condition_mask (code);
1209 else
1210 mask = s390_compare_and_branch_condition_mask (code);
1211
1212 gcc_assert (mask >= 0);
1213
1214 if (inv)
1215 mask ^= 15;
1216
1217 gcc_assert (mask >= 1 && mask <= 14);
1218
1219 return mnemonic[mask];
1220 }
1221
1222 /* Return the part of op which has a value different from def.
1223 The size of the part is determined by mode.
1224 Use this function only if you already know that op really
1225 contains such a part. */
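/* For example, for op == (const_int 0x1200), mode == QImode and def == 0,
   the first byte (counting from the right) that differs from 0 is 0x12,
   which is what gets returned.  */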
1226
1227 unsigned HOST_WIDE_INT
1228 s390_extract_part (rtx op, enum machine_mode mode, int def)
1229 {
1230 unsigned HOST_WIDE_INT value = 0;
1231 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
1232 int part_bits = GET_MODE_BITSIZE (mode);
1233 unsigned HOST_WIDE_INT part_mask
1234 = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
1235 int i;
1236
1237 for (i = 0; i < max_parts; i++)
1238 {
1239 if (i == 0)
1240 value = (unsigned HOST_WIDE_INT) INTVAL (op);
1241 else
1242 value >>= part_bits;
1243
1244 if ((value & part_mask) != (def & part_mask))
1245 return value & part_mask;
1246 }
1247
1248 gcc_unreachable ();
1249 }
1250
1251 /* If OP is an integer constant of mode MODE with exactly one
1252 part of mode PART_MODE unequal to DEF, return the number of that
1253 part. Otherwise, return -1. */
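/* Parts are numbered starting from the most significant one; e.g. for
   (const_int 0x120000) in SImode with PART_MODE HImode and DEF 0 only
   the upper halfword is non-zero, so the result is 0.  */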
1254
1255 int
1256 s390_single_part (rtx op,
1257 enum machine_mode mode,
1258 enum machine_mode part_mode,
1259 int def)
1260 {
1261 unsigned HOST_WIDE_INT value = 0;
1262 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
1263 unsigned HOST_WIDE_INT part_mask
1264 = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
1265 int i, part = -1;
1266
1267 if (GET_CODE (op) != CONST_INT)
1268 return -1;
1269
1270 for (i = 0; i < n_parts; i++)
1271 {
1272 if (i == 0)
1273 value = (unsigned HOST_WIDE_INT) INTVAL (op);
1274 else
1275 value >>= GET_MODE_BITSIZE (part_mode);
1276
1277 if ((value & part_mask) != (def & part_mask))
1278 {
1279 if (part != -1)
1280 return -1;
1281 else
1282 part = i;
1283 }
1284 }
1285 return part == -1 ? -1 : n_parts - 1 - part;
1286 }
1287
1288 /* Return true if IN contains a contiguous bitfield in the lower SIZE
1289 bits and no other bits are set in IN. POS and LENGTH can be used
1290 to obtain the start position and the length of the bitfield.
1291
1292 POS gives the position of the first bit of the bitfield counting
1293 from the lowest order bit starting with zero. In order to use this
1294 value for S/390 instructions this has to be converted to "bits big
1295 endian" style. */
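/* For example, IN == 0xff0 with SIZE == 32 yields *POS == 4 and
   *LENGTH == 8.  */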
1296
1297 bool
1298 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
1299 int *pos, int *length)
1300 {
1301 int tmp_pos = 0;
1302 int tmp_length = 0;
1303 int i;
1304 unsigned HOST_WIDE_INT mask = 1ULL;
1305 bool contiguous = false;
1306
1307 for (i = 0; i < size; mask <<= 1, i++)
1308 {
1309 if (contiguous)
1310 {
1311 if (mask & in)
1312 tmp_length++;
1313 else
1314 break;
1315 }
1316 else
1317 {
1318 if (mask & in)
1319 {
1320 contiguous = true;
1321 tmp_length++;
1322 }
1323 else
1324 tmp_pos++;
1325 }
1326 }
1327
1328 if (!tmp_length)
1329 return false;
1330
1331 /* Calculate a mask for all bits beyond the contiguous bits. */
1332 mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
1333
1334 if (mask & in)
1335 return false;
1336
1337 if (tmp_length + tmp_pos - 1 > size)
1338 return false;
1339
1340 if (length)
1341 *length = tmp_length;
1342
1343 if (pos)
1344 *pos = tmp_pos;
1345
1346 return true;
1347 }
1348
1349 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
1350 equivalent to a shift followed by the AND. In particular, CONTIG
1351 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
1352 for ROTL indicate a rotate to the right. */
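/* For example, with BITSIZE 64 and CONTIG 0xff00 (pos 8, length 8) a
   left rotate of at most 8 bits, or a right rotate of at most 48 bits,
   can be replaced by the corresponding shift.  */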
1353
1354 bool
1355 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
1356 {
1357 int pos, len;
1358 bool ok;
1359
1360 ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
1361 gcc_assert (ok);
1362
1363 return ((rotl >= 0 && rotl <= pos)
1364 || (rotl < 0 && -rotl <= bitsize - len - pos));
1365 }
1366
1367 /* Check whether we can (and want to) split a double-word
1368 move in mode MODE from SRC to DST into two single-word
1369 moves, moving the subword FIRST_SUBWORD first. */
1370
1371 bool
1372 s390_split_ok_p (rtx dst, rtx src, enum machine_mode mode, int first_subword)
1373 {
1374 /* Floating point registers cannot be split. */
1375 if (FP_REG_P (src) || FP_REG_P (dst))
1376 return false;
1377
1378 /* We don't need to split if operands are directly accessible. */
1379 if (s_operand (src, mode) || s_operand (dst, mode))
1380 return false;
1381
1382 /* Non-offsettable memory references cannot be split. */
1383 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
1384 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
1385 return false;
1386
1387 /* Moving the first subword must not clobber a register
1388 needed to move the second subword. */
1389 if (register_operand (dst, mode))
1390 {
1391 rtx subreg = operand_subword (dst, first_subword, 0, mode);
1392 if (reg_overlap_mentioned_p (subreg, src))
1393 return false;
1394 }
1395
1396 return true;
1397 }
1398
1399 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
1400 and [MEM2, MEM2 + SIZE] do overlap and false
1401 otherwise. */
1402
1403 bool
1404 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
1405 {
1406 rtx addr1, addr2, addr_delta;
1407 HOST_WIDE_INT delta;
1408
1409 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
1410 return true;
1411
1412 if (size == 0)
1413 return false;
1414
1415 addr1 = XEXP (mem1, 0);
1416 addr2 = XEXP (mem2, 0);
1417
1418 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
1419
1420 /* This overlapping check is used by peepholes merging memory block operations.
1421 Overlapping operations would otherwise be recognized by the S/390 hardware
1422 and would fall back to a slower implementation. Allowing overlapping
1423 operations would lead to slow code but not to wrong code. Therefore we are
1424 somewhat optimistic if we cannot prove that the memory blocks are
1425 overlapping.
1426 That's why we return false here although this may accept operations on
1427 overlapping memory areas. */
1428 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
1429 return false;
1430
1431 delta = INTVAL (addr_delta);
1432
1433 if (delta == 0
1434 || (delta > 0 && delta < size)
1435 || (delta < 0 && -delta < size))
1436 return true;
1437
1438 return false;
1439 }
1440
1441 /* Check whether the address of memory reference MEM2 equals exactly
1442 the address of memory reference MEM1 plus DELTA. Return true if
1443 we can prove this to be the case, false otherwise. */
1444
1445 bool
1446 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
1447 {
1448 rtx addr1, addr2, addr_delta;
1449
1450 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
1451 return false;
1452
1453 addr1 = XEXP (mem1, 0);
1454 addr2 = XEXP (mem2, 0);
1455
1456 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
1457 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
1458 return false;
1459
1460 return true;
1461 }
1462
1463 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
1464
1465 void
1466 s390_expand_logical_operator (enum rtx_code code, enum machine_mode mode,
1467 rtx *operands)
1468 {
1469 enum machine_mode wmode = mode;
1470 rtx dst = operands[0];
1471 rtx src1 = operands[1];
1472 rtx src2 = operands[2];
1473 rtx op, clob, tem;
1474
1475 /* If we cannot handle the operation directly, use a temp register. */
1476 if (!s390_logical_operator_ok_p (operands))
1477 dst = gen_reg_rtx (mode);
1478
1479 /* QImode and HImode patterns make sense only if we have a destination
1480 in memory. Otherwise perform the operation in SImode. */
1481 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
1482 wmode = SImode;
1483
1484 /* Widen operands if required. */
1485 if (mode != wmode)
1486 {
1487 if (GET_CODE (dst) == SUBREG
1488 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
1489 dst = tem;
1490 else if (REG_P (dst))
1491 dst = gen_rtx_SUBREG (wmode, dst, 0);
1492 else
1493 dst = gen_reg_rtx (wmode);
1494
1495 if (GET_CODE (src1) == SUBREG
1496 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
1497 src1 = tem;
1498 else if (GET_MODE (src1) != VOIDmode)
1499 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
1500
1501 if (GET_CODE (src2) == SUBREG
1502 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
1503 src2 = tem;
1504 else if (GET_MODE (src2) != VOIDmode)
1505 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
1506 }
1507
1508 /* Emit the instruction. */
1509 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
1510 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
1511 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
1512
1513 /* Fix up the destination if needed. */
1514 if (dst != operands[0])
1515 emit_move_insn (operands[0], gen_lowpart (mode, dst));
1516 }
1517
1518 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
1519
1520 bool
1521 s390_logical_operator_ok_p (rtx *operands)
1522 {
1523 /* If the destination operand is in memory, it needs to coincide
1524 with one of the source operands. After reload, it has to be
1525 the first source operand. */
1526 if (GET_CODE (operands[0]) == MEM)
1527 return rtx_equal_p (operands[0], operands[1])
1528 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
1529
1530 return true;
1531 }
1532
1533 /* Narrow logical operation CODE of memory operand MEMOP with immediate
1534 operand IMMOP to switch from SS to SI type instructions. */
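/* For example, an SImode AND of a memory operand with 0xffffff00 only
   affects the least significant byte; it is rewritten as a QImode AND
   of that byte with 0x00, matching the SI-format NI instruction.  */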
1535
1536 void
1537 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
1538 {
1539 int def = code == AND ? -1 : 0;
1540 HOST_WIDE_INT mask;
1541 int part;
1542
1543 gcc_assert (GET_CODE (*memop) == MEM);
1544 gcc_assert (!MEM_VOLATILE_P (*memop));
1545
1546 mask = s390_extract_part (*immop, QImode, def);
1547 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
1548 gcc_assert (part >= 0);
1549
1550 *memop = adjust_address (*memop, QImode, part);
1551 *immop = gen_int_mode (mask, QImode);
1552 }
1553
1554
1555 /* How to allocate a 'struct machine_function'. */
1556
1557 static struct machine_function *
1558 s390_init_machine_status (void)
1559 {
1560 return ggc_alloc_cleared_machine_function ();
1561 }
1562
1563 static void
1564 s390_option_override (void)
1565 {
1566 /* Set up function hooks. */
1567 init_machine_status = s390_init_machine_status;
1568
1569 /* Architecture mode defaults according to ABI. */
1570 if (!(target_flags_explicit & MASK_ZARCH))
1571 {
1572 if (TARGET_64BIT)
1573 target_flags |= MASK_ZARCH;
1574 else
1575 target_flags &= ~MASK_ZARCH;
1576 }
1577
1578   /* Set the -march default in case it hasn't been specified on
1579      the command line.  */
1580 if (s390_arch == PROCESSOR_max)
1581 {
1582 s390_arch_string = TARGET_ZARCH? "z900" : "g5";
1583 s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
1584 s390_arch_flags = processor_flags_table[(int)s390_arch];
1585 }
1586
1587 /* Determine processor to tune for. */
1588 if (s390_tune == PROCESSOR_max)
1589 {
1590 s390_tune = s390_arch;
1591 s390_tune_flags = s390_arch_flags;
1592 }
1593
1594 /* Sanity checks. */
1595 if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
1596 error ("z/Architecture mode not supported on %s", s390_arch_string);
1597 if (TARGET_64BIT && !TARGET_ZARCH)
1598 error ("64-bit ABI not supported in ESA/390 mode");
1599
1600   /* Use hardware DFP if available and not explicitly disabled by the
1601      user.  E.g. with -m31 -march=z10 -mzarch.  */
1602 if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
1603 target_flags |= MASK_HARD_DFP;
1604
1605 if (TARGET_HARD_DFP && !TARGET_DFP)
1606 {
1607 if (target_flags_explicit & MASK_HARD_DFP)
1608 {
1609 if (!TARGET_CPU_DFP)
1610 error ("hardware decimal floating point instructions"
1611 " not available on %s", s390_arch_string);
1612 if (!TARGET_ZARCH)
1613 error ("hardware decimal floating point instructions"
1614 " not available in ESA/390 mode");
1615 }
1616 else
1617 target_flags &= ~MASK_HARD_DFP;
1618 }
1619
1620 if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
1621 {
1622 if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
1623 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
1624
1625 target_flags &= ~MASK_HARD_DFP;
1626 }
1627
1628 /* Set processor cost function. */
1629 switch (s390_tune)
1630 {
1631 case PROCESSOR_2084_Z990:
1632 s390_cost = &z990_cost;
1633 break;
1634 case PROCESSOR_2094_Z9_109:
1635 s390_cost = &z9_109_cost;
1636 break;
1637 case PROCESSOR_2097_Z10:
1638 s390_cost = &z10_cost;
1639 break;
1640 case PROCESSOR_2817_Z196:
1641 s390_cost = &z196_cost;
1642 break;
1643 case PROCESSOR_2827_ZEC12:
1644 s390_cost = &zEC12_cost;
1645 break;
1646 default:
1647 s390_cost = &z900_cost;
1648 }
1649
1650 if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
1651 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
1652 "in combination");
1653
1654 if (s390_stack_size)
1655 {
1656 if (s390_stack_guard >= s390_stack_size)
1657 error ("stack size must be greater than the stack guard value");
1658 else if (s390_stack_size > 1 << 16)
1659 error ("stack size must not be greater than 64k");
1660 }
1661 else if (s390_stack_guard)
1662 error ("-mstack-guard implies use of -mstack-size");
1663
1664 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1665 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1666 target_flags |= MASK_LONG_DOUBLE_128;
1667 #endif
1668
1669 if (s390_tune == PROCESSOR_2097_Z10
1670 || s390_tune == PROCESSOR_2817_Z196
1671 || s390_tune == PROCESSOR_2827_ZEC12)
1672 {
1673 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
1674 global_options.x_param_values,
1675 global_options_set.x_param_values);
1676 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
1677 global_options.x_param_values,
1678 global_options_set.x_param_values);
1679 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
1680 global_options.x_param_values,
1681 global_options_set.x_param_values);
1682 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
1683 global_options.x_param_values,
1684 global_options_set.x_param_values);
1685 }
1686
1687 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
1688 global_options.x_param_values,
1689 global_options_set.x_param_values);
1690   /* Values for loop prefetching.  */
1691 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
1692 global_options.x_param_values,
1693 global_options_set.x_param_values);
1694 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
1695 global_options.x_param_values,
1696 global_options_set.x_param_values);
1697   /* s390 has more than 2 cache levels and their size is much larger.
1698      Since we are always running virtualized, assume that we only get a
1699      small part of the caches above L1.  */
1700 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
1701 global_options.x_param_values,
1702 global_options_set.x_param_values);
1703 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
1704 global_options.x_param_values,
1705 global_options_set.x_param_values);
1706 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
1707 global_options.x_param_values,
1708 global_options_set.x_param_values);
1709
1710 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
1711 requires the arch flags to be evaluated already. Since prefetching
1712 is beneficial on s390, we enable it if available. */
1713 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
1714 flag_prefetch_loop_arrays = 1;
1715
1716 /* Use the alternative scheduling-pressure algorithm by default. */
1717 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
1718 global_options.x_param_values,
1719 global_options_set.x_param_values);
1720
1721 if (TARGET_TPF)
1722 {
1723 /* Don't emit DWARF3/4 unless specifically selected. The TPF
1724 debuggers do not yet support DWARF 3/4. */
1725 if (!global_options_set.x_dwarf_strict)
1726 dwarf_strict = 1;
1727 if (!global_options_set.x_dwarf_version)
1728 dwarf_version = 2;
1729 }
1730 }
1731
1732 /* Map for smallest class containing reg regno. */
1733
1734 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
1735 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1736 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1737 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1738 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1739 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1740 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1741 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1742 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1743 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS,
1744 ACCESS_REGS, ACCESS_REGS
1745 };
1746
1747 /* Return attribute type of insn. */
1748
1749 static enum attr_type
1750 s390_safe_attr_type (rtx insn)
1751 {
1752 if (recog_memoized (insn) >= 0)
1753 return get_attr_type (insn);
1754 else
1755 return TYPE_NONE;
1756 }
1757
1758 /* Return true if DISP is a valid short displacement. */
1759
1760 static bool
1761 s390_short_displacement (rtx disp)
1762 {
1763 /* No displacement is OK. */
1764 if (!disp)
1765 return true;
1766
1767 /* Without the long displacement facility we don't need to
1768      distinguish between long and short displacement.  */
1769 if (!TARGET_LONG_DISPLACEMENT)
1770 return true;
1771
1772 /* Integer displacement in range. */
1773 if (GET_CODE (disp) == CONST_INT)
1774 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
1775
1776 /* GOT offset is not OK, the GOT can be large. */
1777 if (GET_CODE (disp) == CONST
1778 && GET_CODE (XEXP (disp, 0)) == UNSPEC
1779 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
1780 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
1781 return false;
1782
1783 /* All other symbolic constants are literal pool references,
1784 which are OK as the literal pool must be small. */
1785 if (GET_CODE (disp) == CONST)
1786 return true;
1787
1788 return false;
1789 }
1790
1791 /* Decompose a RTL expression ADDR for a memory address into
1792 its components, returned in OUT.
1793
1794 Returns false if ADDR is not a valid memory address, true
1795 otherwise. If OUT is NULL, don't return the components,
1796 but check for validity only.
1797
1798 Note: Only addresses in canonical form are recognized.
1799 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
1800 canonical form so that they will be recognized. */
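/* For example, (plus (plus (reg %r1) (reg %r2)) (const_int 100))
   decomposes into indx = %r1, base = %r2 and a displacement of 100.  */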
1801
1802 static int
1803 s390_decompose_address (rtx addr, struct s390_address *out)
1804 {
1805 HOST_WIDE_INT offset = 0;
1806 rtx base = NULL_RTX;
1807 rtx indx = NULL_RTX;
1808 rtx disp = NULL_RTX;
1809 rtx orig_disp;
1810 bool pointer = false;
1811 bool base_ptr = false;
1812 bool indx_ptr = false;
1813 bool literal_pool = false;
1814
1815 /* We may need to substitute the literal pool base register into the address
1816 below. However, at this point we do not know which register is going to
1817 be used as base, so we substitute the arg pointer register. This is going
1818 to be treated as holding a pointer below -- it shouldn't be used for any
1819 other purpose. */
1820 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
1821
1822 /* Decompose address into base + index + displacement. */
1823
1824 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
1825 base = addr;
1826
1827 else if (GET_CODE (addr) == PLUS)
1828 {
1829 rtx op0 = XEXP (addr, 0);
1830 rtx op1 = XEXP (addr, 1);
1831 enum rtx_code code0 = GET_CODE (op0);
1832 enum rtx_code code1 = GET_CODE (op1);
1833
1834 if (code0 == REG || code0 == UNSPEC)
1835 {
1836 if (code1 == REG || code1 == UNSPEC)
1837 {
1838 indx = op0; /* index + base */
1839 base = op1;
1840 }
1841
1842 else
1843 {
1844 base = op0; /* base + displacement */
1845 disp = op1;
1846 }
1847 }
1848
1849 else if (code0 == PLUS)
1850 {
1851 indx = XEXP (op0, 0); /* index + base + disp */
1852 base = XEXP (op0, 1);
1853 disp = op1;
1854 }
1855
1856 else
1857 {
1858 return false;
1859 }
1860 }
1861
1862 else
1863 disp = addr; /* displacement */
1864
1865 /* Extract integer part of displacement. */
1866 orig_disp = disp;
1867 if (disp)
1868 {
1869 if (GET_CODE (disp) == CONST_INT)
1870 {
1871 offset = INTVAL (disp);
1872 disp = NULL_RTX;
1873 }
1874 else if (GET_CODE (disp) == CONST
1875 && GET_CODE (XEXP (disp, 0)) == PLUS
1876 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
1877 {
1878 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
1879 disp = XEXP (XEXP (disp, 0), 0);
1880 }
1881 }
1882
1883 /* Strip off CONST here to avoid special case tests later. */
1884 if (disp && GET_CODE (disp) == CONST)
1885 disp = XEXP (disp, 0);
1886
1887 /* We can convert literal pool addresses to
1888 displacements by basing them off the base register. */
1889 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
1890 {
1891 /* Either base or index must be free to hold the base register. */
1892 if (!base)
1893 base = fake_pool_base, literal_pool = true;
1894 else if (!indx)
1895 indx = fake_pool_base, literal_pool = true;
1896 else
1897 return false;
1898
1899 /* Mark up the displacement. */
1900 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
1901 UNSPEC_LTREL_OFFSET);
1902 }
1903
1904 /* Validate base register. */
1905 if (base)
1906 {
1907 if (GET_CODE (base) == UNSPEC)
1908 switch (XINT (base, 1))
1909 {
1910 case UNSPEC_LTREF:
1911 if (!disp)
1912 disp = gen_rtx_UNSPEC (Pmode,
1913 gen_rtvec (1, XVECEXP (base, 0, 0)),
1914 UNSPEC_LTREL_OFFSET);
1915 else
1916 return false;
1917
1918 base = XVECEXP (base, 0, 1);
1919 break;
1920
1921 case UNSPEC_LTREL_BASE:
1922 if (XVECLEN (base, 0) == 1)
1923 base = fake_pool_base, literal_pool = true;
1924 else
1925 base = XVECEXP (base, 0, 1);
1926 break;
1927
1928 default:
1929 return false;
1930 }
1931
1932 if (!REG_P (base)
1933 || (GET_MODE (base) != SImode
1934 && GET_MODE (base) != Pmode))
1935 return false;
1936
1937 if (REGNO (base) == STACK_POINTER_REGNUM
1938 || REGNO (base) == FRAME_POINTER_REGNUM
1939 || ((reload_completed || reload_in_progress)
1940 && frame_pointer_needed
1941 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
1942 || REGNO (base) == ARG_POINTER_REGNUM
1943 || (flag_pic
1944 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
1945 pointer = base_ptr = true;
1946
1947 if ((reload_completed || reload_in_progress)
1948 && base == cfun->machine->base_reg)
1949 pointer = base_ptr = literal_pool = true;
1950 }
1951
1952 /* Validate index register. */
1953 if (indx)
1954 {
1955 if (GET_CODE (indx) == UNSPEC)
1956 switch (XINT (indx, 1))
1957 {
1958 case UNSPEC_LTREF:
1959 if (!disp)
1960 disp = gen_rtx_UNSPEC (Pmode,
1961 gen_rtvec (1, XVECEXP (indx, 0, 0)),
1962 UNSPEC_LTREL_OFFSET);
1963 else
1964 return false;
1965
1966 indx = XVECEXP (indx, 0, 1);
1967 break;
1968
1969 case UNSPEC_LTREL_BASE:
1970 if (XVECLEN (indx, 0) == 1)
1971 indx = fake_pool_base, literal_pool = true;
1972 else
1973 indx = XVECEXP (indx, 0, 1);
1974 break;
1975
1976 default:
1977 return false;
1978 }
1979
1980 if (!REG_P (indx)
1981 || (GET_MODE (indx) != SImode
1982 && GET_MODE (indx) != Pmode))
1983 return false;
1984
1985 if (REGNO (indx) == STACK_POINTER_REGNUM
1986 || REGNO (indx) == FRAME_POINTER_REGNUM
1987 || ((reload_completed || reload_in_progress)
1988 && frame_pointer_needed
1989 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
1990 || REGNO (indx) == ARG_POINTER_REGNUM
1991 || (flag_pic
1992 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
1993 pointer = indx_ptr = true;
1994
1995 if ((reload_completed || reload_in_progress)
1996 && indx == cfun->machine->base_reg)
1997 pointer = indx_ptr = literal_pool = true;
1998 }
1999
2000 /* Prefer to use pointer as base, not index. */
2001 if (base && indx && !base_ptr
2002 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2003 {
2004 rtx tmp = base;
2005 base = indx;
2006 indx = tmp;
2007 }
2008
2009 /* Validate displacement. */
2010 if (!disp)
2011 {
2012 /* If virtual registers are involved, the displacement will change later
2013 anyway as the virtual registers get eliminated. This could make a
2014 valid displacement invalid, but it is more likely to make an invalid
2015 displacement valid, because we sometimes access the register save area
2016 via negative offsets to one of those registers.
2017 Thus we don't check the displacement for validity here. If after
2018 elimination the displacement turns out to be invalid after all,
2019 this is fixed up by reload in any case. */
2020 if (base != arg_pointer_rtx
2021 && indx != arg_pointer_rtx
2022 && base != return_address_pointer_rtx
2023 && indx != return_address_pointer_rtx
2024 && base != frame_pointer_rtx
2025 && indx != frame_pointer_rtx
2026 && base != virtual_stack_vars_rtx
2027 && indx != virtual_stack_vars_rtx)
2028 if (!DISP_IN_RANGE (offset))
2029 return false;
2030 }
2031 else
2032 {
2033 /* All the special cases are pointers. */
2034 pointer = true;
2035
2036 /* In the small-PIC case, the linker converts @GOT
2037 and @GOTNTPOFF offsets to possible displacements. */
2038 if (GET_CODE (disp) == UNSPEC
2039 && (XINT (disp, 1) == UNSPEC_GOT
2040 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2041 && flag_pic == 1)
2042 {
2043 ;
2044 }
2045
2046 /* Accept pool label offsets. */
2047 else if (GET_CODE (disp) == UNSPEC
2048 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2049 ;
2050
2051 /* Accept literal pool references. */
2052 else if (GET_CODE (disp) == UNSPEC
2053 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2054 {
2055 /* In case CSE pulled a non-literal-pool reference out of
2056 the pool we have to reject the address. This is
2057 especially important when loading the GOT pointer on
2058 non-zarch CPUs. In this case the literal pool contains an
2059 lt-relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2060 will most likely exceed the displacement. */
2061 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2062 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2063 return false;
2064
2065 orig_disp = gen_rtx_CONST (Pmode, disp);
2066 if (offset)
2067 {
2068 /* If we have an offset, make sure it does not
2069 exceed the size of the constant pool entry. */
2070 rtx sym = XVECEXP (disp, 0, 0);
2071 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2072 return false;
2073
2074 orig_disp = plus_constant (Pmode, orig_disp, offset);
2075 }
2076 }
2077
2078 else
2079 return false;
2080 }
2081
2082 if (!base && !indx)
2083 pointer = true;
2084
2085 if (out)
2086 {
2087 out->base = base;
2088 out->indx = indx;
2089 out->disp = orig_disp;
2090 out->pointer = pointer;
2091 out->literal_pool = literal_pool;
2092 }
2093
2094 return true;
2095 }
2096
2097 /* Decompose a RTL expression OP for a shift count into its components,
2098 and return the base register in BASE and the offset in OFFSET.
2099
2100 Return true if OP is a valid shift count, false if not. */
2101
2102 bool
2103 s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
2104 {
2105 HOST_WIDE_INT off = 0;
2106
2107 /* We can have an integer constant, an address register,
2108 or a sum of the two. */
2109 if (GET_CODE (op) == CONST_INT)
2110 {
2111 off = INTVAL (op);
2112 op = NULL_RTX;
2113 }
2114 if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
2115 {
2116 off = INTVAL (XEXP (op, 1));
2117 op = XEXP (op, 0);
2118 }
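/* The register may be wrapped in one or more SUBREGs, e.g. when a
   narrower mode is used for the shift count; strip them to get at the
   register itself. */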
2119 while (op && GET_CODE (op) == SUBREG)
2120 op = SUBREG_REG (op);
2121
2122 if (op && GET_CODE (op) != REG)
2123 return false;
2124
2125 if (offset)
2126 *offset = off;
2127 if (base)
2128 *base = op;
2129
2130 return true;
2131 }
2132
2133
2134 /* Return true if the address of operand OP is a valid address without index. */
2135
2136 bool
2137 s390_legitimate_address_without_index_p (rtx op)
2138 {
2139 struct s390_address addr;
2140
2141 if (!s390_decompose_address (XEXP (op, 0), &addr))
2142 return false;
2143 if (addr.indx)
2144 return false;
2145
2146 return true;
2147 }
2148
2149
2150 /* Return TRUE if ADDR is an operand valid for a load/store relative
2151 instruction. Be aware that the alignment of the operand needs to
2152 be checked separately.
2153 Valid addresses are single references or a sum of a reference and a
2154 constant integer. Return these parts in SYMREF and ADDEND. You can
2155 pass NULL in SYMREF and/or ADDEND if you are not interested in these
2156 values. Literal pool references are *not* considered symbol
2157 references. */
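/* For example, (const (plus (symbol_ref "foo") (const_int 12))) yields
   *SYMREF = (symbol_ref "foo") and *ADDEND = 12, provided "foo" is not
   a literal pool entry. */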
2158
2159 static bool
2160 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
2161 {
2162 HOST_WIDE_INT tmpaddend = 0;
2163
2164 if (GET_CODE (addr) == CONST)
2165 addr = XEXP (addr, 0);
2166
2167 if (GET_CODE (addr) == PLUS)
2168 {
2169 if (!CONST_INT_P (XEXP (addr, 1)))
2170 return false;
2171
2172 tmpaddend = INTVAL (XEXP (addr, 1));
2173 addr = XEXP (addr, 0);
2174 }
2175
2176 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
2177 || (GET_CODE (addr) == UNSPEC
2178 && (XINT (addr, 1) == UNSPEC_GOTENT
2179 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
2180 {
2181 if (symref)
2182 *symref = addr;
2183 if (addend)
2184 *addend = tmpaddend;
2185
2186 return true;
2187 }
2188 return false;
2189 }
2190
2191 /* Return true if the address in OP is valid for constraint letter C
2192 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
2193 pool MEMs should be accepted. Only the Q, R, S, T constraint
2194 letters are allowed for C. */
2195
2196 static int
2197 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
2198 {
2199 struct s390_address addr;
2200 bool decomposed = false;
2201
2202 /* This check makes sure that no symbolic addresses (except literal
2203 pool references) are accepted by the R or T constraints. */
2204 if (s390_loadrelative_operand_p (op, NULL, NULL))
2205 return 0;
2206
2207 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
2208 if (!lit_pool_ok)
2209 {
2210 if (!s390_decompose_address (op, &addr))
2211 return 0;
2212 if (addr.literal_pool)
2213 return 0;
2214 decomposed = true;
2215 }
2216
2217 switch (c)
2218 {
2219 case 'Q': /* no index short displacement */
2220 if (!decomposed && !s390_decompose_address (op, &addr))
2221 return 0;
2222 if (addr.indx)
2223 return 0;
2224 if (!s390_short_displacement (addr.disp))
2225 return 0;
2226 break;
2227
2228 case 'R': /* with index short displacement */
2229 if (TARGET_LONG_DISPLACEMENT)
2230 {
2231 if (!decomposed && !s390_decompose_address (op, &addr))
2232 return 0;
2233 if (!s390_short_displacement (addr.disp))
2234 return 0;
2235 }
2236 /* Any invalid address here will be fixed up by reload,
2237 so accept it for the most generic constraint. */
2238 break;
2239
2240 case 'S': /* no index long displacement */
2241 if (!TARGET_LONG_DISPLACEMENT)
2242 return 0;
2243 if (!decomposed && !s390_decompose_address (op, &addr))
2244 return 0;
2245 if (addr.indx)
2246 return 0;
2247 if (s390_short_displacement (addr.disp))
2248 return 0;
2249 break;
2250
2251 case 'T': /* with index long displacement */
2252 if (!TARGET_LONG_DISPLACEMENT)
2253 return 0;
2254 /* Any invalid address here will be fixed up by reload,
2255 so accept it for the most generic constraint. */
2256 if ((decomposed || s390_decompose_address (op, &addr))
2257 && s390_short_displacement (addr.disp))
2258 return 0;
2259 break;
2260 default:
2261 return 0;
2262 }
2263 return 1;
2264 }
2265
2266
2267 /* Evaluates constraint strings described by the regular expression
2268 ([ABZ](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
2269 the constraint given in STR, and 0 otherwise. */
2270
2271 int
2272 s390_mem_constraint (const char *str, rtx op)
2273 {
2274 char c = str[0];
2275
2276 switch (c)
2277 {
2278 case 'A':
2279 /* Check for offsettable variants of memory constraints. */
2280 if (!MEM_P (op) || MEM_VOLATILE_P (op))
2281 return 0;
2282 if ((reload_completed || reload_in_progress)
2283 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
2284 return 0;
2285 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
2286 case 'B':
2287 /* Check for non-literal-pool variants of memory constraints. */
2288 if (!MEM_P (op))
2289 return 0;
2290 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
2291 case 'Q':
2292 case 'R':
2293 case 'S':
2294 case 'T':
2295 if (GET_CODE (op) != MEM)
2296 return 0;
2297 return s390_check_qrst_address (c, XEXP (op, 0), true);
2298 case 'U':
2299 return (s390_check_qrst_address ('Q', op, true)
2300 || s390_check_qrst_address ('R', op, true));
2301 case 'W':
2302 return (s390_check_qrst_address ('S', op, true)
2303 || s390_check_qrst_address ('T', op, true));
2304 case 'Y':
2305 /* Simply check for the basic form of a shift count. Reload will
2306 take care of making sure we have a proper base register. */
2307 if (!s390_decompose_shift_count (op, NULL, NULL))
2308 return 0;
2309 break;
2310 case 'Z':
2311 return s390_check_qrst_address (str[1], op, true);
2312 default:
2313 return 0;
2314 }
2315 return 1;
2316 }
2317
2318
2319 /* Evaluates constraint strings starting with letter O. Input
2320 parameter C is the second letter following the "O" in the constraint
2321 string. Returns 1 if VALUE meets the respective constraint and 0
2322 otherwise. */
2323
2324 int
2325 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
2326 {
2327 if (!TARGET_EXTIMM)
2328 return 0;
2329
2330 switch (c)
2331 {
2332 case 's':
2333 return trunc_int_for_mode (value, SImode) == value;
2334
2335 case 'p':
2336 return value == 0
2337 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
2338
2339 case 'n':
2340 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
2341
2342 default:
2343 gcc_unreachable ();
2344 }
2345 }
2346
2347
2348 /* Evaluates constraint strings starting with letter N. Parameter STR
2349 contains the letters following letter "N" in the constraint string.
2350 Returns true if VALUE matches the constraint. */
2351
2352 int
2353 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
2354 {
2355 enum machine_mode mode, part_mode;
2356 int def;
2357 int part, part_goal;
2358
2359
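/* The constraint string has the form <part><part_mode><mode><filler>:
   str[0] selects which part must match ('x' for any part), str[1] gives
   the mode of that part, str[2] the mode of the whole value, and str[3]
   the value of the remaining parts: all zero bits ('0') or all one
   bits ('F'). */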
2360 if (str[0] == 'x')
2361 part_goal = -1;
2362 else
2363 part_goal = str[0] - '0';
2364
2365 switch (str[1])
2366 {
2367 case 'Q':
2368 part_mode = QImode;
2369 break;
2370 case 'H':
2371 part_mode = HImode;
2372 break;
2373 case 'S':
2374 part_mode = SImode;
2375 break;
2376 default:
2377 return 0;
2378 }
2379
2380 switch (str[2])
2381 {
2382 case 'H':
2383 mode = HImode;
2384 break;
2385 case 'S':
2386 mode = SImode;
2387 break;
2388 case 'D':
2389 mode = DImode;
2390 break;
2391 default:
2392 return 0;
2393 }
2394
2395 switch (str[3])
2396 {
2397 case '0':
2398 def = 0;
2399 break;
2400 case 'F':
2401 def = -1;
2402 break;
2403 default:
2404 return 0;
2405 }
2406
2407 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
2408 return 0;
2409
2410 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
2411 if (part < 0)
2412 return 0;
2413 if (part_goal != -1 && part_goal != part)
2414 return 0;
2415
2416 return 1;
2417 }
2418
2419
2420 /* Returns true if the input parameter VALUE is a float zero. */
2421
2422 int
2423 s390_float_const_zero_p (rtx value)
2424 {
2425 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
2426 && value == CONST0_RTX (GET_MODE (value)));
2427 }
2428
2429 /* Implement TARGET_REGISTER_MOVE_COST. */
2430
2431 static int
2432 s390_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
2433 reg_class_t from, reg_class_t to)
2434 {
2435 /* On s390, copy between fprs and gprs is expensive. */
2436 if ((reg_classes_intersect_p (from, GENERAL_REGS)
2437 && reg_classes_intersect_p (to, FP_REGS))
2438 || (reg_classes_intersect_p (from, FP_REGS)
2439 && reg_classes_intersect_p (to, GENERAL_REGS)))
2440 return 10;
2441
2442 return 1;
2443 }
2444
2445 /* Implement TARGET_MEMORY_MOVE_COST. */
2446
2447 static int
2448 s390_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
2449 reg_class_t rclass ATTRIBUTE_UNUSED,
2450 bool in ATTRIBUTE_UNUSED)
2451 {
2452 return 1;
2453 }
2454
2455 /* Compute a (partial) cost for rtx X. Return true if the complete
2456 cost has been computed, and false if subexpressions should be
2457 scanned. In either case, *TOTAL contains the cost result.
2458 CODE contains GET_CODE (x), OUTER_CODE contains the code
2459 of the superexpression of x. */
2460
2461 static bool
2462 s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
2463 int *total, bool speed ATTRIBUTE_UNUSED)
2464 {
2465 switch (code)
2466 {
2467 case CONST:
2468 case CONST_INT:
2469 case LABEL_REF:
2470 case SYMBOL_REF:
2471 case CONST_DOUBLE:
2472 case MEM:
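/* Constants, symbolic references and memory operands are assumed to be
   folded into the instructions using them, so they carry no cost of
   their own. */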
2473 *total = 0;
2474 return true;
2475
2476 case ASHIFT:
2477 case ASHIFTRT:
2478 case LSHIFTRT:
2479 case ROTATE:
2480 case ROTATERT:
2481 case AND:
2482 case IOR:
2483 case XOR:
2484 case NEG:
2485 case NOT:
2486 *total = COSTS_N_INSNS (1);
2487 return false;
2488
2489 case PLUS:
2490 case MINUS:
2491 *total = COSTS_N_INSNS (1);
2492 return false;
2493
2494 case MULT:
2495 switch (GET_MODE (x))
2496 {
2497 case SImode:
2498 {
2499 rtx left = XEXP (x, 0);
2500 rtx right = XEXP (x, 1);
2501 if (GET_CODE (right) == CONST_INT
2502 && CONST_OK_FOR_K (INTVAL (right)))
2503 *total = s390_cost->mhi;
2504 else if (GET_CODE (left) == SIGN_EXTEND)
2505 *total = s390_cost->mh;
2506 else
2507 *total = s390_cost->ms; /* msr, ms, msy */
2508 break;
2509 }
2510 case DImode:
2511 {
2512 rtx left = XEXP (x, 0);
2513 rtx right = XEXP (x, 1);
2514 if (TARGET_ZARCH)
2515 {
2516 if (GET_CODE (right) == CONST_INT
2517 && CONST_OK_FOR_K (INTVAL (right)))
2518 *total = s390_cost->mghi;
2519 else if (GET_CODE (left) == SIGN_EXTEND)
2520 *total = s390_cost->msgf;
2521 else
2522 *total = s390_cost->msg; /* msgr, msg */
2523 }
2524 else /* TARGET_31BIT */
2525 {
2526 if (GET_CODE (left) == SIGN_EXTEND
2527 && GET_CODE (right) == SIGN_EXTEND)
2528 /* mulsidi case: mr, m */
2529 *total = s390_cost->m;
2530 else if (GET_CODE (left) == ZERO_EXTEND
2531 && GET_CODE (right) == ZERO_EXTEND
2532 && TARGET_CPU_ZARCH)
2533 /* umulsidi case: ml, mlr */
2534 *total = s390_cost->ml;
2535 else
2536 /* Complex calculation is required. */
2537 *total = COSTS_N_INSNS (40);
2538 }
2539 break;
2540 }
2541 case SFmode:
2542 case DFmode:
2543 *total = s390_cost->mult_df;
2544 break;
2545 case TFmode:
2546 *total = s390_cost->mxbr;
2547 break;
2548 default:
2549 return false;
2550 }
2551 return false;
2552
2553 case FMA:
2554 switch (GET_MODE (x))
2555 {
2556 case DFmode:
2557 *total = s390_cost->madbr;
2558 break;
2559 case SFmode:
2560 *total = s390_cost->maebr;
2561 break;
2562 default:
2563 return false;
2564 }
2565 /* Negate in the third argument is free: FMSUB. */
2566 if (GET_CODE (XEXP (x, 2)) == NEG)
2567 {
2568 *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
2569 + rtx_cost (XEXP (x, 1), FMA, 1, speed)
2570 + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
2571 return true;
2572 }
2573 return false;
2574
2575 case UDIV:
2576 case UMOD:
2577 if (GET_MODE (x) == TImode) /* 128 bit division */
2578 *total = s390_cost->dlgr;
2579 else if (GET_MODE (x) == DImode)
2580 {
2581 rtx right = XEXP (x, 1);
2582 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2583 *total = s390_cost->dlr;
2584 else /* 64 by 64 bit division */
2585 *total = s390_cost->dlgr;
2586 }
2587 else if (GET_MODE (x) == SImode) /* 32 bit division */
2588 *total = s390_cost->dlr;
2589 return false;
2590
2591 case DIV:
2592 case MOD:
2593 if (GET_MODE (x) == DImode)
2594 {
2595 rtx right = XEXP (x, 1);
2596 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2597 if (TARGET_ZARCH)
2598 *total = s390_cost->dsgfr;
2599 else
2600 *total = s390_cost->dr;
2601 else /* 64 by 64 bit division */
2602 *total = s390_cost->dsgr;
2603 }
2604 else if (GET_MODE (x) == SImode) /* 32 bit division */
2605 *total = s390_cost->dlr;
2606 else if (GET_MODE (x) == SFmode)
2607 {
2608 *total = s390_cost->debr;
2609 }
2610 else if (GET_MODE (x) == DFmode)
2611 {
2612 *total = s390_cost->ddbr;
2613 }
2614 else if (GET_MODE (x) == TFmode)
2615 {
2616 *total = s390_cost->dxbr;
2617 }
2618 return false;
2619
2620 case SQRT:
2621 if (GET_MODE (x) == SFmode)
2622 *total = s390_cost->sqebr;
2623 else if (GET_MODE (x) == DFmode)
2624 *total = s390_cost->sqdbr;
2625 else /* TFmode */
2626 *total = s390_cost->sqxbr;
2627 return false;
2628
2629 case SIGN_EXTEND:
2630 case ZERO_EXTEND:
2631 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
2632 || outer_code == PLUS || outer_code == MINUS
2633 || outer_code == COMPARE)
2634 *total = 0;
2635 return false;
2636
2637 case COMPARE:
2638 *total = COSTS_N_INSNS (1);
2639 if (GET_CODE (XEXP (x, 0)) == AND
2640 && GET_CODE (XEXP (x, 1)) == CONST_INT
2641 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2642 {
2643 rtx op0 = XEXP (XEXP (x, 0), 0);
2644 rtx op1 = XEXP (XEXP (x, 0), 1);
2645 rtx op2 = XEXP (x, 1);
2646
2647 if (memory_operand (op0, GET_MODE (op0))
2648 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
2649 return true;
2650 if (register_operand (op0, GET_MODE (op0))
2651 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
2652 return true;
2653 }
2654 return false;
2655
2656 default:
2657 return false;
2658 }
2659 }
2660
2661 /* Return the cost of an address rtx ADDR. */
2662
2663 static int
2664 s390_address_cost (rtx addr, enum machine_mode mode ATTRIBUTE_UNUSED,
2665 addr_space_t as ATTRIBUTE_UNUSED,
2666 bool speed ATTRIBUTE_UNUSED)
2667 {
2668 struct s390_address ad;
2669 if (!s390_decompose_address (addr, &ad))
2670 return 1000;
2671
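/* An address with an index register is slightly more expensive than a
   plain base + displacement address. */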
2672 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
2673 }
2674
2675 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
2676 otherwise return 0. */
2677
2678 int
2679 tls_symbolic_operand (rtx op)
2680 {
2681 if (GET_CODE (op) != SYMBOL_REF)
2682 return 0;
2683 return SYMBOL_REF_TLS_MODEL (op);
2684 }
2685 \f
2686 /* Split DImode access register reference REG (on 64-bit) into its constituent
2687 low and high parts, and store them into LO and HI. Note that gen_lowpart/
2688 gen_highpart cannot be used as they assume all registers are word-sized,
2689 while our access registers have only half that size. */
2690
2691 void
2692 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
2693 {
2694 gcc_assert (TARGET_64BIT);
2695 gcc_assert (ACCESS_REG_P (reg));
2696 gcc_assert (GET_MODE (reg) == DImode);
2697 gcc_assert (!(REGNO (reg) & 1));
2698
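/* The high part lives in the even-numbered register REG itself, the low
   part in the following odd-numbered register. */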
2699 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
2700 *hi = gen_rtx_REG (SImode, REGNO (reg));
2701 }
2702
2703 /* Return true if OP contains a symbol reference. */
2704
2705 bool
2706 symbolic_reference_mentioned_p (rtx op)
2707 {
2708 const char *fmt;
2709 int i;
2710
2711 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2712 return 1;
2713
2714 fmt = GET_RTX_FORMAT (GET_CODE (op));
2715 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2716 {
2717 if (fmt[i] == 'E')
2718 {
2719 int j;
2720
2721 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2722 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2723 return 1;
2724 }
2725
2726 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2727 return 1;
2728 }
2729
2730 return 0;
2731 }
2732
2733 /* Return true if OP contains a reference to a thread-local symbol. */
2734
2735 bool
2736 tls_symbolic_reference_mentioned_p (rtx op)
2737 {
2738 const char *fmt;
2739 int i;
2740
2741 if (GET_CODE (op) == SYMBOL_REF)
2742 return tls_symbolic_operand (op);
2743
2744 fmt = GET_RTX_FORMAT (GET_CODE (op));
2745 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2746 {
2747 if (fmt[i] == 'E')
2748 {
2749 int j;
2750
2751 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2752 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2753 return true;
2754 }
2755
2756 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
2757 return true;
2758 }
2759
2760 return false;
2761 }
2762
2763
2764 /* Return true if OP is a legitimate general operand when
2765 generating PIC code. It is given that flag_pic is on
2766 and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2767
2768 int
2769 legitimate_pic_operand_p (rtx op)
2770 {
2771 /* Accept all non-symbolic constants. */
2772 if (!SYMBOLIC_CONST (op))
2773 return 1;
2774
2775 /* Reject everything else; must be handled
2776 via emit_symbolic_move. */
2777 return 0;
2778 }
2779
2780 /* Returns true if the constant value OP is a legitimate general operand.
2781 It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2782
2783 static bool
2784 s390_legitimate_constant_p (enum machine_mode mode, rtx op)
2785 {
2786 /* Accept all non-symbolic constants. */
2787 if (!SYMBOLIC_CONST (op))
2788 return 1;
2789
2790 /* Accept immediate LARL operands. */
2791 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
2792 return 1;
2793
2794 /* Thread-local symbols are never legal constants. This is
2795 so that emit_call knows that computing such addresses
2796 might require a function call. */
2797 if (TLS_SYMBOLIC_CONST (op))
2798 return 0;
2799
2800 /* In the PIC case, symbolic constants must *not* be
2801 forced into the literal pool. We accept them here,
2802 so that they will be handled by emit_symbolic_move. */
2803 if (flag_pic)
2804 return 1;
2805
2806 /* All remaining non-PIC symbolic constants are
2807 forced into the literal pool. */
2808 return 0;
2809 }
2810
2811 /* Determine if it's legal to put X into the constant pool. This
2812 is not possible if X contains the address of a symbol that is
2813 not constant (TLS) or not known at final link time (PIC). */
2814
2815 static bool
2816 s390_cannot_force_const_mem (enum machine_mode mode, rtx x)
2817 {
2818 switch (GET_CODE (x))
2819 {
2820 case CONST_INT:
2821 case CONST_DOUBLE:
2822 /* Accept all non-symbolic constants. */
2823 return false;
2824
2825 case LABEL_REF:
2826 /* Labels are OK iff we are non-PIC. */
2827 return flag_pic != 0;
2828
2829 case SYMBOL_REF:
2830 /* 'Naked' TLS symbol references are never OK,
2831 non-TLS symbols are OK iff we are non-PIC. */
2832 if (tls_symbolic_operand (x))
2833 return true;
2834 else
2835 return flag_pic != 0;
2836
2837 case CONST:
2838 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
2839 case PLUS:
2840 case MINUS:
2841 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
2842 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
2843
2844 case UNSPEC:
2845 switch (XINT (x, 1))
2846 {
2847 /* Only lt-relative or GOT-relative UNSPECs are OK. */
2848 case UNSPEC_LTREL_OFFSET:
2849 case UNSPEC_GOT:
2850 case UNSPEC_GOTOFF:
2851 case UNSPEC_PLTOFF:
2852 case UNSPEC_TLSGD:
2853 case UNSPEC_TLSLDM:
2854 case UNSPEC_NTPOFF:
2855 case UNSPEC_DTPOFF:
2856 case UNSPEC_GOTNTPOFF:
2857 case UNSPEC_INDNTPOFF:
2858 return false;
2859
2860 /* If the literal pool shares the code section, execute
2861 template placeholders may be put into the pool as well. */
2862 case UNSPEC_INSN:
2863 return TARGET_CPU_ZARCH;
2864
2865 default:
2866 return true;
2867 }
2868 break;
2869
2870 default:
2871 gcc_unreachable ();
2872 }
2873 }
2874
2875 /* Returns true if the constant value OP is a legitimate general
2876 operand during and after reload. The difference from
2877 legitimate_constant_p is that this function will not accept
2878 a constant that would need to be forced to the literal pool
2879 before it can be used as operand.
2880 This function accepts all constants which can be loaded directly
2881 into a GPR. */
2882
2883 bool
2884 legitimate_reload_constant_p (rtx op)
2885 {
2886 /* Accept la(y) operands. */
2887 if (GET_CODE (op) == CONST_INT
2888 && DISP_IN_RANGE (INTVAL (op)))
2889 return true;
2890
2891 /* Accept l(g)hi/l(g)fi operands. */
2892 if (GET_CODE (op) == CONST_INT
2893 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
2894 return true;
2895
2896 /* Accept lliXX operands. */
2897 if (TARGET_ZARCH
2898 && GET_CODE (op) == CONST_INT
2899 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2900 && s390_single_part (op, word_mode, HImode, 0) >= 0)
2901 return true;
2902
2903 if (TARGET_EXTIMM
2904 && GET_CODE (op) == CONST_INT
2905 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2906 && s390_single_part (op, word_mode, SImode, 0) >= 0)
2907 return true;
2908
2909 /* Accept larl operands. */
2910 if (TARGET_CPU_ZARCH
2911 && larl_operand (op, VOIDmode))
2912 return true;
2913
2914 /* Accept floating-point zero operands that fit into a single GPR. */
2915 if (GET_CODE (op) == CONST_DOUBLE
2916 && s390_float_const_zero_p (op)
2917 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
2918 return true;
2919
2920 /* Accept double-word operands that can be split. */
2921 if (GET_CODE (op) == CONST_INT
2922 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
2923 {
2924 enum machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
2925 rtx hi = operand_subword (op, 0, 0, dword_mode);
2926 rtx lo = operand_subword (op, 1, 0, dword_mode);
2927 return legitimate_reload_constant_p (hi)
2928 && legitimate_reload_constant_p (lo);
2929 }
2930
2931 /* Everything else cannot be handled without reload. */
2932 return false;
2933 }
2934
2935 /* Returns true if the constant value OP is a legitimate fp operand
2936 during and after reload.
2937 This function accepts all constants which can be loaded directly
2938 into an FPR. */
2939
2940 static bool
2941 legitimate_reload_fp_constant_p (rtx op)
2942 {
2943 /* Accept floating-point zero operands if the load zero instruction
2944 can be used. Prior to z196 the load fp zero instruction caused a
2945 performance penalty if the result is used as a BFP number. */
2946 if (TARGET_Z196
2947 && GET_CODE (op) == CONST_DOUBLE
2948 && s390_float_const_zero_p (op))
2949 return true;
2950
2951 return false;
2952 }
2953
2954 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
2955 return the class of reg to actually use. */
2956
2957 static reg_class_t
2958 s390_preferred_reload_class (rtx op, reg_class_t rclass)
2959 {
2960 switch (GET_CODE (op))
2961 {
2962 /* Constants we cannot reload into general registers
2963 must be forced into the literal pool. */
2964 case CONST_DOUBLE:
2965 case CONST_INT:
2966 if (reg_class_subset_p (GENERAL_REGS, rclass)
2967 && legitimate_reload_constant_p (op))
2968 return GENERAL_REGS;
2969 else if (reg_class_subset_p (ADDR_REGS, rclass)
2970 && legitimate_reload_constant_p (op))
2971 return ADDR_REGS;
2972 else if (reg_class_subset_p (FP_REGS, rclass)
2973 && legitimate_reload_fp_constant_p (op))
2974 return FP_REGS;
2975 return NO_REGS;
2976
2977 /* If a symbolic constant or a PLUS is reloaded,
2978 it is most likely being used as an address, so
2979 prefer ADDR_REGS. If RCLASS is not a superset
2980 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
2981 case LABEL_REF:
2982 case SYMBOL_REF:
2983 case CONST:
2984 if (!legitimate_reload_constant_p (op))
2985 return NO_REGS;
2986 /* fallthrough */
2987 case PLUS:
2988 /* load address will be used. */
2989 if (reg_class_subset_p (ADDR_REGS, rclass))
2990 return ADDR_REGS;
2991 else
2992 return NO_REGS;
2993
2994 default:
2995 break;
2996 }
2997
2998 return rclass;
2999 }
3000
3001 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
3002 multiple of ALIGNMENT and the SYMBOL_REF being naturally
3003 aligned. */
3004
3005 bool
3006 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
3007 {
3008 HOST_WIDE_INT addend;
3009 rtx symref;
3010
3011 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3012 return false;
3013
3014 if (addend & (alignment - 1))
3015 return false;
3016
3017 if (GET_CODE (symref) == SYMBOL_REF
3018 && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref))
3019 return true;
3020
3021 if (GET_CODE (symref) == UNSPEC
3022 && alignment <= UNITS_PER_LONG)
3023 return true;
3024
3025 return false;
3026 }
3027
3028 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
3029 operand, SCRATCH is used to load the even part of the address;
3030 one is then added to form the final value. */
3031
3032 void
3033 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
3034 {
3035 HOST_WIDE_INT addend;
3036 rtx symref;
3037
3038 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3039 gcc_unreachable ();
3040
3041 if (!(addend & 1))
3042 /* Easy case. The addend is even so larl will do fine. */
3043 emit_move_insn (reg, addr);
3044 else
3045 {
3046 /* We can leave the scratch register untouched if the target
3047 register is a valid base register. */
3048 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
3049 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
3050 scratch = reg;
3051
3052 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
3053 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
3054
3055 if (addend != 1)
3056 emit_move_insn (scratch,
3057 gen_rtx_CONST (Pmode,
3058 gen_rtx_PLUS (Pmode, symref,
3059 GEN_INT (addend - 1))));
3060 else
3061 emit_move_insn (scratch, symref);
3062
3063 /* Increment the address using la in order to avoid clobbering cc. */
3064 emit_move_insn (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
3065 }
3066 }
3067
3068 /* Generate what is necessary to move between REG and MEM using
3069 SCRATCH. The direction is given by TOMEM. */
3070
3071 void
3072 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
3073 {
3074 /* Reload might have pulled a constant out of the literal pool.
3075 Force it back in. */
3076 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
3077 || GET_CODE (mem) == CONST)
3078 mem = force_const_mem (GET_MODE (reg), mem);
3079
3080 gcc_assert (MEM_P (mem));
3081
3082 /* For a load from memory we can leave the scratch register
3083 untouched if the target register is a valid base register. */
3084 if (!tomem
3085 && REGNO (reg) < FIRST_PSEUDO_REGISTER
3086 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
3087 && GET_MODE (reg) == GET_MODE (scratch))
3088 scratch = reg;
3089
3090 /* Load address into scratch register. Since we can't have a
3091 secondary reload for a secondary reload we have to cover the case
3092 where larl would need a secondary reload here as well. */
3093 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
3094
3095 /* Now we can use a standard load/store to do the move. */
3096 if (tomem)
3097 emit_move_insn (replace_equiv_address (mem, scratch), reg);
3098 else
3099 emit_move_insn (reg, replace_equiv_address (mem, scratch));
3100 }
3101
3102 /* Inform reload about cases where moving X with a mode MODE to a register in
3103 RCLASS requires an extra scratch or immediate register. Return the class
3104 needed for the immediate register. */
3105
3106 static reg_class_t
3107 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
3108 enum machine_mode mode, secondary_reload_info *sri)
3109 {
3110 enum reg_class rclass = (enum reg_class) rclass_i;
3111
3112 /* Intermediate register needed. */
3113 if (reg_classes_intersect_p (CC_REGS, rclass))
3114 return GENERAL_REGS;
3115
3116 if (TARGET_Z10)
3117 {
3118 HOST_WIDE_INT offset;
3119 rtx symref;
3120
3121 /* On z10 several optimizer steps may generate larl operands with
3122 an odd addend. */
3123 if (in_p
3124 && s390_loadrelative_operand_p (x, &symref, &offset)
3125 && mode == Pmode
3126 && !SYMBOL_REF_ALIGN1_P (symref)
3127 && (offset & 1) == 1)
3128 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
3129 : CODE_FOR_reloadsi_larl_odd_addend_z10);
3130
3131 /* On z10 we need a scratch register when moving QI, TI or floating
3132 point mode values from or to a memory location with a SYMBOL_REF
3133 or if the symref addend of a HI, SI or DI move is not aligned to the
3134 width of the access. */
3135 if (MEM_P (x)
3136 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
3137 && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode)
3138 || (!TARGET_ZARCH && mode == DImode)
3139 || ((mode == HImode || mode == SImode || mode == DImode)
3140 && (!s390_check_symref_alignment (XEXP (x, 0),
3141 GET_MODE_SIZE (mode))))))
3142 {
3143 #define __SECONDARY_RELOAD_CASE(M,m) \
3144 case M##mode: \
3145 if (TARGET_64BIT) \
3146 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
3147 CODE_FOR_reload##m##di_tomem_z10; \
3148 else \
3149 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
3150 CODE_FOR_reload##m##si_tomem_z10; \
3151 break;
3152
3153 switch (GET_MODE (x))
3154 {
3155 __SECONDARY_RELOAD_CASE (QI, qi);
3156 __SECONDARY_RELOAD_CASE (HI, hi);
3157 __SECONDARY_RELOAD_CASE (SI, si);
3158 __SECONDARY_RELOAD_CASE (DI, di);
3159 __SECONDARY_RELOAD_CASE (TI, ti);
3160 __SECONDARY_RELOAD_CASE (SF, sf);
3161 __SECONDARY_RELOAD_CASE (DF, df);
3162 __SECONDARY_RELOAD_CASE (TF, tf);
3163 __SECONDARY_RELOAD_CASE (SD, sd);
3164 __SECONDARY_RELOAD_CASE (DD, dd);
3165 __SECONDARY_RELOAD_CASE (TD, td);
3166
3167 default:
3168 gcc_unreachable ();
3169 }
3170 #undef __SECONDARY_RELOAD_CASE
3171 }
3172 }
3173
3174 /* We need a scratch register when loading a PLUS expression which
3175 is not a legitimate operand of the LOAD ADDRESS instruction. */
3176 if (in_p && s390_plus_operand (x, mode))
3177 sri->icode = (TARGET_64BIT ?
3178 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
3179
3180 /* When performing a multiword move from or to memory we have to make sure the
3181 second chunk in memory is addressable without causing a displacement
3182 overflow. If that would be the case we calculate the address in
3183 a scratch register. */
3184 if (MEM_P (x)
3185 && GET_CODE (XEXP (x, 0)) == PLUS
3186 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3187 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
3188 + GET_MODE_SIZE (mode) - 1))
3189 {
3190 /* For GENERAL_REGS a displacement overflow is no problem if occurring
3191 in an s_operand address since we may fall back to lm/stm. So we only
3192 have to care about overflows in the b+i+d case. */
3193 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
3194 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
3195 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
3196 /* For FP_REGS no lm/stm is available so this check is triggered
3197 for displacement overflows in b+i+d and b+d like addresses. */
3198 || (reg_classes_intersect_p (FP_REGS, rclass)
3199 && s390_class_max_nregs (FP_REGS, mode) > 1))
3200 {
3201 if (in_p)
3202 sri->icode = (TARGET_64BIT ?
3203 CODE_FOR_reloaddi_nonoffmem_in :
3204 CODE_FOR_reloadsi_nonoffmem_in);
3205 else
3206 sri->icode = (TARGET_64BIT ?
3207 CODE_FOR_reloaddi_nonoffmem_out :
3208 CODE_FOR_reloadsi_nonoffmem_out);
3209 }
3210 }
3211
3212 /* A scratch address register is needed when a symbolic constant is
3213 copied to r0 compiling with -fPIC. In other cases the target
3214 register might be used as temporary (see legitimize_pic_address). */
3215 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
3216 sri->icode = (TARGET_64BIT ?
3217 CODE_FOR_reloaddi_PIC_addr :
3218 CODE_FOR_reloadsi_PIC_addr);
3219
3220 /* Either scratch or no register needed. */
3221 return NO_REGS;
3222 }
3223
3224 /* Generate code to load SRC, which is a PLUS that is not a
3225 legitimate operand for the LA instruction, into TARGET.
3226 SCRATCH may be used as scratch register. */
3227
3228 void
3229 s390_expand_plus_operand (rtx target, rtx src,
3230 rtx scratch)
3231 {
3232 rtx sum1, sum2;
3233 struct s390_address ad;
3234
3235 /* src must be a PLUS; get its two operands. */
3236 gcc_assert (GET_CODE (src) == PLUS);
3237 gcc_assert (GET_MODE (src) == Pmode);
3238
3239 /* Check if any of the two operands is already scheduled
3240 for replacement by reload. This can happen e.g. when
3241 float registers occur in an address. */
3242 sum1 = find_replacement (&XEXP (src, 0));
3243 sum2 = find_replacement (&XEXP (src, 1));
3244 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3245
3246 /* If the address is already strictly valid, there's nothing to do. */
3247 if (!s390_decompose_address (src, &ad)
3248 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3249 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
3250 {
3251 /* Otherwise, one of the operands cannot be an address register;
3252 we reload its value into the scratch register. */
3253 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
3254 {
3255 emit_move_insn (scratch, sum1);
3256 sum1 = scratch;
3257 }
3258 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
3259 {
3260 emit_move_insn (scratch, sum2);
3261 sum2 = scratch;
3262 }
3263
3264 /* According to the way these invalid addresses are generated
3265 in reload.c, it should never happen (at least on s390) that
3266 *neither* of the PLUS components, after find_replacement
3267 was applied, is an address register. */
3268 if (sum1 == scratch && sum2 == scratch)
3269 {
3270 debug_rtx (src);
3271 gcc_unreachable ();
3272 }
3273
3274 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3275 }
3276
3277 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
3278 is only ever performed on addresses, so we can mark the
3279 sum as legitimate for LA in any case. */
3280 s390_load_address (target, src);
3281 }
3282
3283
3284 /* Return true if ADDR is a valid memory address.
3285 STRICT specifies whether strict register checking applies. */
3286
3287 static bool
3288 s390_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3289 {
3290 struct s390_address ad;
3291
3292 if (TARGET_Z10
3293 && larl_operand (addr, VOIDmode)
3294 && (mode == VOIDmode
3295 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
3296 return true;
3297
3298 if (!s390_decompose_address (addr, &ad))
3299 return false;
3300
3301 if (strict)
3302 {
3303 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3304 return false;
3305
3306 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
3307 return false;
3308 }
3309 else
3310 {
3311 if (ad.base
3312 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
3313 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
3314 return false;
3315
3316 if (ad.indx
3317 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
3318 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
3319 return false;
3320 }
3321 return true;
3322 }
3323
3324 /* Return true if OP is a valid operand for the LA instruction.
3325 In 31-bit, we need to prove that the result is used as an
3326 address, as LA performs only a 31-bit addition. */
3327
3328 bool
3329 legitimate_la_operand_p (rtx op)
3330 {
3331 struct s390_address addr;
3332 if (!s390_decompose_address (op, &addr))
3333 return false;
3334
3335 return (TARGET_64BIT || addr.pointer);
3336 }
3337
3338 /* Return true if it is valid *and* preferable to use LA to
3339 compute the sum of OP1 and OP2. */
3340
3341 bool
3342 preferred_la_operand_p (rtx op1, rtx op2)
3343 {
3344 struct s390_address addr;
3345
3346 if (op2 != const0_rtx)
3347 op1 = gen_rtx_PLUS (Pmode, op1, op2);
3348
3349 if (!s390_decompose_address (op1, &addr))
3350 return false;
3351 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
3352 return false;
3353 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
3354 return false;
3355
3356 /* Avoid LA instructions with index register on z196; it is
3357 preferable to use regular add instructions when possible.
3358 Starting with zEC12 the la with index register is "uncracked"
3359 again. */
3360 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
3361 return false;
3362
3363 if (!TARGET_64BIT && !addr.pointer)
3364 return false;
3365
3366 if (addr.pointer)
3367 return true;
3368
3369 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
3370 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
3371 return true;
3372
3373 return false;
3374 }
3375
3376 /* Emit a forced load-address operation to load SRC into DST.
3377 This will use the LOAD ADDRESS instruction even in situations
3378 where legitimate_la_operand_p (SRC) returns false. */
3379
3380 void
3381 s390_load_address (rtx dst, rtx src)
3382 {
3383 if (TARGET_64BIT)
3384 emit_move_insn (dst, src);
3385 else
3386 emit_insn (gen_force_la_31 (dst, src));
3387 }
3388
3389 /* Return a legitimate reference for ORIG (an address) using the
3390 register REG. If REG is 0, a new pseudo is generated.
3391
3392 There are two types of references that must be handled:
3393
3394 1. Global data references must load the address from the GOT, via
3395 the PIC reg. An insn is emitted to do this load, and the reg is
3396 returned.
3397
3398 2. Static data references, constant pool addresses, and code labels
3399 compute the address as an offset from the GOT, whose base is in
3400 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
3401 differentiate them from global data objects. The returned
3402 address is the PIC reg + an unspec constant.
3403
3404 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
3405 reg also appears in the address. */
3406
3407 rtx
3408 legitimize_pic_address (rtx orig, rtx reg)
3409 {
3410 rtx addr = orig;
3411 rtx addend = const0_rtx;
3412 rtx new_rtx = orig;
3413
3414 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
3415
3416 if (GET_CODE (addr) == CONST)
3417 addr = XEXP (addr, 0);
3418
3419 if (GET_CODE (addr) == PLUS)
3420 {
3421 addend = XEXP (addr, 1);
3422 addr = XEXP (addr, 0);
3423 }
3424
3425 if ((GET_CODE (addr) == LABEL_REF
3426 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
3427 || (GET_CODE (addr) == UNSPEC &&
3428 (XINT (addr, 1) == UNSPEC_GOTENT
3429 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3430 && GET_CODE (addend) == CONST_INT)
3431 {
3432 /* This can be locally addressed. */
3433
3434 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
3435 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
3436 gen_rtx_CONST (Pmode, addr) : addr);
3437
3438 if (TARGET_CPU_ZARCH
3439 && larl_operand (const_addr, VOIDmode)
3440 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
3441 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
3442 {
3443 if (INTVAL (addend) & 1)
3444 {
3445 /* LARL can't handle odd offsets, so emit a pair of LARL
3446 and LA. */
3447 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3448
3449 if (!DISP_IN_RANGE (INTVAL (addend)))
3450 {
3451 HOST_WIDE_INT even = INTVAL (addend) - 1;
3452 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
3453 addr = gen_rtx_CONST (Pmode, addr);
3454 addend = const1_rtx;
3455 }
3456
3457 emit_move_insn (temp, addr);
3458 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
3459
3460 if (reg != 0)
3461 {
3462 s390_load_address (reg, new_rtx);
3463 new_rtx = reg;
3464 }
3465 }
3466 else
3467 {
3468 /* If the offset is even, we can just use LARL. This
3469 will happen automatically. */
3470 }
3471 }
3472 else
3473 {
3474 /* No larl - Access local symbols relative to the GOT. */
3475
3476 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3477
3478 if (reload_in_progress || reload_completed)
3479 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3480
3481 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
3482 if (addend != const0_rtx)
3483 addr = gen_rtx_PLUS (Pmode, addr, addend);
3484 addr = gen_rtx_CONST (Pmode, addr);
3485 addr = force_const_mem (Pmode, addr);
3486 emit_move_insn (temp, addr);
3487
3488 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3489 if (reg != 0)
3490 {
3491 s390_load_address (reg, new_rtx);
3492 new_rtx = reg;
3493 }
3494 }
3495 }
3496 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
3497 {
3498 /* A non-local symbol reference without addend.
3499
3500 The symbol ref is wrapped into an UNSPEC to make sure the
3501 proper operand modifier (@GOT or @GOTENT) will be emitted.
3502 This will tell the linker to put the symbol into the GOT.
3503
3504 Additionally the code dereferencing the GOT slot is emitted here.
3505
3506 An addend to the symref needs to be added afterwards.
3507 legitimize_pic_address calls itself recursively to handle
3508 that case. So no need to do it here. */
3509
3510 if (reg == 0)
3511 reg = gen_reg_rtx (Pmode);
3512
3513 if (TARGET_Z10)
3514 {
3515 /* Use load relative if possible.
3516 lgrl <target>, sym@GOTENT */
3517 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3518 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3519 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
3520
3521 emit_move_insn (reg, new_rtx);
3522 new_rtx = reg;
3523 }
3524 else if (flag_pic == 1)
3525 {
3526 /* Assume GOT offset is a valid displacement operand (< 4k
3527 or < 512k with z990). This is handled the same way in
3528 both 31- and 64-bit code (@GOT).
3529 lg <target>, sym@GOT(r12) */
3530
3531 if (reload_in_progress || reload_completed)
3532 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3533
3534 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3535 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3536 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3537 new_rtx = gen_const_mem (Pmode, new_rtx);
3538 emit_move_insn (reg, new_rtx);
3539 new_rtx = reg;
3540 }
3541 else if (TARGET_CPU_ZARCH)
3542 {
3543 /* If the GOT offset might be >= 4k, we determine the position
3544 of the GOT entry via a PC-relative LARL (@GOTENT).
3545 larl temp, sym@GOTENT
3546 lg <target>, 0(temp) */
3547
3548 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3549
3550 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3551 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3552
3553 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3554 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3555 emit_move_insn (temp, new_rtx);
3556
3557 new_rtx = gen_const_mem (Pmode, temp);
3558 emit_move_insn (reg, new_rtx);
3559
3560 new_rtx = reg;
3561 }
3562 else
3563 {
3564 /* If the GOT offset might be >= 4k, we have to load it
3565 from the literal pool (@GOT).
3566
3567 lg temp, lit-litbase(r13)
3568 lg <target>, 0(temp)
3569 lit: .long sym@GOT */
3570
3571 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3572
3573 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3574 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3575
3576 if (reload_in_progress || reload_completed)
3577 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3578
3579 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3580 addr = gen_rtx_CONST (Pmode, addr);
3581 addr = force_const_mem (Pmode, addr);
3582 emit_move_insn (temp, addr);
3583
3584 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3585 new_rtx = gen_const_mem (Pmode, new_rtx);
3586 emit_move_insn (reg, new_rtx);
3587 new_rtx = reg;
3588 }
3589 }
3590 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
3591 {
3592 gcc_assert (XVECLEN (addr, 0) == 1);
3593 switch (XINT (addr, 1))
3594 {
3595 /* These unspecs address symbols (or PLT slots) relative to the GOT
3596 (not GOT slots!). In general this will exceed the
3597 displacement range, so these values belong in the literal
3598 pool. */
3599 case UNSPEC_GOTOFF:
3600 case UNSPEC_PLTOFF:
3601 new_rtx = force_const_mem (Pmode, orig);
3602 break;
3603
3604 /* For -fPIC the GOT size might exceed the displacement
3605 range so make sure the value is in the literal pool. */
3606 case UNSPEC_GOT:
3607 if (flag_pic == 2)
3608 new_rtx = force_const_mem (Pmode, orig);
3609 break;
3610
3611 /* For @GOTENT larl is used. This is handled like local
3612 symbol refs. */
3613 case UNSPEC_GOTENT:
3614 gcc_unreachable ();
3615 break;
3616
3617 /* @PLT is OK as is on 64-bit, must be converted to
3618 GOT-relative @PLTOFF on 31-bit. */
3619 case UNSPEC_PLT:
3620 if (!TARGET_CPU_ZARCH)
3621 {
3622 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3623
3624 if (reload_in_progress || reload_completed)
3625 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3626
3627 addr = XVECEXP (addr, 0, 0);
3628 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
3629 UNSPEC_PLTOFF);
3630 if (addend != const0_rtx)
3631 addr = gen_rtx_PLUS (Pmode, addr, addend);
3632 addr = gen_rtx_CONST (Pmode, addr);
3633 addr = force_const_mem (Pmode, addr);
3634 emit_move_insn (temp, addr);
3635
3636 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3637 if (reg != 0)
3638 {
3639 s390_load_address (reg, new_rtx);
3640 new_rtx = reg;
3641 }
3642 }
3643 else
3644 /* On 64 bit larl can be used. This case is handled like
3645 local symbol refs. */
3646 gcc_unreachable ();
3647 break;
3648
3649 /* Everything else cannot happen. */
3650 default:
3651 gcc_unreachable ();
3652 }
3653 }
3654 else if (addend != const0_rtx)
3655 {
3656 /* Otherwise, compute the sum. */
3657
3658 rtx base = legitimize_pic_address (addr, reg);
3659 new_rtx = legitimize_pic_address (addend,
3660 base == reg ? NULL_RTX : reg);
3661 if (GET_CODE (new_rtx) == CONST_INT)
3662 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
3663 else
3664 {
3665 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
3666 {
3667 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
3668 new_rtx = XEXP (new_rtx, 1);
3669 }
3670 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
3671 }
3672
3673 if (GET_CODE (new_rtx) == CONST)
3674 new_rtx = XEXP (new_rtx, 0);
3675 new_rtx = force_operand (new_rtx, 0);
3676 }
3677
3678 return new_rtx;
3679 }
3680
3681 /* Load the thread pointer into a register. */
3682
3683 rtx
3684 s390_get_thread_pointer (void)
3685 {
3686 rtx tp = gen_reg_rtx (Pmode);
3687
3688 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
3689 mark_reg_pointer (tp, BITS_PER_WORD);
3690
3691 return tp;
3692 }
3693
3694 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
3695 in s390_tls_symbol which always refers to __tls_get_offset.
3696 The returned offset is written to RESULT_REG and a USE rtx is
3697 generated for TLS_CALL. */
3698
3699 static GTY(()) rtx s390_tls_symbol;
3700
3701 static void
3702 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
3703 {
3704 rtx insn;
3705
3706 if (!flag_pic)
3707 emit_insn (s390_load_got ());
3708
3709 if (!s390_tls_symbol)
3710 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
3711
3712 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
3713 gen_rtx_REG (Pmode, RETURN_REGNUM));
3714
3715 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
3716 RTL_CONST_CALL_P (insn) = 1;
3717 }
3718
3719 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3720 this (thread-local) address. REG may be used as temporary. */
3721
3722 static rtx
3723 legitimize_tls_address (rtx addr, rtx reg)
3724 {
3725 rtx new_rtx, tls_call, temp, base, r2, insn;
3726
3727 if (GET_CODE (addr) == SYMBOL_REF)
3728 switch (tls_symbolic_operand (addr))
3729 {
3730 case TLS_MODEL_GLOBAL_DYNAMIC:
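/* Call __tls_get_offset to compute the symbol's offset from the thread
   pointer and then add the thread pointer to the result. */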
3731 start_sequence ();
3732 r2 = gen_rtx_REG (Pmode, 2);
3733 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
3734 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3735 new_rtx = force_const_mem (Pmode, new_rtx);
3736 emit_move_insn (r2, new_rtx);
3737 s390_emit_tls_call_insn (r2, tls_call);
3738 insn = get_insns ();
3739 end_sequence ();
3740
3741 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3742 temp = gen_reg_rtx (Pmode);
3743 emit_libcall_block (insn, temp, r2, new_rtx);
3744
3745 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3746 if (reg != 0)
3747 {
3748 s390_load_address (reg, new_rtx);
3749 new_rtx = reg;
3750 }
3751 break;
3752
3753 case TLS_MODEL_LOCAL_DYNAMIC:
3754 start_sequence ();
3755 r2 = gen_rtx_REG (Pmode, 2);
3756 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
3757 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3758 new_rtx = force_const_mem (Pmode, new_rtx);
3759 emit_move_insn (r2, new_rtx);
3760 s390_emit_tls_call_insn (r2, tls_call);
3761 insn = get_insns ();
3762 end_sequence ();
3763
3764 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
3765 temp = gen_reg_rtx (Pmode);
3766 emit_libcall_block (insn, temp, r2, new_rtx);
3767
3768 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3769 base = gen_reg_rtx (Pmode);
3770 s390_load_address (base, new_rtx);
3771
3772 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
3773 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3774 new_rtx = force_const_mem (Pmode, new_rtx);
3775 temp = gen_reg_rtx (Pmode);
3776 emit_move_insn (temp, new_rtx);
3777
3778 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
3779 if (reg != 0)
3780 {
3781 s390_load_address (reg, new_rtx);
3782 new_rtx = reg;
3783 }
3784 break;
3785
3786 case TLS_MODEL_INITIAL_EXEC:
3787 if (flag_pic == 1)
3788 {
3789 /* Assume GOT offset < 4k. This is handled the same way
3790 in both 31- and 64-bit code. */
3791
3792 if (reload_in_progress || reload_completed)
3793 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3794
3795 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3796 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3797 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3798 new_rtx = gen_const_mem (Pmode, new_rtx);
3799 temp = gen_reg_rtx (Pmode);
3800 emit_move_insn (temp, new_rtx);
3801 }
3802 else if (TARGET_CPU_ZARCH)
3803 {
3804 /* If the GOT offset might be >= 4k, we determine the position
3805 of the GOT entry via a PC-relative LARL. */
3806
3807 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3808 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3809 temp = gen_reg_rtx (Pmode);
3810 emit_move_insn (temp, new_rtx);
3811
3812 new_rtx = gen_const_mem (Pmode, temp);
3813 temp = gen_reg_rtx (Pmode);
3814 emit_move_insn (temp, new_rtx);
3815 }
3816 else if (flag_pic)
3817 {
3818 /* If the GOT offset might be >= 4k, we have to load it
3819 from the literal pool. */
3820
3821 if (reload_in_progress || reload_completed)
3822 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3823
3824 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3825 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3826 new_rtx = force_const_mem (Pmode, new_rtx);
3827 temp = gen_reg_rtx (Pmode);
3828 emit_move_insn (temp, new_rtx);
3829
3830 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3831 new_rtx = gen_const_mem (Pmode, new_rtx);
3832
3833 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3834 temp = gen_reg_rtx (Pmode);
3835 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3836 }
3837 else
3838 {
3839 /* In position-dependent code, load the absolute address of
3840 the GOT entry from the literal pool. */
3841
3842 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3843 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3844 new_rtx = force_const_mem (Pmode, new_rtx);
3845 temp = gen_reg_rtx (Pmode);
3846 emit_move_insn (temp, new_rtx);
3847
3848 new_rtx = temp;
3849 new_rtx = gen_const_mem (Pmode, new_rtx);
3850 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3851 temp = gen_reg_rtx (Pmode);
3852 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3853 }
3854
3855 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3856 if (reg != 0)
3857 {
3858 s390_load_address (reg, new_rtx);
3859 new_rtx = reg;
3860 }
3861 break;
3862
3863 case TLS_MODEL_LOCAL_EXEC:
3864 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3865 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3866 new_rtx = force_const_mem (Pmode, new_rtx);
3867 temp = gen_reg_rtx (Pmode);
3868 emit_move_insn (temp, new_rtx);
3869
3870 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3871 if (reg != 0)
3872 {
3873 s390_load_address (reg, new_rtx);
3874 new_rtx = reg;
3875 }
3876 break;
3877
3878 default:
3879 gcc_unreachable ();
3880 }
3881
3882 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
3883 {
3884 switch (XINT (XEXP (addr, 0), 1))
3885 {
3886 case UNSPEC_INDNTPOFF:
3887 gcc_assert (TARGET_CPU_ZARCH);
3888 new_rtx = addr;
3889 break;
3890
3891 default:
3892 gcc_unreachable ();
3893 }
3894 }
3895
3896 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
3897 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3898 {
3899 new_rtx = XEXP (XEXP (addr, 0), 0);
3900 if (GET_CODE (new_rtx) != SYMBOL_REF)
3901 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3902
3903 new_rtx = legitimize_tls_address (new_rtx, reg);
3904 new_rtx = plus_constant (Pmode, new_rtx,
3905 INTVAL (XEXP (XEXP (addr, 0), 1)));
3906 new_rtx = force_operand (new_rtx, 0);
3907 }
3908
3909 else
3910 gcc_unreachable (); /* for now ... */
3911
3912 return new_rtx;
3913 }
3914
3915 /* Emit insns making the address in operands[1] valid for a standard
3916 move to operands[0]. operands[1] is replaced by an address which
3917 should be used instead of the former RTX to emit the move
3918 pattern. */
3919
3920 void
3921 emit_symbolic_move (rtx *operands)
3922 {
3923 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
3924
3925 if (GET_CODE (operands[0]) == MEM)
3926 operands[1] = force_reg (Pmode, operands[1]);
3927 else if (TLS_SYMBOLIC_CONST (operands[1]))
3928 operands[1] = legitimize_tls_address (operands[1], temp);
3929 else if (flag_pic)
3930 operands[1] = legitimize_pic_address (operands[1], temp);
3931 }
3932
3933 /* Try machine-dependent ways of modifying an illegitimate address X
3934 to be legitimate. If we find one, return the new, valid address.
3935
3936 OLDX is the address as it was before break_out_memory_refs was called.
3937 In some cases it is useful to look at this to decide what needs to be done.
3938
3939 MODE is the mode of the operand pointed to by X.
3940
3941 When -fpic is used, special handling is needed for symbolic references.
3942 See comments by legitimize_pic_address for details. */
3943
3944 static rtx
3945 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3946 enum machine_mode mode ATTRIBUTE_UNUSED)
3947 {
3948 rtx constant_term = const0_rtx;
3949
3950 if (TLS_SYMBOLIC_CONST (x))
3951 {
3952 x = legitimize_tls_address (x, 0);
3953
3954 if (s390_legitimate_address_p (mode, x, FALSE))
3955 return x;
3956 }
3957 else if (GET_CODE (x) == PLUS
3958 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
3959 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
3960 {
3961 return x;
3962 }
3963 else if (flag_pic)
3964 {
3965 if (SYMBOLIC_CONST (x)
3966 || (GET_CODE (x) == PLUS
3967 && (SYMBOLIC_CONST (XEXP (x, 0))
3968 || SYMBOLIC_CONST (XEXP (x, 1)))))
3969 x = legitimize_pic_address (x, 0);
3970
3971 if (s390_legitimate_address_p (mode, x, FALSE))
3972 return x;
3973 }
3974
3975 x = eliminate_constant_term (x, &constant_term);
3976
3977 /* Optimize loading of large displacements by splitting them
3978 into the multiple of 4K and the rest; this allows the
3979 former to be CSE'd if possible.
3980
3981 Don't do this if the displacement is added to a register
3982 pointing into the stack frame, as the offsets will
3983 change later anyway. */
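  /* As an illustration (hypothetical values, not taken from the code
     below): a constant term of 0x12345 is split into
     upper = 0x12345 ^ (0x12345 & 0xfff) = 0x12000, which is loaded
     into a fresh pseudo, and lower = 0x345, which stays in the
     displacement field; the 0x12000 load can then be CSE'd with
     neighbouring accesses.  */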
3984
3985 if (GET_CODE (constant_term) == CONST_INT
3986 && !TARGET_LONG_DISPLACEMENT
3987 && !DISP_IN_RANGE (INTVAL (constant_term))
3988 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
3989 {
3990 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
3991 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
3992
3993 rtx temp = gen_reg_rtx (Pmode);
3994 rtx val = force_operand (GEN_INT (upper), temp);
3995 if (val != temp)
3996 emit_move_insn (temp, val);
3997
3998 x = gen_rtx_PLUS (Pmode, x, temp);
3999 constant_term = GEN_INT (lower);
4000 }
4001
4002 if (GET_CODE (x) == PLUS)
4003 {
4004 if (GET_CODE (XEXP (x, 0)) == REG)
4005 {
4006 rtx temp = gen_reg_rtx (Pmode);
4007 rtx val = force_operand (XEXP (x, 1), temp);
4008 if (val != temp)
4009 emit_move_insn (temp, val);
4010
4011 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
4012 }
4013
4014 else if (GET_CODE (XEXP (x, 1)) == REG)
4015 {
4016 rtx temp = gen_reg_rtx (Pmode);
4017 rtx val = force_operand (XEXP (x, 0), temp);
4018 if (val != temp)
4019 emit_move_insn (temp, val);
4020
4021 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
4022 }
4023 }
4024
4025 if (constant_term != const0_rtx)
4026 x = gen_rtx_PLUS (Pmode, x, constant_term);
4027
4028 return x;
4029 }
4030
4031 /* Try a machine-dependent way of reloading an illegitimate address AD
4032 operand. If we find one, push the reload and return the new address.
4033
4034 MODE is the mode of the enclosing MEM. OPNUM is the operand number
4035 and TYPE is the reload type of the current reload. */
4036
4037 rtx
4038 legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
4039 int opnum, int type)
4040 {
4041 if (!optimize || TARGET_LONG_DISPLACEMENT)
4042 return NULL_RTX;
4043
4044 if (GET_CODE (ad) == PLUS)
4045 {
4046 rtx tem = simplify_binary_operation (PLUS, Pmode,
4047 XEXP (ad, 0), XEXP (ad, 1));
4048 if (tem)
4049 ad = tem;
4050 }
4051
4052 if (GET_CODE (ad) == PLUS
4053 && GET_CODE (XEXP (ad, 0)) == REG
4054 && GET_CODE (XEXP (ad, 1)) == CONST_INT
4055 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
4056 {
4057 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
4058 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
4059 rtx cst, tem, new_rtx;
4060
4061 cst = GEN_INT (upper);
4062 if (!legitimate_reload_constant_p (cst))
4063 cst = force_const_mem (Pmode, cst);
4064
4065 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
4066 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
4067
4068 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
4069 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
4070 opnum, (enum reload_type) type);
4071 return new_rtx;
4072 }
4073
4074 return NULL_RTX;
4075 }
4076
4077 /* Emit code to move LEN bytes from SRC to DST. */
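/* A rough sketch of the strategy below: constant lengths of at most 256
   bytes become a single MVC; with TARGET_MVCLE the whole copy is done by
   MVCLE; otherwise a loop of 256-byte MVCs plus one trailing MVC handles
   the remainder.  Returning false leaves the copy to the generic
   expanders and ultimately to the library memcpy.  */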
4078
4079 bool
4080 s390_expand_movmem (rtx dst, rtx src, rtx len)
4081 {
4082 /* When tuning for z10 or higher we rely on the Glibc functions to
4083 do the right thing. We generate inline code only for constant
4084 lengths of at most 64k. */
4085 if (s390_tune >= PROCESSOR_2097_Z10
4086 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4087 return false;
4088
4089 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4090 {
4091 if (INTVAL (len) > 0)
4092 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
4093 }
4094
4095 else if (TARGET_MVCLE)
4096 {
4097 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
4098 }
4099
4100 else
4101 {
4102 rtx dst_addr, src_addr, count, blocks, temp;
4103 rtx loop_start_label = gen_label_rtx ();
4104 rtx loop_end_label = gen_label_rtx ();
4105 rtx end_label = gen_label_rtx ();
4106 enum machine_mode mode;
4107
4108 mode = GET_MODE (len);
4109 if (mode == VOIDmode)
4110 mode = Pmode;
4111
4112 dst_addr = gen_reg_rtx (Pmode);
4113 src_addr = gen_reg_rtx (Pmode);
4114 count = gen_reg_rtx (mode);
4115 blocks = gen_reg_rtx (mode);
4116
4117 convert_move (count, len, 1);
4118 emit_cmp_and_jump_insns (count, const0_rtx,
4119 EQ, NULL_RTX, mode, 1, end_label);
4120
4121 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4122 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
4123 dst = change_address (dst, VOIDmode, dst_addr);
4124 src = change_address (src, VOIDmode, src_addr);
4125
4126 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4127 OPTAB_DIRECT);
4128 if (temp != count)
4129 emit_move_insn (count, temp);
4130
4131 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4132 OPTAB_DIRECT);
4133 if (temp != blocks)
4134 emit_move_insn (blocks, temp);
4135
4136 emit_cmp_and_jump_insns (blocks, const0_rtx,
4137 EQ, NULL_RTX, mode, 1, loop_end_label);
4138
4139 emit_label (loop_start_label);
4140
4141 if (TARGET_Z10
4142 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
4143 {
4144 rtx prefetch;
4145
4146 /* Issue a read prefetch for the +3 cache line. */
4147 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
4148 const0_rtx, const0_rtx);
4149 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4150 emit_insn (prefetch);
4151
4152 /* Issue a write prefetch for the +3 cache line. */
4153 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
4154 const1_rtx, const0_rtx);
4155 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4156 emit_insn (prefetch);
4157 }
4158
4159 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
4160 s390_load_address (dst_addr,
4161 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4162 s390_load_address (src_addr,
4163 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
4164
4165 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4166 OPTAB_DIRECT);
4167 if (temp != blocks)
4168 emit_move_insn (blocks, temp);
4169
4170 emit_cmp_and_jump_insns (blocks, const0_rtx,
4171 EQ, NULL_RTX, mode, 1, loop_end_label);
4172
4173 emit_jump (loop_start_label);
4174 emit_label (loop_end_label);
4175
4176 emit_insn (gen_movmem_short (dst, src,
4177 convert_to_mode (Pmode, count, 1)));
4178 emit_label (end_label);
4179 }
4180 return true;
4181 }
4182
4183 /* Emit code to set LEN bytes at DST to VAL.
4184 Make use of clrmem if VAL is zero. */
4185
4186 void
4187 s390_expand_setmem (rtx dst, rtx len, rtx val)
4188 {
4189 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
4190 return;
4191
4192 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
4193
4194 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
4195 {
4196 if (val == const0_rtx && INTVAL (len) <= 256)
4197 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
4198 else
4199 {
4200 /* Initialize memory by storing the first byte. */
4201 emit_move_insn (adjust_address (dst, QImode, 0), val);
4202
4203 if (INTVAL (len) > 1)
4204 {
4205 /* Initiate a 1-byte overlapping move.
4206 The first byte of DST is propagated through DSTP1.
4207 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
4208 DST is set to size 1 so the rest of the memory location
4209 does not count as a source operand. */
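  /* A small worked example (values chosen for illustration): for
     LEN = 4 and VAL = 0xAB, the first byte store above sets DST[0],
     and the MVC emitted below copies LEN - 1 = 3 bytes from DST to
     DST + 1 one byte at a time, so DST[1] = DST[0], DST[2] = DST[1],
     DST[3] = DST[2], and the whole block ends up filled with 0xAB.  */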
4210 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
4211 set_mem_size (dst, 1);
4212
4213 emit_insn (gen_movmem_short (dstp1, dst,
4214 GEN_INT (INTVAL (len) - 2)));
4215 }
4216 }
4217 }
4218
4219 else if (TARGET_MVCLE)
4220 {
4221 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
4222 emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
4223 }
4224
4225 else
4226 {
4227 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
4228 rtx loop_start_label = gen_label_rtx ();
4229 rtx loop_end_label = gen_label_rtx ();
4230 rtx end_label = gen_label_rtx ();
4231 enum machine_mode mode;
4232
4233 mode = GET_MODE (len);
4234 if (mode == VOIDmode)
4235 mode = Pmode;
4236
4237 dst_addr = gen_reg_rtx (Pmode);
4238 count = gen_reg_rtx (mode);
4239 blocks = gen_reg_rtx (mode);
4240
4241 convert_move (count, len, 1);
4242 emit_cmp_and_jump_insns (count, const0_rtx,
4243 EQ, NULL_RTX, mode, 1, end_label);
4244
4245 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4246 dst = change_address (dst, VOIDmode, dst_addr);
4247
4248 if (val == const0_rtx)
4249 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4250 OPTAB_DIRECT);
4251 else
4252 {
4253 dstp1 = adjust_address (dst, VOIDmode, 1);
4254 set_mem_size (dst, 1);
4255
4256 /* Initialize memory by storing the first byte. */
4257 emit_move_insn (adjust_address (dst, QImode, 0), val);
4258
4259 /* If count is 1 we are done. */
4260 emit_cmp_and_jump_insns (count, const1_rtx,
4261 EQ, NULL_RTX, mode, 1, end_label);
4262
4263 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
4264 OPTAB_DIRECT);
4265 }
4266 if (temp != count)
4267 emit_move_insn (count, temp);
4268
4269 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4270 OPTAB_DIRECT);
4271 if (temp != blocks)
4272 emit_move_insn (blocks, temp);
4273
4274 emit_cmp_and_jump_insns (blocks, const0_rtx,
4275 EQ, NULL_RTX, mode, 1, loop_end_label);
4276
4277 emit_label (loop_start_label);
4278
4279 if (TARGET_Z10
4280 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
4281 {
4282 /* Issue a write prefetch for the +4 cache line. */
4283 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
4284 GEN_INT (1024)),
4285 const1_rtx, const0_rtx);
4286 emit_insn (prefetch);
4287 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4288 }
4289
4290 if (val == const0_rtx)
4291 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
4292 else
4293 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
4294 s390_load_address (dst_addr,
4295 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4296
4297 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4298 OPTAB_DIRECT);
4299 if (temp != blocks)
4300 emit_move_insn (blocks, temp);
4301
4302 emit_cmp_and_jump_insns (blocks, const0_rtx,
4303 EQ, NULL_RTX, mode, 1, loop_end_label);
4304
4305 emit_jump (loop_start_label);
4306 emit_label (loop_end_label);
4307
4308 if (val == const0_rtx)
4309 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
4310 else
4311 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
4312 emit_label (end_label);
4313 }
4314 }
4315
4316 /* Emit code to compare LEN bytes at OP0 with those at OP1,
4317 and return the result in TARGET. */
4318
4319 bool
4320 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
4321 {
4322 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
4323 rtx tmp;
4324
4325 /* When tuning for z10 or higher we rely on the Glibc functions to
4326 do the right thing. We generate inline code only for constant
4327 lengths of at most 64k. */
4328 if (s390_tune >= PROCESSOR_2097_Z10
4329 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4330 return false;
4331
4332 /* As the result of CMPINT is inverted compared to what we need,
4333 we have to swap the operands. */
4334 tmp = op0; op0 = op1; op1 = tmp;
4335
4336 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4337 {
4338 if (INTVAL (len) > 0)
4339 {
4340 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
4341 emit_insn (gen_cmpint (target, ccreg));
4342 }
4343 else
4344 emit_move_insn (target, const0_rtx);
4345 }
4346 else if (TARGET_MVCLE)
4347 {
4348 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
4349 emit_insn (gen_cmpint (target, ccreg));
4350 }
4351 else
4352 {
4353 rtx addr0, addr1, count, blocks, temp;
4354 rtx loop_start_label = gen_label_rtx ();
4355 rtx loop_end_label = gen_label_rtx ();
4356 rtx end_label = gen_label_rtx ();
4357 enum machine_mode mode;
4358
4359 mode = GET_MODE (len);
4360 if (mode == VOIDmode)
4361 mode = Pmode;
4362
4363 addr0 = gen_reg_rtx (Pmode);
4364 addr1 = gen_reg_rtx (Pmode);
4365 count = gen_reg_rtx (mode);
4366 blocks = gen_reg_rtx (mode);
4367
4368 convert_move (count, len, 1);
4369 emit_cmp_and_jump_insns (count, const0_rtx,
4370 EQ, NULL_RTX, mode, 1, end_label);
4371
4372 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
4373 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
4374 op0 = change_address (op0, VOIDmode, addr0);
4375 op1 = change_address (op1, VOIDmode, addr1);
4376
4377 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4378 OPTAB_DIRECT);
4379 if (temp != count)
4380 emit_move_insn (count, temp);
4381
4382 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4383 OPTAB_DIRECT);
4384 if (temp != blocks)
4385 emit_move_insn (blocks, temp);
4386
4387 emit_cmp_and_jump_insns (blocks, const0_rtx,
4388 EQ, NULL_RTX, mode, 1, loop_end_label);
4389
4390 emit_label (loop_start_label);
4391
4392 if (TARGET_Z10
4393 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
4394 {
4395 rtx prefetch;
4396
4397 /* Issue a read prefetch for the +2 cache line of operand 1. */
4398 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
4399 const0_rtx, const0_rtx);
4400 emit_insn (prefetch);
4401 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4402
4403 /* Issue a read prefetch for the +2 cache line of operand 2. */
4404 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
4405 const0_rtx, const0_rtx);
4406 emit_insn (prefetch);
4407 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4408 }
4409
4410 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
4411 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
4412 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
4413 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
4414 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
4415 emit_jump_insn (temp);
4416
4417 s390_load_address (addr0,
4418 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
4419 s390_load_address (addr1,
4420 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
4421
4422 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4423 OPTAB_DIRECT);
4424 if (temp != blocks)
4425 emit_move_insn (blocks, temp);
4426
4427 emit_cmp_and_jump_insns (blocks, const0_rtx,
4428 EQ, NULL_RTX, mode, 1, loop_end_label);
4429
4430 emit_jump (loop_start_label);
4431 emit_label (loop_end_label);
4432
4433 emit_insn (gen_cmpmem_short (op0, op1,
4434 convert_to_mode (Pmode, count, 1)));
4435 emit_label (end_label);
4436
4437 emit_insn (gen_cmpint (target, ccreg));
4438 }
4439 return true;
4440 }
4441
4442
4443 /* Expand conditional increment or decrement using alc/slb instructions.
4444 Should generate code setting DST to either SRC or SRC + INCREMENT,
4445 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
4446 Returns true if successful, false otherwise.
4447
4448 That makes it possible to implement some if-constructs without jumps, e.g.:
4449 (borrow = CC0 | CC1 and carry = CC2 | CC3)
4450 unsigned int a, b, c;
4451 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
4452 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
4453 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
4454 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
4455
4456 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
4457 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
4458 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
4459 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
4460 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
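/* A rough sketch of what the SImode "if (a < b) c++;" case below emits
   (RTL shapes only; the CC register is written as CC_REGNUM):

     (set (reg:CCU CC_REGNUM) (compare:CCU (reg:SI b) (reg:SI a)))

     (parallel
       [(set (reg:SI c')
             (plus:SI (plus:SI (gtu:SI (reg:CCU CC_REGNUM) (const_int 0))
                               (reg:SI c))
                      (const_int 0)))
        (clobber (reg:CC CC_REGNUM))])

   i.e. a logical compare followed by an add-logical-with-carry pattern;
   the slb case further down is symmetric and subtracts the borrow
   instead.  */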
4461
4462 bool
4463 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
4464 rtx dst, rtx src, rtx increment)
4465 {
4466 enum machine_mode cmp_mode;
4467 enum machine_mode cc_mode;
4468 rtx op_res;
4469 rtx insn;
4470 rtvec p;
4471 int ret;
4472
4473 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
4474 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
4475 cmp_mode = SImode;
4476 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
4477 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
4478 cmp_mode = DImode;
4479 else
4480 return false;
4481
4482 /* Try ADD LOGICAL WITH CARRY. */
4483 if (increment == const1_rtx)
4484 {
4485 /* Determine CC mode to use. */
4486 if (cmp_code == EQ || cmp_code == NE)
4487 {
4488 if (cmp_op1 != const0_rtx)
4489 {
4490 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4491 NULL_RTX, 0, OPTAB_WIDEN);
4492 cmp_op1 = const0_rtx;
4493 }
4494
4495 cmp_code = cmp_code == EQ ? LEU : GTU;
4496 }
4497
4498 if (cmp_code == LTU || cmp_code == LEU)
4499 {
4500 rtx tem = cmp_op0;
4501 cmp_op0 = cmp_op1;
4502 cmp_op1 = tem;
4503 cmp_code = swap_condition (cmp_code);
4504 }
4505
4506 switch (cmp_code)
4507 {
4508 case GTU:
4509 cc_mode = CCUmode;
4510 break;
4511
4512 case GEU:
4513 cc_mode = CCL3mode;
4514 break;
4515
4516 default:
4517 return false;
4518 }
4519
4520 /* Emit comparison instruction pattern. */
4521 if (!register_operand (cmp_op0, cmp_mode))
4522 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4523
4524 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4525 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4526 /* We use insn_invalid_p here to add clobbers if required. */
4527 ret = insn_invalid_p (emit_insn (insn), false);
4528 gcc_assert (!ret);
4529
4530 /* Emit ALC instruction pattern. */
4531 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4532 gen_rtx_REG (cc_mode, CC_REGNUM),
4533 const0_rtx);
4534
4535 if (src != const0_rtx)
4536 {
4537 if (!register_operand (src, GET_MODE (dst)))
4538 src = force_reg (GET_MODE (dst), src);
4539
4540 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
4541 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
4542 }
4543
4544 p = rtvec_alloc (2);
4545 RTVEC_ELT (p, 0) =
4546 gen_rtx_SET (VOIDmode, dst, op_res);
4547 RTVEC_ELT (p, 1) =
4548 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4549 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4550
4551 return true;
4552 }
4553
4554 /* Try SUBTRACT LOGICAL WITH BORROW. */
4555 if (increment == constm1_rtx)
4556 {
4557 /* Determine CC mode to use. */
4558 if (cmp_code == EQ || cmp_code == NE)
4559 {
4560 if (cmp_op1 != const0_rtx)
4561 {
4562 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4563 NULL_RTX, 0, OPTAB_WIDEN);
4564 cmp_op1 = const0_rtx;
4565 }
4566
4567 cmp_code = cmp_code == EQ ? LEU : GTU;
4568 }
4569
4570 if (cmp_code == GTU || cmp_code == GEU)
4571 {
4572 rtx tem = cmp_op0;
4573 cmp_op0 = cmp_op1;
4574 cmp_op1 = tem;
4575 cmp_code = swap_condition (cmp_code);
4576 }
4577
4578 switch (cmp_code)
4579 {
4580 case LEU:
4581 cc_mode = CCUmode;
4582 break;
4583
4584 case LTU:
4585 cc_mode = CCL3mode;
4586 break;
4587
4588 default:
4589 return false;
4590 }
4591
4592 /* Emit comparison instruction pattern. */
4593 if (!register_operand (cmp_op0, cmp_mode))
4594 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4595
4596 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4597 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4598 /* We use insn_invalid_p here to add clobbers if required. */
4599 ret = insn_invalid_p (emit_insn (insn), false);
4600 gcc_assert (!ret);
4601
4602 /* Emit SLB instruction pattern. */
4603 if (!register_operand (src, GET_MODE (dst)))
4604 src = force_reg (GET_MODE (dst), src);
4605
4606 op_res = gen_rtx_MINUS (GET_MODE (dst),
4607 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
4608 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4609 gen_rtx_REG (cc_mode, CC_REGNUM),
4610 const0_rtx));
4611 p = rtvec_alloc (2);
4612 RTVEC_ELT (p, 0) =
4613 gen_rtx_SET (VOIDmode, dst, op_res);
4614 RTVEC_ELT (p, 1) =
4615 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4616 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4617
4618 return true;
4619 }
4620
4621 return false;
4622 }
4623
4624 /* Expand code for the insv template. Return true if successful. */
4625
4626 bool
4627 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
4628 {
4629 int bitsize = INTVAL (op1);
4630 int bitpos = INTVAL (op2);
4631 enum machine_mode mode = GET_MODE (dest);
4632 enum machine_mode smode;
4633 int smode_bsize, mode_bsize;
4634 rtx op, clobber;
4635
4636 /* Generate INSERT IMMEDIATE (IILL et al). */
4637 /* (set (ze (reg)) (const_int)). */
4638 if (TARGET_ZARCH
4639 && register_operand (dest, word_mode)
4640 && (bitpos % 16) == 0
4641 && (bitsize % 16) == 0
4642 && const_int_operand (src, VOIDmode))
4643 {
4644 HOST_WIDE_INT val = INTVAL (src);
4645 int regpos = bitpos + bitsize;
4646
4647 while (regpos > bitpos)
4648 {
4649 enum machine_mode putmode;
4650 int putsize;
4651
4652 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
4653 putmode = SImode;
4654 else
4655 putmode = HImode;
4656
4657 putsize = GET_MODE_BITSIZE (putmode);
4658 regpos -= putsize;
4659 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4660 GEN_INT (putsize),
4661 GEN_INT (regpos)),
4662 gen_int_mode (val, putmode));
4663 val >>= putsize;
4664 }
4665 gcc_assert (regpos == bitpos);
4666 return true;
4667 }
4668
4669 smode = smallest_mode_for_size (bitsize, MODE_INT);
4670 smode_bsize = GET_MODE_BITSIZE (smode);
4671 mode_bsize = GET_MODE_BITSIZE (mode);
4672
4673 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
4674 if (bitpos == 0
4675 && (bitsize % BITS_PER_UNIT) == 0
4676 && MEM_P (dest)
4677 && (register_operand (src, word_mode)
4678 || const_int_operand (src, VOIDmode)))
4679 {
4680 /* Emit standard pattern if possible. */
4681 if (smode_bsize == bitsize)
4682 {
4683 emit_move_insn (adjust_address (dest, smode, 0),
4684 gen_lowpart (smode, src));
4685 return true;
4686 }
4687
4688 /* (set (ze (mem)) (const_int)). */
4689 else if (const_int_operand (src, VOIDmode))
4690 {
4691 int size = bitsize / BITS_PER_UNIT;
4692 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
4693 BLKmode,
4694 UNITS_PER_WORD - size);
4695
4696 dest = adjust_address (dest, BLKmode, 0);
4697 set_mem_size (dest, size);
4698 s390_expand_movmem (dest, src_mem, GEN_INT (size));
4699 return true;
4700 }
4701
4702 /* (set (ze (mem)) (reg)). */
4703 else if (register_operand (src, word_mode))
4704 {
4705 if (bitsize <= 32)
4706 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
4707 const0_rtx), src);
4708 else
4709 {
4710 /* Emit st,stcmh sequence. */
4711 int stcmh_width = bitsize - 32;
4712 int size = stcmh_width / BITS_PER_UNIT;
4713
4714 emit_move_insn (adjust_address (dest, SImode, size),
4715 gen_lowpart (SImode, src));
4716 set_mem_size (dest, size);
4717 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4718 GEN_INT (stcmh_width),
4719 const0_rtx),
4720 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
4721 }
4722 return true;
4723 }
4724 }
4725
4726 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
4727 if ((bitpos % BITS_PER_UNIT) == 0
4728 && (bitsize % BITS_PER_UNIT) == 0
4729 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
4730 && MEM_P (src)
4731 && (mode == DImode || mode == SImode)
4732 && register_operand (dest, mode))
4733 {
4734 /* Emit a strict_low_part pattern if possible. */
4735 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
4736 {
4737 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
4738 op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src));
4739 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4740 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
4741 return true;
4742 }
4743
4744 /* ??? There are more powerful versions of ICM that are not
4745 completely represented in the md file. */
4746 }
4747
4748 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
4749 if (TARGET_Z10 && (mode == DImode || mode == SImode))
4750 {
4751 enum machine_mode mode_s = GET_MODE (src);
4752
4753 if (mode_s == VOIDmode)
4754 {
4755 /* Assume const_int etc. are already in the proper mode. */
4756 src = force_reg (mode, src);
4757 }
4758 else if (mode_s != mode)
4759 {
4760 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
4761 src = force_reg (mode_s, src);
4762 src = gen_lowpart (mode, src);
4763 }
4764
4765 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
4766 op = gen_rtx_SET (VOIDmode, op, src);
4767
4768 if (!TARGET_ZEC12)
4769 {
4770 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4771 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
4772 }
4773 emit_insn (op);
4774
4775 return true;
4776 }
4777
4778 return false;
4779 }
4780
4781 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
4782 register that holds VAL of mode MODE shifted by COUNT bits. */
4783
4784 static inline rtx
4785 s390_expand_mask_and_shift (rtx val, enum machine_mode mode, rtx count)
4786 {
4787 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
4788 NULL_RTX, 1, OPTAB_DIRECT);
4789 return expand_simple_binop (SImode, ASHIFT, val, count,
4790 NULL_RTX, 1, OPTAB_DIRECT);
4791 }
4792
4793 /* Structure to hold the initial parameters for a compare_and_swap operation
4794 in HImode and QImode. */
4795
4796 struct alignment_context
4797 {
4798 rtx memsi; /* SI aligned memory location. */
4799 rtx shift; /* Bit offset with regard to lsb. */
4800 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
4801 rtx modemaski; /* ~modemask */
4802 bool aligned; /* True if memory is aligned, false otherwise. */
4803 };
4804
4805 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
4806 structure AC for transparent simplification, if the memory alignment is known
4807 to be at least 32 bits. MEM is the memory location for the actual operation
4808 and MODE its mode. */
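  /* A worked example for the unaligned QImode case (the address is
     illustrative): for a byte at address A with A % 4 == 1, memsi is
     the SImode word at A & -4; the initial shift of
     GET_MODE_SIZE (SImode) - GET_MODE_SIZE (QImode) = 3 is reduced by
     the byte offset 1 and scaled to bits, giving shift = 16, so that
     modemask = 0xff << 16 selects exactly the addressed byte within
     the big-endian word.  */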
4809
4810 static void
4811 init_alignment_context (struct alignment_context *ac, rtx mem,
4812 enum machine_mode mode)
4813 {
4814 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
4815 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
4816
4817 if (ac->aligned)
4818 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
4819 else
4820 {
4821 /* Alignment is unknown. */
4822 rtx byteoffset, addr, align;
4823
4824 /* Force the address into a register. */
4825 addr = force_reg (Pmode, XEXP (mem, 0));
4826
4827 /* Align it to SImode. */
4828 align = expand_simple_binop (Pmode, AND, addr,
4829 GEN_INT (-GET_MODE_SIZE (SImode)),
4830 NULL_RTX, 1, OPTAB_DIRECT);
4831 /* Generate MEM. */
4832 ac->memsi = gen_rtx_MEM (SImode, align);
4833 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
4834 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
4835 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
4836
4837 /* Calculate shiftcount. */
4838 byteoffset = expand_simple_binop (Pmode, AND, addr,
4839 GEN_INT (GET_MODE_SIZE (SImode) - 1),
4840 NULL_RTX, 1, OPTAB_DIRECT);
4841 /* As we already have some offset, evaluate the remaining distance. */
4842 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
4843 NULL_RTX, 1, OPTAB_DIRECT);
4844 }
4845
4846 /* Shift is the byte count, but we need the bitcount. */
4847 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
4848 NULL_RTX, 1, OPTAB_DIRECT);
4849
4850 /* Calculate masks. */
4851 ac->modemask = expand_simple_binop (SImode, ASHIFT,
4852 GEN_INT (GET_MODE_MASK (mode)),
4853 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
4854 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
4855 NULL_RTX, 1);
4856 }
4857
4858 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
4859 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
4860 perform the merge in SEQ2. */
4861
4862 static rtx
4863 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
4864 enum machine_mode mode, rtx val, rtx ins)
4865 {
4866 rtx tmp;
4867
4868 if (ac->aligned)
4869 {
4870 start_sequence ();
4871 tmp = copy_to_mode_reg (SImode, val);
4872 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
4873 const0_rtx, ins))
4874 {
4875 *seq1 = NULL;
4876 *seq2 = get_insns ();
4877 end_sequence ();
4878 return tmp;
4879 }
4880 end_sequence ();
4881 }
4882
4883 /* Failed to use insv. Generate a two part shift and mask. */
4884 start_sequence ();
4885 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
4886 *seq1 = get_insns ();
4887 end_sequence ();
4888
4889 start_sequence ();
4890 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
4891 *seq2 = get_insns ();
4892 end_sequence ();
4893
4894 return tmp;
4895 }
4896
4897 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
4898 the memory location, CMP the old value to compare MEM with, and NEW_RTX the
4899 value to set if CMP == MEM. */
4900
4901 void
4902 s390_expand_cs_hqi (enum machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
4903 rtx cmp, rtx new_rtx, bool is_weak)
4904 {
4905 struct alignment_context ac;
4906 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
4907 rtx res = gen_reg_rtx (SImode);
4908 rtx csloop = NULL, csend = NULL;
4909
4910 gcc_assert (MEM_P (mem));
4911
4912 init_alignment_context (&ac, mem, mode);
4913
4914 /* Load full word. Subsequent loads are performed by CS. */
4915 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
4916 NULL_RTX, 1, OPTAB_DIRECT);
4917
4918 /* Prepare insertions of cmp and new_rtx into the loaded value. When
4919 possible, we try to use insv to make this happen efficiently. If
4920 that fails we'll generate code both inside and outside the loop. */
4921 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
4922 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
4923
4924 if (seq0)
4925 emit_insn (seq0);
4926 if (seq1)
4927 emit_insn (seq1);
4928
4929 /* Start CS loop. */
4930 if (!is_weak)
4931 {
4932 /* Begin assuming success. */
4933 emit_move_insn (btarget, const1_rtx);
4934
4935 csloop = gen_label_rtx ();
4936 csend = gen_label_rtx ();
4937 emit_label (csloop);
4938 }
4939
4940 /* val = "<mem>00..0<mem>"
4941 * cmp = "00..0<cmp>00..0"
4942 * new = "00..0<new>00..0"
4943 */
4944
4945 emit_insn (seq2);
4946 emit_insn (seq3);
4947
4948 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
4949 if (is_weak)
4950 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
4951 else
4952 {
4953 rtx tmp;
4954
4955 /* Jump to end if we're done (likely?). */
4956 s390_emit_jump (csend, cc);
4957
4958 /* Check for changes outside mode, and loop internally if so.
4959 Arrange the moves so that the compare is adjacent to the
4960 branch so that we can generate CRJ. */
4961 tmp = copy_to_reg (val);
4962 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
4963 1, OPTAB_DIRECT);
4964 cc = s390_emit_compare (NE, val, tmp);
4965 s390_emit_jump (csloop, cc);
4966
4967 /* Failed. */
4968 emit_move_insn (btarget, const0_rtx);
4969 emit_label (csend);
4970 }
4971
4972 /* Return the correct part of the bitfield. */
4973 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
4974 NULL_RTX, 1, OPTAB_DIRECT), 1);
4975 }
4976
4977 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
4978 and VAL the value to play with. If AFTER is true then store the value
4979 MEM holds after the operation, if AFTER is false then store the value MEM
4980 holds before the operation. If TARGET is zero then discard that value, else
4981 store it to TARGET. */
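/* A rough sketch of the loop built below, for say a QImode fetch-and-or
   (names as in the code): VAL is masked and shifted into its bit
   position, the containing SImode word is loaded into CMP once, and the
   loop then computes NEW_RTX as CMP with the field updated and retries
   CS (memsi, CMP, NEW_RTX) until the word did not change underneath us;
   finally the field is shifted back out of either NEW_RTX or CMP,
   depending on AFTER.  */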
4982
4983 void
4984 s390_expand_atomic (enum machine_mode mode, enum rtx_code code,
4985 rtx target, rtx mem, rtx val, bool after)
4986 {
4987 struct alignment_context ac;
4988 rtx cmp;
4989 rtx new_rtx = gen_reg_rtx (SImode);
4990 rtx orig = gen_reg_rtx (SImode);
4991 rtx csloop = gen_label_rtx ();
4992
4993 gcc_assert (!target || register_operand (target, VOIDmode));
4994 gcc_assert (MEM_P (mem));
4995
4996 init_alignment_context (&ac, mem, mode);
4997
4998 /* Shift val to the correct bit positions.
4999 Preserve "icm", but prevent "ex icm". */
5000 if (!(ac.aligned && code == SET && MEM_P (val)))
5001 val = s390_expand_mask_and_shift (val, mode, ac.shift);
5002
5003 /* Further preparation insns. */
5004 if (code == PLUS || code == MINUS)
5005 emit_move_insn (orig, val);
5006 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
5007 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
5008 NULL_RTX, 1, OPTAB_DIRECT);
5009
5010 /* Load full word. Subsequent loads are performed by CS. */
5011 cmp = force_reg (SImode, ac.memsi);
5012
5013 /* Start CS loop. */
5014 emit_label (csloop);
5015 emit_move_insn (new_rtx, cmp);
5016
5017 /* Patch new with val at correct position. */
5018 switch (code)
5019 {
5020 case PLUS:
5021 case MINUS:
5022 val = expand_simple_binop (SImode, code, new_rtx, orig,
5023 NULL_RTX, 1, OPTAB_DIRECT);
5024 val = expand_simple_binop (SImode, AND, val, ac.modemask,
5025 NULL_RTX, 1, OPTAB_DIRECT);
5026 /* FALLTHRU */
5027 case SET:
5028 if (ac.aligned && MEM_P (val))
5029 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
5030 0, 0, SImode, val);
5031 else
5032 {
5033 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
5034 NULL_RTX, 1, OPTAB_DIRECT);
5035 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
5036 NULL_RTX, 1, OPTAB_DIRECT);
5037 }
5038 break;
5039 case AND:
5040 case IOR:
5041 case XOR:
5042 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
5043 NULL_RTX, 1, OPTAB_DIRECT);
5044 break;
5045 case MULT: /* NAND */
5046 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
5047 NULL_RTX, 1, OPTAB_DIRECT);
5048 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
5049 NULL_RTX, 1, OPTAB_DIRECT);
5050 break;
5051 default:
5052 gcc_unreachable ();
5053 }
5054
5055 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
5056 ac.memsi, cmp, new_rtx));
5057
5058 /* Return the correct part of the bitfield. */
5059 if (target)
5060 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
5061 after ? new_rtx : cmp, ac.shift,
5062 NULL_RTX, 1, OPTAB_DIRECT), 1);
5063 }
5064
5065 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5066 We need to emit DTP-relative relocations. */
5067
5068 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
5069
5070 static void
5071 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
5072 {
5073 switch (size)
5074 {
5075 case 4:
5076 fputs ("\t.long\t", file);
5077 break;
5078 case 8:
5079 fputs ("\t.quad\t", file);
5080 break;
5081 default:
5082 gcc_unreachable ();
5083 }
5084 output_addr_const (file, x);
5085 fputs ("@DTPOFF", file);
5086 }
5087
5088 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
5089 /* Implement TARGET_MANGLE_TYPE. */
5090
5091 static const char *
5092 s390_mangle_type (const_tree type)
5093 {
5094 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
5095 && TARGET_LONG_DOUBLE_128)
5096 return "g";
5097
5098 /* For all other types, use normal C++ mangling. */
5099 return NULL;
5100 }
5101 #endif
5102
5103 /* In the name of slightly smaller debug output, and to cater to
5104 general assembler lossage, recognize various UNSPEC sequences
5105 and turn them back into a direct symbol reference. */
5106
5107 static rtx
5108 s390_delegitimize_address (rtx orig_x)
5109 {
5110 rtx x, y;
5111
5112 orig_x = delegitimize_mem_from_attrs (orig_x);
5113 x = orig_x;
5114
5115 /* Extract the symbol ref from:
5116 (plus:SI (reg:SI 12 %r12)
5117 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
5118 UNSPEC_GOTOFF/PLTOFF)))
5119 and
5120 (plus:SI (reg:SI 12 %r12)
5121 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
5122 UNSPEC_GOTOFF/PLTOFF)
5123 (const_int 4 [0x4])))) */
5124 if (GET_CODE (x) == PLUS
5125 && REG_P (XEXP (x, 0))
5126 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
5127 && GET_CODE (XEXP (x, 1)) == CONST)
5128 {
5129 HOST_WIDE_INT offset = 0;
5130
5131 /* The const operand. */
5132 y = XEXP (XEXP (x, 1), 0);
5133
5134 if (GET_CODE (y) == PLUS
5135 && GET_CODE (XEXP (y, 1)) == CONST_INT)
5136 {
5137 offset = INTVAL (XEXP (y, 1));
5138 y = XEXP (y, 0);
5139 }
5140
5141 if (GET_CODE (y) == UNSPEC
5142 && (XINT (y, 1) == UNSPEC_GOTOFF
5143 || XINT (y, 1) == UNSPEC_PLTOFF))
5144 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
5145 }
5146
5147 if (GET_CODE (x) != MEM)
5148 return orig_x;
5149
5150 x = XEXP (x, 0);
5151 if (GET_CODE (x) == PLUS
5152 && GET_CODE (XEXP (x, 1)) == CONST
5153 && GET_CODE (XEXP (x, 0)) == REG
5154 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5155 {
5156 y = XEXP (XEXP (x, 1), 0);
5157 if (GET_CODE (y) == UNSPEC
5158 && XINT (y, 1) == UNSPEC_GOT)
5159 y = XVECEXP (y, 0, 0);
5160 else
5161 return orig_x;
5162 }
5163 else if (GET_CODE (x) == CONST)
5164 {
5165 /* Extract the symbol ref from:
5166 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
5167 UNSPEC_PLT/GOTENT))) */
5168
5169 y = XEXP (x, 0);
5170 if (GET_CODE (y) == UNSPEC
5171 && (XINT (y, 1) == UNSPEC_GOTENT
5172 || XINT (y, 1) == UNSPEC_PLT))
5173 y = XVECEXP (y, 0, 0);
5174 else
5175 return orig_x;
5176 }
5177 else
5178 return orig_x;
5179
5180 if (GET_MODE (orig_x) != Pmode)
5181 {
5182 if (GET_MODE (orig_x) == BLKmode)
5183 return orig_x;
5184 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
5185 if (y == NULL_RTX)
5186 return orig_x;
5187 }
5188 return y;
5189 }
5190
5191 /* Output operand OP to stdio stream FILE.
5192 OP is an address (register + offset) which is not used to address data;
5193 instead the rightmost bits are interpreted as the value. */
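/* For example (purely illustrative), an OP of
   (plus:DI (reg:DI %r2) (const_int 7)) is printed as "7(%r2)"; the
   offset is emitted masked to twelve bits, and the hardware in turn
   uses only the rightmost bits of the computed sum as the shift
   amount.  */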
5194
5195 static void
5196 print_shift_count_operand (FILE *file, rtx op)
5197 {
5198 HOST_WIDE_INT offset;
5199 rtx base;
5200
5201 /* Extract base register and offset. */
5202 if (!s390_decompose_shift_count (op, &base, &offset))
5203 gcc_unreachable ();
5204
5205 /* Sanity check. */
5206 if (base)
5207 {
5208 gcc_assert (GET_CODE (base) == REG);
5209 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
5210 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
5211 }
5212
5213 /* Offsets are restricted to twelve bits. */
5214 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
5215 if (base)
5216 fprintf (file, "(%s)", reg_names[REGNO (base)]);
5217 }
5218
5219 /* See 'get_some_local_dynamic_name'. */
5220
5221 static int
5222 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
5223 {
5224 rtx x = *px;
5225
5226 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
5227 {
5228 x = get_pool_constant (x);
5229 return for_each_rtx (&x, get_some_local_dynamic_name_1, 0);
5230 }
5231
5232 if (GET_CODE (x) == SYMBOL_REF
5233 && tls_symbolic_operand (x) == TLS_MODEL_LOCAL_DYNAMIC)
5234 {
5235 cfun->machine->some_ld_name = XSTR (x, 0);
5236 return 1;
5237 }
5238
5239 return 0;
5240 }
5241
5242 /* Locate some local-dynamic symbol still in use by this function
5243 so that we can print its name in local-dynamic base patterns. */
5244
5245 static const char *
5246 get_some_local_dynamic_name (void)
5247 {
5248 rtx insn;
5249
5250 if (cfun->machine->some_ld_name)
5251 return cfun->machine->some_ld_name;
5252
5253 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
5254 if (INSN_P (insn)
5255 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
5256 return cfun->machine->some_ld_name;
5257
5258 gcc_unreachable ();
5259 }
5260
5261 /* Output machine-dependent UNSPECs occurring in address constant X
5262 in assembler syntax to stdio stream FILE. Returns true if the
5263 constant X could be recognized, false otherwise. */
5264
5265 static bool
5266 s390_output_addr_const_extra (FILE *file, rtx x)
5267 {
5268 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
5269 switch (XINT (x, 1))
5270 {
5271 case UNSPEC_GOTENT:
5272 output_addr_const (file, XVECEXP (x, 0, 0));
5273 fprintf (file, "@GOTENT");
5274 return true;
5275 case UNSPEC_GOT:
5276 output_addr_const (file, XVECEXP (x, 0, 0));
5277 fprintf (file, "@GOT");
5278 return true;
5279 case UNSPEC_GOTOFF:
5280 output_addr_const (file, XVECEXP (x, 0, 0));
5281 fprintf (file, "@GOTOFF");
5282 return true;
5283 case UNSPEC_PLT:
5284 output_addr_const (file, XVECEXP (x, 0, 0));
5285 fprintf (file, "@PLT");
5286 return true;
5287 case UNSPEC_PLTOFF:
5288 output_addr_const (file, XVECEXP (x, 0, 0));
5289 fprintf (file, "@PLTOFF");
5290 return true;
5291 case UNSPEC_TLSGD:
5292 output_addr_const (file, XVECEXP (x, 0, 0));
5293 fprintf (file, "@TLSGD");
5294 return true;
5295 case UNSPEC_TLSLDM:
5296 assemble_name (file, get_some_local_dynamic_name ());
5297 fprintf (file, "@TLSLDM");
5298 return true;
5299 case UNSPEC_DTPOFF:
5300 output_addr_const (file, XVECEXP (x, 0, 0));
5301 fprintf (file, "@DTPOFF");
5302 return true;
5303 case UNSPEC_NTPOFF:
5304 output_addr_const (file, XVECEXP (x, 0, 0));
5305 fprintf (file, "@NTPOFF");
5306 return true;
5307 case UNSPEC_GOTNTPOFF:
5308 output_addr_const (file, XVECEXP (x, 0, 0));
5309 fprintf (file, "@GOTNTPOFF");
5310 return true;
5311 case UNSPEC_INDNTPOFF:
5312 output_addr_const (file, XVECEXP (x, 0, 0));
5313 fprintf (file, "@INDNTPOFF");
5314 return true;
5315 }
5316
5317 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
5318 switch (XINT (x, 1))
5319 {
5320 case UNSPEC_POOL_OFFSET:
5321 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
5322 output_addr_const (file, x);
5323 return true;
5324 }
5325 return false;
5326 }
5327
5328 /* Output address operand ADDR in assembler syntax to
5329 stdio stream FILE. */
5330
5331 void
5332 print_operand_address (FILE *file, rtx addr)
5333 {
5334 struct s390_address ad;
5335
5336 if (s390_loadrelative_operand_p (addr, NULL, NULL))
5337 {
5338 if (!TARGET_Z10)
5339 {
5340 output_operand_lossage ("symbolic memory references are "
5341 "only supported on z10 or later");
5342 return;
5343 }
5344 output_addr_const (file, addr);
5345 return;
5346 }
5347
5348 if (!s390_decompose_address (addr, &ad)
5349 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5350 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
5351 output_operand_lossage ("cannot decompose address");
5352
5353 if (ad.disp)
5354 output_addr_const (file, ad.disp);
5355 else
5356 fprintf (file, "0");
5357
5358 if (ad.base && ad.indx)
5359 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
5360 reg_names[REGNO (ad.base)]);
5361 else if (ad.base)
5362 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5363 }
5364
5365 /* Output operand X in assembler syntax to stdio stream FILE.
5366 CODE specifies the format flag. The following format flags
5367 are recognized:
5368
5369 'C': print opcode suffix for branch condition.
5370 'D': print opcode suffix for inverse branch condition.
5371 'E': print opcode suffix for branch on index instruction.
5372 'G': print the size of the operand in bytes.
5373 'J': print tls_load/tls_gdcall/tls_ldcall suffix
5374 'M': print the second word of a TImode operand.
5375 'N': print the second word of a DImode operand.
5376 'O': print only the displacement of a memory reference.
5377 'R': print only the base register of a memory reference.
5378 'S': print S-type memory reference (base+displacement).
5379 'Y': print shift count operand.
5380
5381 'b': print integer X as if it's an unsigned byte.
5382 'c': print integer X as if it's a signed byte.
5383 'e': "end" of DImode contiguous bitmask X.
5384 'f': "end" of SImode contiguous bitmask X.
5385 'h': print integer X as if it's a signed halfword.
5386 'i': print the first nonzero HImode part of X.
5387 'j': print the first HImode part unequal to -1 of X.
5388 'k': print the first nonzero SImode part of X.
5389 'm': print the first SImode part unequal to -1 of X.
5390 'o': print integer X as if it's an unsigned 32-bit word.
5391 's': "start" of DImode contiguous bitmask X.
5392 't': "start" of SImode contiguous bitmask X.
5393 'x': print integer X as if it's an unsigned halfword.
5394 */
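/* For example (values chosen for illustration), for
   x = (const_int 0xfffffffffffff123): 'b' prints 35 (the low byte 0x23
   taken as unsigned), 'x' prints 61731 (the halfword 0xf123 taken as
   unsigned), and 'h' prints -3805 (the same halfword taken as
   signed).  */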
5395
5396 void
5397 print_operand (FILE *file, rtx x, int code)
5398 {
5399 HOST_WIDE_INT ival;
5400
5401 switch (code)
5402 {
5403 case 'C':
5404 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
5405 return;
5406
5407 case 'D':
5408 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
5409 return;
5410
5411 case 'E':
5412 if (GET_CODE (x) == LE)
5413 fprintf (file, "l");
5414 else if (GET_CODE (x) == GT)
5415 fprintf (file, "h");
5416 else
5417 output_operand_lossage ("invalid comparison operator "
5418 "for 'E' output modifier");
5419 return;
5420
5421 case 'J':
5422 if (GET_CODE (x) == SYMBOL_REF)
5423 {
5424 fprintf (file, "%s", ":tls_load:");
5425 output_addr_const (file, x);
5426 }
5427 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
5428 {
5429 fprintf (file, "%s", ":tls_gdcall:");
5430 output_addr_const (file, XVECEXP (x, 0, 0));
5431 }
5432 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
5433 {
5434 fprintf (file, "%s", ":tls_ldcall:");
5435 assemble_name (file, get_some_local_dynamic_name ());
5436 }
5437 else
5438 output_operand_lossage ("invalid reference for 'J' output modifier");
5439 return;
5440
5441 case 'G':
5442 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
5443 return;
5444
5445 case 'O':
5446 {
5447 struct s390_address ad;
5448 int ret;
5449
5450 if (!MEM_P (x))
5451 {
5452 output_operand_lossage ("memory reference expected for "
5453 "'O' output modifier");
5454 return;
5455 }
5456
5457 ret = s390_decompose_address (XEXP (x, 0), &ad);
5458
5459 if (!ret
5460 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5461 || ad.indx)
5462 {
5463 output_operand_lossage ("invalid address for 'O' output modifier");
5464 return;
5465 }
5466
5467 if (ad.disp)
5468 output_addr_const (file, ad.disp);
5469 else
5470 fprintf (file, "0");
5471 }
5472 return;
5473
5474 case 'R':
5475 {
5476 struct s390_address ad;
5477 int ret;
5478
5479 if (!MEM_P (x))
5480 {
5481 output_operand_lossage ("memory reference expected for "
5482 "'R' output modifier");
5483 return;
5484 }
5485
5486 ret = s390_decompose_address (XEXP (x, 0), &ad);
5487
5488 if (!ret
5489 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5490 || ad.indx)
5491 {
5492 output_operand_lossage ("invalid address for 'R' output modifier");
5493 return;
5494 }
5495
5496 if (ad.base)
5497 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
5498 else
5499 fprintf (file, "0");
5500 }
5501 return;
5502
5503 case 'S':
5504 {
5505 struct s390_address ad;
5506 int ret;
5507
5508 if (!MEM_P (x))
5509 {
5510 output_operand_lossage ("memory reference expected for "
5511 "'S' output modifier");
5512 return;
5513 }
5514 ret = s390_decompose_address (XEXP (x, 0), &ad);
5515
5516 if (!ret
5517 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5518 || ad.indx)
5519 {
5520 output_operand_lossage ("invalid address for 'S' output modifier");
5521 return;
5522 }
5523
5524 if (ad.disp)
5525 output_addr_const (file, ad.disp);
5526 else
5527 fprintf (file, "0");
5528
5529 if (ad.base)
5530 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5531 }
5532 return;
5533
5534 case 'N':
5535 if (GET_CODE (x) == REG)
5536 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5537 else if (GET_CODE (x) == MEM)
5538 x = change_address (x, VOIDmode,
5539 plus_constant (Pmode, XEXP (x, 0), 4));
5540 else
5541 output_operand_lossage ("register or memory expression expected "
5542 "for 'N' output modifier");
5543 break;
5544
5545 case 'M':
5546 if (GET_CODE (x) == REG)
5547 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5548 else if (GET_CODE (x) == MEM)
5549 x = change_address (x, VOIDmode,
5550 plus_constant (Pmode, XEXP (x, 0), 8));
5551 else
5552 output_operand_lossage ("register or memory expression expected "
5553 "for 'M' output modifier");
5554 break;
5555
5556 case 'Y':
5557 print_shift_count_operand (file, x);
5558 return;
5559 }
5560
5561 switch (GET_CODE (x))
5562 {
5563 case REG:
5564 fprintf (file, "%s", reg_names[REGNO (x)]);
5565 break;
5566
5567 case MEM:
5568 output_address (XEXP (x, 0));
5569 break;
5570
5571 case CONST:
5572 case CODE_LABEL:
5573 case LABEL_REF:
5574 case SYMBOL_REF:
5575 output_addr_const (file, x);
5576 break;
5577
5578 case CONST_INT:
5579 ival = INTVAL (x);
5580 switch (code)
5581 {
5582 case 0:
5583 break;
5584 case 'b':
5585 ival &= 0xff;
5586 break;
5587 case 'c':
5588 ival = ((ival & 0xff) ^ 0x80) - 0x80;
5589 break;
5590 case 'x':
5591 ival &= 0xffff;
5592 break;
5593 case 'h':
5594 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
5595 break;
5596 case 'i':
5597 ival = s390_extract_part (x, HImode, 0);
5598 break;
5599 case 'j':
5600 ival = s390_extract_part (x, HImode, -1);
5601 break;
5602 case 'k':
5603 ival = s390_extract_part (x, SImode, 0);
5604 break;
5605 case 'm':
5606 ival = s390_extract_part (x, SImode, -1);
5607 break;
5608 case 'o':
5609 ival &= 0xffffffff;
5610 break;
5611 case 'e': case 'f':
5612 case 's': case 't':
5613 {
5614 int pos, len;
5615 bool ok;
5616
5617 len = (code == 's' || code == 'e' ? 64 : 32);
5618 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
5619 gcc_assert (ok);
5620 if (code == 's' || code == 't')
5621 ival = 64 - pos - len;
5622 else
5623 ival = 64 - 1 - pos;
5624 }
5625 break;
5626 default:
5627 output_operand_lossage ("invalid constant for output modifier '%c'", code);
5628 }
5629 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
5630 break;
5631
5632 case CONST_DOUBLE:
5633 gcc_assert (GET_MODE (x) == VOIDmode);
5634 if (code == 'b')
5635 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
5636 else if (code == 'x')
5637 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
5638 else if (code == 'h')
5639 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5640 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
5641 else
5642 {
5643 if (code == 0)
5644 output_operand_lossage ("invalid constant - try using "
5645 "an output modifier");
5646 else
5647 output_operand_lossage ("invalid constant for output modifier '%c'",
5648 code);
5649 }
5650 break;
5651
5652 default:
5653 if (code == 0)
5654 output_operand_lossage ("invalid expression - try using "
5655 "an output modifier");
5656 else
5657 output_operand_lossage ("invalid expression for output "
5658 "modifier '%c'", code);
5659 break;
5660 }
5661 }
5662
5663 /* Target hook for assembling integer objects. We need to define it
5664 here to work around a bug in some versions of GAS, which couldn't
5665 handle values smaller than INT_MIN when printed in decimal. */
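/* For instance (an illustrative value), a CONST_INT of -2147483649
   emitted with size 8 comes out as ".quad 0xffffffff7fffffff" rather
   than as a decimal literal that the affected GAS versions would
   mis-parse.  */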
5666
5667 static bool
5668 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
5669 {
5670 if (size == 8 && aligned_p
5671 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
5672 {
5673 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
5674 INTVAL (x));
5675 return true;
5676 }
5677 return default_assemble_integer (x, size, aligned_p);
5678 }
5679
5680 /* Returns true if register REGNO is used for forming
5681 a memory address in expression X. */
5682
5683 static bool
5684 reg_used_in_mem_p (int regno, rtx x)
5685 {
5686 enum rtx_code code = GET_CODE (x);
5687 int i, j;
5688 const char *fmt;
5689
5690 if (code == MEM)
5691 {
5692 if (refers_to_regno_p (regno, regno+1,
5693 XEXP (x, 0), 0))
5694 return true;
5695 }
5696 else if (code == SET
5697 && GET_CODE (SET_DEST (x)) == PC)
5698 {
5699 if (refers_to_regno_p (regno, regno+1,
5700 SET_SRC (x), 0))
5701 return true;
5702 }
5703
5704 fmt = GET_RTX_FORMAT (code);
5705 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5706 {
5707 if (fmt[i] == 'e'
5708 && reg_used_in_mem_p (regno, XEXP (x, i)))
5709 return true;
5710
5711 else if (fmt[i] == 'E')
5712 for (j = 0; j < XVECLEN (x, i); j++)
5713 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
5714 return true;
5715 }
5716 return false;
5717 }
5718
5719 /* Returns true if expression DEP_RTX sets an address register
5720 used by instruction INSN to address memory. */
5721
5722 static bool
5723 addr_generation_dependency_p (rtx dep_rtx, rtx insn)
5724 {
5725 rtx target, pat;
5726
5727 if (GET_CODE (dep_rtx) == INSN)
5728 dep_rtx = PATTERN (dep_rtx);
5729
5730 if (GET_CODE (dep_rtx) == SET)
5731 {
5732 target = SET_DEST (dep_rtx);
5733 if (GET_CODE (target) == STRICT_LOW_PART)
5734 target = XEXP (target, 0);
5735 while (GET_CODE (target) == SUBREG)
5736 target = SUBREG_REG (target);
5737
5738 if (GET_CODE (target) == REG)
5739 {
5740 int regno = REGNO (target);
5741
5742 if (s390_safe_attr_type (insn) == TYPE_LA)
5743 {
5744 pat = PATTERN (insn);
5745 if (GET_CODE (pat) == PARALLEL)
5746 {
5747 gcc_assert (XVECLEN (pat, 0) == 2);
5748 pat = XVECEXP (pat, 0, 0);
5749 }
5750 gcc_assert (GET_CODE (pat) == SET);
5751 return refers_to_regno_p (regno, regno+1, SET_SRC (pat), 0);
5752 }
5753 else if (get_attr_atype (insn) == ATYPE_AGEN)
5754 return reg_used_in_mem_p (regno, PATTERN (insn));
5755 }
5756 }
5757 return false;
5758 }
5759
5760 /* Return 1 if dep_insn sets a register used by insn in the agen unit. */
5761
5762 int
5763 s390_agen_dep_p (rtx dep_insn, rtx insn)
5764 {
5765 rtx dep_rtx = PATTERN (dep_insn);
5766 int i;
5767
5768 if (GET_CODE (dep_rtx) == SET
5769 && addr_generation_dependency_p (dep_rtx, insn))
5770 return 1;
5771 else if (GET_CODE (dep_rtx) == PARALLEL)
5772 {
5773 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
5774 {
5775 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
5776 return 1;
5777 }
5778 }
5779 return 0;
5780 }
5781
5782
5783 /* A C statement (sans semicolon) to update the integer scheduling priority
5784 INSN_PRIORITY (INSN). Increase the priority to execute INSN earlier;
5785 reduce the priority to execute INSN later. Do not define this macro if
5786 you do not need to adjust the scheduling priorities of insns.
5787
5788 A STD instruction should be scheduled earlier,
5789 in order to use the bypass. */
5790 static int
5791 s390_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
5792 {
5793 if (! INSN_P (insn))
5794 return priority;
5795
5796 if (s390_tune != PROCESSOR_2084_Z990
5797 && s390_tune != PROCESSOR_2094_Z9_109
5798 && s390_tune != PROCESSOR_2097_Z10
5799 && s390_tune != PROCESSOR_2817_Z196
5800 && s390_tune != PROCESSOR_2827_ZEC12)
5801 return priority;
5802
5803 switch (s390_safe_attr_type (insn))
5804 {
5805 case TYPE_FSTOREDF:
5806 case TYPE_FSTORESF:
5807 priority = priority << 3;
5808 break;
5809 case TYPE_STORE:
5810 case TYPE_STM:
5811 priority = priority << 1;
5812 break;
5813 default:
5814 break;
5815 }
5816 return priority;
5817 }
5818
5819
5820 /* The number of instructions that can be issued per cycle. */
5821
5822 static int
5823 s390_issue_rate (void)
5824 {
5825 switch (s390_tune)
5826 {
5827 case PROCESSOR_2084_Z990:
5828 case PROCESSOR_2094_Z9_109:
5829 case PROCESSOR_2817_Z196:
5830 return 3;
5831 case PROCESSOR_2097_Z10:
5832 case PROCESSOR_2827_ZEC12:
5833 return 2;
5834 default:
5835 return 1;
5836 }
5837 }
5838
5839 static int
5840 s390_first_cycle_multipass_dfa_lookahead (void)
5841 {
5842 return 4;
5843 }
5844
5845 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
5846 Fix up MEMs as required. */
5847
5848 static void
5849 annotate_constant_pool_refs (rtx *x)
5850 {
5851 int i, j;
5852 const char *fmt;
5853
5854 gcc_assert (GET_CODE (*x) != SYMBOL_REF
5855 || !CONSTANT_POOL_ADDRESS_P (*x));
5856
5857 /* Literal pool references can only occur inside a MEM ... */
5858 if (GET_CODE (*x) == MEM)
5859 {
5860 rtx memref = XEXP (*x, 0);
5861
5862 if (GET_CODE (memref) == SYMBOL_REF
5863 && CONSTANT_POOL_ADDRESS_P (memref))
5864 {
5865 rtx base = cfun->machine->base_reg;
5866 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
5867 UNSPEC_LTREF);
5868
5869 *x = replace_equiv_address (*x, addr);
5870 return;
5871 }
5872
5873 if (GET_CODE (memref) == CONST
5874 && GET_CODE (XEXP (memref, 0)) == PLUS
5875 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
5876 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
5877 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
5878 {
5879 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
5880 rtx sym = XEXP (XEXP (memref, 0), 0);
5881 rtx base = cfun->machine->base_reg;
5882 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5883 UNSPEC_LTREF);
5884
5885 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
5886 return;
5887 }
5888 }
5889
5890 /* ... or a load-address type pattern. */
5891 if (GET_CODE (*x) == SET)
5892 {
5893 rtx addrref = SET_SRC (*x);
5894
5895 if (GET_CODE (addrref) == SYMBOL_REF
5896 && CONSTANT_POOL_ADDRESS_P (addrref))
5897 {
5898 rtx base = cfun->machine->base_reg;
5899 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
5900 UNSPEC_LTREF);
5901
5902 SET_SRC (*x) = addr;
5903 return;
5904 }
5905
5906 if (GET_CODE (addrref) == CONST
5907 && GET_CODE (XEXP (addrref, 0)) == PLUS
5908 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
5909 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
5910 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
5911 {
5912 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
5913 rtx sym = XEXP (XEXP (addrref, 0), 0);
5914 rtx base = cfun->machine->base_reg;
5915 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5916 UNSPEC_LTREF);
5917
5918 SET_SRC (*x) = plus_constant (Pmode, addr, off);
5919 return;
5920 }
5921 }
5922
5923 /* Annotate LTREL_BASE as well. */
5924 if (GET_CODE (*x) == UNSPEC
5925 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
5926 {
5927 rtx base = cfun->machine->base_reg;
5928 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
5929 UNSPEC_LTREL_BASE);
5930 return;
5931 }
5932
5933 fmt = GET_RTX_FORMAT (GET_CODE (*x));
5934 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
5935 {
5936 if (fmt[i] == 'e')
5937 {
5938 annotate_constant_pool_refs (&XEXP (*x, i));
5939 }
5940 else if (fmt[i] == 'E')
5941 {
5942 for (j = 0; j < XVECLEN (*x, i); j++)
5943 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
5944 }
5945 }
5946 }
5947
5948 /* Split all branches that exceed the maximum distance.
5949 Returns true if this created a new literal pool entry. */
5950
5951 static int
5952 s390_split_branches (void)
5953 {
5954 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
5955 int new_literal = 0, ret;
5956 rtx insn, pat, tmp, target;
5957 rtx *label;
5958
5959 /* We need correct insn addresses. */
5960
5961 shorten_branches (get_insns ());
5962
5963 /* Find all branches that exceed 64KB, and split them. */
5964
5965 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5966 {
5967 if (GET_CODE (insn) != JUMP_INSN)
5968 continue;
5969
5970 pat = PATTERN (insn);
5971 if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2)
5972 pat = XVECEXP (pat, 0, 0);
5973 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
5974 continue;
5975
5976 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
5977 {
5978 label = &SET_SRC (pat);
5979 }
5980 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
5981 {
5982 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
5983 label = &XEXP (SET_SRC (pat), 1);
5984 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
5985 label = &XEXP (SET_SRC (pat), 2);
5986 else
5987 continue;
5988 }
5989 else
5990 continue;
5991
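/* Branches that still fit into the 4 byte relative form are within range and need not be split.  */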
5992 if (get_attr_length (insn) <= 4)
5993 continue;
5994
5995 /* We are going to use the return register as a scratch register;
5996 make sure it will be saved/restored by the prologue/epilogue. */
5997 cfun_frame_layout.save_return_addr_p = 1;
5998
5999 if (!flag_pic)
6000 {
6001 new_literal = 1;
6002 tmp = force_const_mem (Pmode, *label);
6003 tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, tmp), insn);
6004 INSN_ADDRESSES_NEW (tmp, -1);
6005 annotate_constant_pool_refs (&PATTERN (tmp));
6006
6007 target = temp_reg;
6008 }
6009 else
6010 {
6011 new_literal = 1;
6012 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
6013 UNSPEC_LTREL_OFFSET);
6014 target = gen_rtx_CONST (Pmode, target);
6015 target = force_const_mem (Pmode, target);
6016 tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, target), insn);
6017 INSN_ADDRESSES_NEW (tmp, -1);
6018 annotate_constant_pool_refs (&PATTERN (tmp));
6019
6020 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
6021 cfun->machine->base_reg),
6022 UNSPEC_LTREL_BASE);
6023 target = gen_rtx_PLUS (Pmode, temp_reg, target);
6024 }
6025
6026 ret = validate_change (insn, label, target, 0);
6027 gcc_assert (ret);
6028 }
6029
6030 return new_literal;
6031 }
6032
6033
6034 /* Find an annotated literal pool symbol referenced in RTX X,
6035 and store it at REF. Will abort if X contains references to
6036 more than one such pool symbol; multiple references to the same
6037 symbol are allowed, however.
6038
6039 The rtx pointed to by REF must be initialized to NULL_RTX
6040 by the caller before calling this routine. */
6041
6042 static void
6043 find_constant_pool_ref (rtx x, rtx *ref)
6044 {
6045 int i, j;
6046 const char *fmt;
6047
6048 /* Ignore LTREL_BASE references. */
6049 if (GET_CODE (x) == UNSPEC
6050 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6051 return;
6052 /* Likewise POOL_ENTRY insns. */
6053 if (GET_CODE (x) == UNSPEC_VOLATILE
6054 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
6055 return;
6056
6057 gcc_assert (GET_CODE (x) != SYMBOL_REF
6058 || !CONSTANT_POOL_ADDRESS_P (x));
6059
6060 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
6061 {
6062 rtx sym = XVECEXP (x, 0, 0);
6063 gcc_assert (GET_CODE (sym) == SYMBOL_REF
6064 && CONSTANT_POOL_ADDRESS_P (sym));
6065
6066 if (*ref == NULL_RTX)
6067 *ref = sym;
6068 else
6069 gcc_assert (*ref == sym);
6070
6071 return;
6072 }
6073
6074 fmt = GET_RTX_FORMAT (GET_CODE (x));
6075 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6076 {
6077 if (fmt[i] == 'e')
6078 {
6079 find_constant_pool_ref (XEXP (x, i), ref);
6080 }
6081 else if (fmt[i] == 'E')
6082 {
6083 for (j = 0; j < XVECLEN (x, i); j++)
6084 find_constant_pool_ref (XVECEXP (x, i, j), ref);
6085 }
6086 }
6087 }
6088
6089 /* Replace every reference to the annotated literal pool
6090 symbol REF in X by its base plus OFFSET. */
6091
6092 static void
6093 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
6094 {
6095 int i, j;
6096 const char *fmt;
6097
6098 gcc_assert (*x != ref);
6099
6100 if (GET_CODE (*x) == UNSPEC
6101 && XINT (*x, 1) == UNSPEC_LTREF
6102 && XVECEXP (*x, 0, 0) == ref)
6103 {
6104 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
6105 return;
6106 }
6107
6108 if (GET_CODE (*x) == PLUS
6109 && GET_CODE (XEXP (*x, 1)) == CONST_INT
6110 && GET_CODE (XEXP (*x, 0)) == UNSPEC
6111 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
6112 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
6113 {
6114 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
6115 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
6116 return;
6117 }
6118
6119 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6120 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6121 {
6122 if (fmt[i] == 'e')
6123 {
6124 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
6125 }
6126 else if (fmt[i] == 'E')
6127 {
6128 for (j = 0; j < XVECLEN (*x, i); j++)
6129 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
6130 }
6131 }
6132 }
6133
6134 /* Check whether X contains an UNSPEC_LTREL_BASE.
6135 Return its constant pool symbol if found, NULL_RTX otherwise. */
6136
6137 static rtx
6138 find_ltrel_base (rtx x)
6139 {
6140 int i, j;
6141 const char *fmt;
6142
6143 if (GET_CODE (x) == UNSPEC
6144 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6145 return XVECEXP (x, 0, 0);
6146
6147 fmt = GET_RTX_FORMAT (GET_CODE (x));
6148 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6149 {
6150 if (fmt[i] == 'e')
6151 {
6152 rtx fnd = find_ltrel_base (XEXP (x, i));
6153 if (fnd)
6154 return fnd;
6155 }
6156 else if (fmt[i] == 'E')
6157 {
6158 for (j = 0; j < XVECLEN (x, i); j++)
6159 {
6160 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
6161 if (fnd)
6162 return fnd;
6163 }
6164 }
6165 }
6166
6167 return NULL_RTX;
6168 }
6169
6170 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
6171
6172 static void
6173 replace_ltrel_base (rtx *x)
6174 {
6175 int i, j;
6176 const char *fmt;
6177
6178 if (GET_CODE (*x) == UNSPEC
6179 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
6180 {
6181 *x = XVECEXP (*x, 0, 1);
6182 return;
6183 }
6184
6185 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6186 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6187 {
6188 if (fmt[i] == 'e')
6189 {
6190 replace_ltrel_base (&XEXP (*x, i));
6191 }
6192 else if (fmt[i] == 'E')
6193 {
6194 for (j = 0; j < XVECLEN (*x, i); j++)
6195 replace_ltrel_base (&XVECEXP (*x, i, j));
6196 }
6197 }
6198 }
6199
6200
6201 /* We keep a list of constants which we have to add to internal
6202 constant tables in the middle of large functions. */
6203
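/* Note: constant_modes is ordered by decreasing alignment requirement;
   s390_dump_pool relies on this ordering when emitting the pool entries.  */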
6204 #define NR_C_MODES 11
6205 enum machine_mode constant_modes[NR_C_MODES] =
6206 {
6207 TFmode, TImode, TDmode,
6208 DFmode, DImode, DDmode,
6209 SFmode, SImode, SDmode,
6210 HImode,
6211 QImode
6212 };
6213
6214 struct constant
6215 {
6216 struct constant *next;
6217 rtx value;
6218 rtx label;
6219 };
6220
6221 struct constant_pool
6222 {
6223 struct constant_pool *next;
6224 rtx first_insn;
6225 rtx pool_insn;
6226 bitmap insns;
6227 rtx emit_pool_after;
6228
6229 struct constant *constants[NR_C_MODES];
6230 struct constant *execute;
6231 rtx label;
6232 int size;
6233 };
6234
6235 /* Allocate new constant_pool structure. */
6236
6237 static struct constant_pool *
6238 s390_alloc_pool (void)
6239 {
6240 struct constant_pool *pool;
6241 int i;
6242
6243 pool = (struct constant_pool *) xmalloc (sizeof *pool);
6244 pool->next = NULL;
6245 for (i = 0; i < NR_C_MODES; i++)
6246 pool->constants[i] = NULL;
6247
6248 pool->execute = NULL;
6249 pool->label = gen_label_rtx ();
6250 pool->first_insn = NULL_RTX;
6251 pool->pool_insn = NULL_RTX;
6252 pool->insns = BITMAP_ALLOC (NULL);
6253 pool->size = 0;
6254 pool->emit_pool_after = NULL_RTX;
6255
6256 return pool;
6257 }
6258
6259 /* Create new constant pool covering instructions starting at INSN
6260 and chain it to the end of POOL_LIST. */
6261
6262 static struct constant_pool *
6263 s390_start_pool (struct constant_pool **pool_list, rtx insn)
6264 {
6265 struct constant_pool *pool, **prev;
6266
6267 pool = s390_alloc_pool ();
6268 pool->first_insn = insn;
6269
6270 for (prev = pool_list; *prev; prev = &(*prev)->next)
6271 ;
6272 *prev = pool;
6273
6274 return pool;
6275 }
6276
6277 /* End range of instructions covered by POOL at INSN and emit
6278 placeholder insn representing the pool. */
6279
6280 static void
6281 s390_end_pool (struct constant_pool *pool, rtx insn)
6282 {
6283 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
6284
6285 if (!insn)
6286 insn = get_last_insn ();
6287
6288 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
6289 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6290 }
6291
6292 /* Add INSN to the list of insns covered by POOL. */
6293
6294 static void
6295 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
6296 {
6297 bitmap_set_bit (pool->insns, INSN_UID (insn));
6298 }
6299
6300 /* Return pool out of POOL_LIST that covers INSN. */
6301
6302 static struct constant_pool *
6303 s390_find_pool (struct constant_pool *pool_list, rtx insn)
6304 {
6305 struct constant_pool *pool;
6306
6307 for (pool = pool_list; pool; pool = pool->next)
6308 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
6309 break;
6310
6311 return pool;
6312 }
6313
6314 /* Add constant VAL of mode MODE to the constant pool POOL. */
6315
6316 static void
6317 s390_add_constant (struct constant_pool *pool, rtx val, enum machine_mode mode)
6318 {
6319 struct constant *c;
6320 int i;
6321
6322 for (i = 0; i < NR_C_MODES; i++)
6323 if (constant_modes[i] == mode)
6324 break;
6325 gcc_assert (i != NR_C_MODES);
6326
6327 for (c = pool->constants[i]; c != NULL; c = c->next)
6328 if (rtx_equal_p (val, c->value))
6329 break;
6330
6331 if (c == NULL)
6332 {
6333 c = (struct constant *) xmalloc (sizeof *c);
6334 c->value = val;
6335 c->label = gen_label_rtx ();
6336 c->next = pool->constants[i];
6337 pool->constants[i] = c;
6338 pool->size += GET_MODE_SIZE (mode);
6339 }
6340 }
6341
6342 /* Return an rtx that represents the offset of X from the start of
6343 pool POOL. */
6344
6345 static rtx
6346 s390_pool_offset (struct constant_pool *pool, rtx x)
6347 {
6348 rtx label;
6349
6350 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
6351 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
6352 UNSPEC_POOL_OFFSET);
6353 return gen_rtx_CONST (GET_MODE (x), x);
6354 }
6355
6356 /* Find constant VAL of mode MODE in the constant pool POOL.
6357 Return an RTX describing the distance from the start of
6358 the pool to the location of the new constant. */
6359
6360 static rtx
6361 s390_find_constant (struct constant_pool *pool, rtx val,
6362 enum machine_mode mode)
6363 {
6364 struct constant *c;
6365 int i;
6366
6367 for (i = 0; i < NR_C_MODES; i++)
6368 if (constant_modes[i] == mode)
6369 break;
6370 gcc_assert (i != NR_C_MODES);
6371
6372 for (c = pool->constants[i]; c != NULL; c = c->next)
6373 if (rtx_equal_p (val, c->value))
6374 break;
6375
6376 gcc_assert (c);
6377
6378 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6379 }
6380
6381 /* Check whether INSN is an execute. Return the label_ref to its
6382 execute target template if so, NULL_RTX otherwise. */
6383
6384 static rtx
6385 s390_execute_label (rtx insn)
6386 {
6387 if (GET_CODE (insn) == INSN
6388 && GET_CODE (PATTERN (insn)) == PARALLEL
6389 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
6390 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
6391 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
6392
6393 return NULL_RTX;
6394 }
6395
6396 /* Add execute target for INSN to the constant pool POOL. */
6397
6398 static void
6399 s390_add_execute (struct constant_pool *pool, rtx insn)
6400 {
6401 struct constant *c;
6402
6403 for (c = pool->execute; c != NULL; c = c->next)
6404 if (INSN_UID (insn) == INSN_UID (c->value))
6405 break;
6406
6407 if (c == NULL)
6408 {
6409 c = (struct constant *) xmalloc (sizeof *c);
6410 c->value = insn;
6411 c->label = gen_label_rtx ();
6412 c->next = pool->execute;
6413 pool->execute = c;
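/* Reserve 6 bytes, the maximum length of an s390 instruction, for the execute target template.  */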
6414 pool->size += 6;
6415 }
6416 }
6417
6418 /* Find execute target for INSN in the constant pool POOL.
6419 Return an RTX describing the distance from the start of
6420 the pool to the location of the execute target. */
6421
6422 static rtx
6423 s390_find_execute (struct constant_pool *pool, rtx insn)
6424 {
6425 struct constant *c;
6426
6427 for (c = pool->execute; c != NULL; c = c->next)
6428 if (INSN_UID (insn) == INSN_UID (c->value))
6429 break;
6430
6431 gcc_assert (c);
6432
6433 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6434 }
6435
6436 /* For an execute INSN, extract the execute target template. */
6437
6438 static rtx
6439 s390_execute_target (rtx insn)
6440 {
6441 rtx pattern = PATTERN (insn);
6442 gcc_assert (s390_execute_label (insn));
6443
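/* A two-element PARALLEL holds just the UNSPEC_EXECUTE plus a single template insn; with more elements the remaining insns form the template as a new PARALLEL.  */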
6444 if (XVECLEN (pattern, 0) == 2)
6445 {
6446 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
6447 }
6448 else
6449 {
6450 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
6451 int i;
6452
6453 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
6454 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
6455
6456 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
6457 }
6458
6459 return pattern;
6460 }
6461
6462 /* Indicate that INSN cannot be duplicated. This is the case for
6463 execute insns that carry a unique label. */
6464
6465 static bool
6466 s390_cannot_copy_insn_p (rtx insn)
6467 {
6468 rtx label = s390_execute_label (insn);
6469 return label && label != const0_rtx;
6470 }
6471
6472 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
6473 do not emit the pool base label. */
6474
6475 static void
6476 s390_dump_pool (struct constant_pool *pool, bool remote_label)
6477 {
6478 struct constant *c;
6479 rtx insn = pool->pool_insn;
6480 int i;
6481
6482 /* Switch to rodata section. */
6483 if (TARGET_CPU_ZARCH)
6484 {
6485 insn = emit_insn_after (gen_pool_section_start (), insn);
6486 INSN_ADDRESSES_NEW (insn, -1);
6487 }
6488
6489 /* Ensure minimum pool alignment. */
6490 if (TARGET_CPU_ZARCH)
6491 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
6492 else
6493 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
6494 INSN_ADDRESSES_NEW (insn, -1);
6495
6496 /* Emit pool base label. */
6497 if (!remote_label)
6498 {
6499 insn = emit_label_after (pool->label, insn);
6500 INSN_ADDRESSES_NEW (insn, -1);
6501 }
6502
6503 /* Dump constants in descending alignment requirement order,
6504 ensuring proper alignment for every constant. */
6505 for (i = 0; i < NR_C_MODES; i++)
6506 for (c = pool->constants[i]; c; c = c->next)
6507 {
6508 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
6509 rtx value = copy_rtx (c->value);
6510 if (GET_CODE (value) == CONST
6511 && GET_CODE (XEXP (value, 0)) == UNSPEC
6512 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
6513 && XVECLEN (XEXP (value, 0), 0) == 1)
6514 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
6515
6516 insn = emit_label_after (c->label, insn);
6517 INSN_ADDRESSES_NEW (insn, -1);
6518
6519 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
6520 gen_rtvec (1, value),
6521 UNSPECV_POOL_ENTRY);
6522 insn = emit_insn_after (value, insn);
6523 INSN_ADDRESSES_NEW (insn, -1);
6524 }
6525
6526 /* Ensure minimum alignment for instructions. */
6527 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
6528 INSN_ADDRESSES_NEW (insn, -1);
6529
6530 /* Output in-pool execute template insns. */
6531 for (c = pool->execute; c; c = c->next)
6532 {
6533 insn = emit_label_after (c->label, insn);
6534 INSN_ADDRESSES_NEW (insn, -1);
6535
6536 insn = emit_insn_after (s390_execute_target (c->value), insn);
6537 INSN_ADDRESSES_NEW (insn, -1);
6538 }
6539
6540 /* Switch back to previous section. */
6541 if (TARGET_CPU_ZARCH)
6542 {
6543 insn = emit_insn_after (gen_pool_section_end (), insn);
6544 INSN_ADDRESSES_NEW (insn, -1);
6545 }
6546
6547 insn = emit_barrier_after (insn);
6548 INSN_ADDRESSES_NEW (insn, -1);
6549
6550 /* Remove placeholder insn. */
6551 remove_insn (pool->pool_insn);
6552 }
6553
6554 /* Free all memory used by POOL. */
6555
6556 static void
6557 s390_free_pool (struct constant_pool *pool)
6558 {
6559 struct constant *c, *next;
6560 int i;
6561
6562 for (i = 0; i < NR_C_MODES; i++)
6563 for (c = pool->constants[i]; c; c = next)
6564 {
6565 next = c->next;
6566 free (c);
6567 }
6568
6569 for (c = pool->execute; c; c = next)
6570 {
6571 next = c->next;
6572 free (c);
6573 }
6574
6575 BITMAP_FREE (pool->insns);
6576 free (pool);
6577 }
6578
6579
6580 /* Collect main literal pool. Return NULL on overflow. */
6581
6582 static struct constant_pool *
6583 s390_mainpool_start (void)
6584 {
6585 struct constant_pool *pool;
6586 rtx insn;
6587
6588 pool = s390_alloc_pool ();
6589
6590 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6591 {
6592 if (GET_CODE (insn) == INSN
6593 && GET_CODE (PATTERN (insn)) == SET
6594 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
6595 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
6596 {
6597 gcc_assert (!pool->pool_insn);
6598 pool->pool_insn = insn;
6599 }
6600
6601 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6602 {
6603 s390_add_execute (pool, insn);
6604 }
6605 else if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
6606 {
6607 rtx pool_ref = NULL_RTX;
6608 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6609 if (pool_ref)
6610 {
6611 rtx constant = get_pool_constant (pool_ref);
6612 enum machine_mode mode = get_pool_mode (pool_ref);
6613 s390_add_constant (pool, constant, mode);
6614 }
6615 }
6616
6617 /* If hot/cold partitioning is enabled we have to make sure that
6618 the literal pool is emitted in the same section where the
6619 initialization of the literal pool base pointer takes place.
6620 emit_pool_after is only used in the non-overflow case on non-Z
6621 CPUs where we can emit the literal pool at the end of the
6622 function body within the text section. */
6623 if (NOTE_P (insn)
6624 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
6625 && !pool->emit_pool_after)
6626 pool->emit_pool_after = PREV_INSN (insn);
6627 }
6628
6629 gcc_assert (pool->pool_insn || pool->size == 0);
6630
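/* Beyond 4096 bytes the pool entries could no longer all be reached via a 12 bit displacement from the pool base, so the pool has to be chunkified instead.  */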
6631 if (pool->size >= 4096)
6632 {
6633 /* We're going to chunkify the pool, so remove the main
6634 pool placeholder insn. */
6635 remove_insn (pool->pool_insn);
6636
6637 s390_free_pool (pool);
6638 pool = NULL;
6639 }
6640
6641 /* If the function ends with the section where the literal pool
6642 should be emitted, set the marker to its end. */
6643 if (pool && !pool->emit_pool_after)
6644 pool->emit_pool_after = get_last_insn ();
6645
6646 return pool;
6647 }
6648
6649 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6650 Modify the current function to output the pool constants as well as
6651 the pool register setup instruction. */
6652
6653 static void
6654 s390_mainpool_finish (struct constant_pool *pool)
6655 {
6656 rtx base_reg = cfun->machine->base_reg;
6657 rtx insn;
6658
6659 /* If the pool is empty, we're done. */
6660 if (pool->size == 0)
6661 {
6662 /* We don't actually need a base register after all. */
6663 cfun->machine->base_reg = NULL_RTX;
6664
6665 if (pool->pool_insn)
6666 remove_insn (pool->pool_insn);
6667 s390_free_pool (pool);
6668 return;
6669 }
6670
6671 /* We need correct insn addresses. */
6672 shorten_branches (get_insns ());
6673
6674 /* On zSeries, we use a LARL to load the pool register. The pool is
6675 located in the .rodata section, so we emit it after the function. */
6676 if (TARGET_CPU_ZARCH)
6677 {
6678 insn = gen_main_base_64 (base_reg, pool->label);
6679 insn = emit_insn_after (insn, pool->pool_insn);
6680 INSN_ADDRESSES_NEW (insn, -1);
6681 remove_insn (pool->pool_insn);
6682
6683 insn = get_last_insn ();
6684 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6685 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6686
6687 s390_dump_pool (pool, 0);
6688 }
6689
6690 /* On S/390, if the total size of the function's code plus literal pool
6691 does not exceed 4096 bytes, we use BASR to set up a function base
6692 pointer, and emit the literal pool at the end of the function. */
6693 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
6694 + pool->size + 8 /* alignment slop */ < 4096)
6695 {
6696 insn = gen_main_base_31_small (base_reg, pool->label);
6697 insn = emit_insn_after (insn, pool->pool_insn);
6698 INSN_ADDRESSES_NEW (insn, -1);
6699 remove_insn (pool->pool_insn);
6700
6701 insn = emit_label_after (pool->label, insn);
6702 INSN_ADDRESSES_NEW (insn, -1);
6703
6704 /* emit_pool_after will be set by s390_mainpool_start to the
6705 last insn of the section where the literal pool should be
6706 emitted. */
6707 insn = pool->emit_pool_after;
6708
6709 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6710 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6711
6712 s390_dump_pool (pool, 1);
6713 }
6714
6715 /* Otherwise, we emit an inline literal pool and use BASR to branch
6716 over it, setting up the pool register at the same time. */
6717 else
6718 {
6719 rtx pool_end = gen_label_rtx ();
6720
6721 insn = gen_main_base_31_large (base_reg, pool->label, pool_end);
6722 insn = emit_jump_insn_after (insn, pool->pool_insn);
6723 JUMP_LABEL (insn) = pool_end;
6724 INSN_ADDRESSES_NEW (insn, -1);
6725 remove_insn (pool->pool_insn);
6726
6727 insn = emit_label_after (pool->label, insn);
6728 INSN_ADDRESSES_NEW (insn, -1);
6729
6730 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6731 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6732
6733 insn = emit_label_after (pool_end, pool->pool_insn);
6734 INSN_ADDRESSES_NEW (insn, -1);
6735
6736 s390_dump_pool (pool, 1);
6737 }
6738
6739
6740 /* Replace all literal pool references. */
6741
6742 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6743 {
6744 if (INSN_P (insn))
6745 replace_ltrel_base (&PATTERN (insn));
6746
6747 if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
6748 {
6749 rtx addr, pool_ref = NULL_RTX;
6750 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6751 if (pool_ref)
6752 {
6753 if (s390_execute_label (insn))
6754 addr = s390_find_execute (pool, insn);
6755 else
6756 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
6757 get_pool_mode (pool_ref));
6758
6759 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
6760 INSN_CODE (insn) = -1;
6761 }
6762 }
6763 }
6764
6765
6766 /* Free the pool. */
6767 s390_free_pool (pool);
6768 }
6769
6770 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6771 We have decided we cannot use this pool, so revert all changes
6772 to the current function that were done by s390_mainpool_start. */
6773 static void
6774 s390_mainpool_cancel (struct constant_pool *pool)
6775 {
6776 /* We didn't actually change the instruction stream, so simply
6777 free the pool memory. */
6778 s390_free_pool (pool);
6779 }
6780
6781
6782 /* Chunkify the literal pool. */
6783
6784 #define S390_POOL_CHUNK_MIN 0xc00
6785 #define S390_POOL_CHUNK_MAX 0xe00
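/* Both bounds stay well below the 4 KB displacement limit; the slack leaves room for the alignment padding and the base register reload insns accounted for below.  */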
6786
6787 static struct constant_pool *
6788 s390_chunkify_start (void)
6789 {
6790 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
6791 int extra_size = 0;
6792 bitmap far_labels;
6793 rtx pending_ltrel = NULL_RTX;
6794 rtx insn;
6795
6796 rtx (*gen_reload_base) (rtx, rtx) =
6797 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
6798
6799
6800 /* We need correct insn addresses. */
6801
6802 shorten_branches (get_insns ());
6803
6804 /* Scan all insns and move literals to pool chunks. */
6805
6806 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6807 {
6808 bool section_switch_p = false;
6809
6810 /* Check for pending LTREL_BASE. */
6811 if (INSN_P (insn))
6812 {
6813 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
6814 if (ltrel_base)
6815 {
6816 gcc_assert (ltrel_base == pending_ltrel);
6817 pending_ltrel = NULL_RTX;
6818 }
6819 }
6820
6821 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6822 {
6823 if (!curr_pool)
6824 curr_pool = s390_start_pool (&pool_list, insn);
6825
6826 s390_add_execute (curr_pool, insn);
6827 s390_add_pool_insn (curr_pool, insn);
6828 }
6829 else if (GET_CODE (insn) == INSN || CALL_P (insn))
6830 {
6831 rtx pool_ref = NULL_RTX;
6832 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6833 if (pool_ref)
6834 {
6835 rtx constant = get_pool_constant (pool_ref);
6836 enum machine_mode mode = get_pool_mode (pool_ref);
6837
6838 if (!curr_pool)
6839 curr_pool = s390_start_pool (&pool_list, insn);
6840
6841 s390_add_constant (curr_pool, constant, mode);
6842 s390_add_pool_insn (curr_pool, insn);
6843
6844 /* Don't split the pool chunk between a LTREL_OFFSET load
6845 and the corresponding LTREL_BASE. */
6846 if (GET_CODE (constant) == CONST
6847 && GET_CODE (XEXP (constant, 0)) == UNSPEC
6848 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
6849 {
6850 gcc_assert (!pending_ltrel);
6851 pending_ltrel = pool_ref;
6852 }
6853 }
6854 }
6855
6856 if (GET_CODE (insn) == JUMP_INSN || GET_CODE (insn) == CODE_LABEL)
6857 {
6858 if (curr_pool)
6859 s390_add_pool_insn (curr_pool, insn);
6860 /* An LTREL_BASE must follow within the same basic block. */
6861 gcc_assert (!pending_ltrel);
6862 }
6863
6864 if (NOTE_P (insn))
6865 switch (NOTE_KIND (insn))
6866 {
6867 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
6868 section_switch_p = true;
6869 break;
6870 case NOTE_INSN_VAR_LOCATION:
6871 case NOTE_INSN_CALL_ARG_LOCATION:
6872 continue;
6873 default:
6874 break;
6875 }
6876
6877 if (!curr_pool
6878 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
6879 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
6880 continue;
6881
6882 if (TARGET_CPU_ZARCH)
6883 {
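/* On z/Architecture the pool base is reloaded via LARL, which can reach the chunk wherever it ends up, so a chunk is only closed once it is full.  */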
6884 if (curr_pool->size < S390_POOL_CHUNK_MAX)
6885 continue;
6886
6887 s390_end_pool (curr_pool, NULL_RTX);
6888 curr_pool = NULL;
6889 }
6890 else
6891 {
6892 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
6893 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
6894 + extra_size;
6895
6896 /* We will later have to insert base register reload insns.
6897 Those will have an effect on code size, which we need to
6898 consider here. This calculation makes rather pessimistic
6899 worst-case assumptions. */
6900 if (GET_CODE (insn) == CODE_LABEL)
6901 extra_size += 6;
6902
6903 if (chunk_size < S390_POOL_CHUNK_MIN
6904 && curr_pool->size < S390_POOL_CHUNK_MIN
6905 && !section_switch_p)
6906 continue;
6907
6908 /* Pool chunks can only be inserted after BARRIERs ... */
6909 if (GET_CODE (insn) == BARRIER)
6910 {
6911 s390_end_pool (curr_pool, insn);
6912 curr_pool = NULL;
6913 extra_size = 0;
6914 }
6915
6916 /* ... so if we don't find one in time, create one. */
6917 else if (chunk_size > S390_POOL_CHUNK_MAX
6918 || curr_pool->size > S390_POOL_CHUNK_MAX
6919 || section_switch_p)
6920 {
6921 rtx label, jump, barrier, next, prev;
6922
6923 if (!section_switch_p)
6924 {
6925 /* We can insert the barrier only after a 'real' insn. */
6926 if (GET_CODE (insn) != INSN && GET_CODE (insn) != CALL_INSN)
6927 continue;
6928 if (get_attr_length (insn) == 0)
6929 continue;
6930 /* Don't separate LTREL_BASE from the corresponding
6931 LTREL_OFFSET load. */
6932 if (pending_ltrel)
6933 continue;
6934 next = insn;
6935 do
6936 {
6937 insn = next;
6938 next = NEXT_INSN (insn);
6939 }
6940 while (next
6941 && NOTE_P (next)
6942 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
6943 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
6944 }
6945 else
6946 {
6947 gcc_assert (!pending_ltrel);
6948
6949 /* The old pool has to end before the section switch
6950 note in order to make it part of the current
6951 section. */
6952 insn = PREV_INSN (insn);
6953 }
6954
6955 label = gen_label_rtx ();
6956 prev = insn;
6957 if (prev && NOTE_P (prev))
6958 prev = prev_nonnote_insn (prev);
6959 if (prev)
6960 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
6961 INSN_LOCATION (prev));
6962 else
6963 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
6964 barrier = emit_barrier_after (jump);
6965 insn = emit_label_after (label, barrier);
6966 JUMP_LABEL (jump) = label;
6967 LABEL_NUSES (label) = 1;
6968
6969 INSN_ADDRESSES_NEW (jump, -1);
6970 INSN_ADDRESSES_NEW (barrier, -1);
6971 INSN_ADDRESSES_NEW (insn, -1);
6972
6973 s390_end_pool (curr_pool, barrier);
6974 curr_pool = NULL;
6975 extra_size = 0;
6976 }
6977 }
6978 }
6979
6980 if (curr_pool)
6981 s390_end_pool (curr_pool, NULL_RTX);
6982 gcc_assert (!pending_ltrel);
6983
6984 /* Find all labels that are branched into
6985 from an insn belonging to a different chunk. */
6986
6987 far_labels = BITMAP_ALLOC (NULL);
6988
6989 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6990 {
6991 /* Labels marked with LABEL_PRESERVE_P can be the target
6992 of non-local jumps, so we have to mark them.
6993 The same holds for named labels.
6994
6995 Don't do that, however, if it is the label before
6996 a jump table. */
6997
6998 if (GET_CODE (insn) == CODE_LABEL
6999 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
7000 {
7001 rtx vec_insn = next_real_insn (insn);
7002 rtx vec_pat = vec_insn && GET_CODE (vec_insn) == JUMP_INSN ?
7003 PATTERN (vec_insn) : NULL_RTX;
7004 if (!vec_pat
7005 || !(GET_CODE (vec_pat) == ADDR_VEC
7006 || GET_CODE (vec_pat) == ADDR_DIFF_VEC))
7007 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
7008 }
7009
7010 /* If we have a direct jump (conditional or unconditional)
7011 or a casesi jump, check all potential targets. */
7012 else if (GET_CODE (insn) == JUMP_INSN)
7013 {
7014 rtx pat = PATTERN (insn);
7015 if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2)
7016 pat = XVECEXP (pat, 0, 0);
7017
7018 if (GET_CODE (pat) == SET)
7019 {
7020 rtx label = JUMP_LABEL (insn);
7021 if (label)
7022 {
7023 if (s390_find_pool (pool_list, label)
7024 != s390_find_pool (pool_list, insn))
7025 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7026 }
7027 }
7028 else if (GET_CODE (pat) == PARALLEL
7029 && XVECLEN (pat, 0) == 2
7030 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
7031 && GET_CODE (XVECEXP (pat, 0, 1)) == USE
7032 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == LABEL_REF)
7033 {
7034 /* Find the jump table used by this casesi jump. */
7035 rtx vec_label = XEXP (XEXP (XVECEXP (pat, 0, 1), 0), 0);
7036 rtx vec_insn = next_real_insn (vec_label);
7037 rtx vec_pat = vec_insn && GET_CODE (vec_insn) == JUMP_INSN ?
7038 PATTERN (vec_insn) : NULL_RTX;
7039 if (vec_pat
7040 && (GET_CODE (vec_pat) == ADDR_VEC
7041 || GET_CODE (vec_pat) == ADDR_DIFF_VEC))
7042 {
7043 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
7044
7045 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
7046 {
7047 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
7048
7049 if (s390_find_pool (pool_list, label)
7050 != s390_find_pool (pool_list, insn))
7051 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7052 }
7053 }
7054 }
7055 }
7056 }
7057
7058 /* Insert base register reload insns before every pool. */
7059
7060 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7061 {
7062 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7063 curr_pool->label);
7064 rtx insn = curr_pool->first_insn;
7065 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
7066 }
7067
7068 /* Insert base register reload insns at every far label. */
7069
7070 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7071 if (GET_CODE (insn) == CODE_LABEL
7072 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
7073 {
7074 struct constant_pool *pool = s390_find_pool (pool_list, insn);
7075 if (pool)
7076 {
7077 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7078 pool->label);
7079 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
7080 }
7081 }
7082
7083
7084 BITMAP_FREE (far_labels);
7085
7086
7087 /* Recompute insn addresses. */
7088
7089 init_insn_lengths ();
7090 shorten_branches (get_insns ());
7091
7092 return pool_list;
7093 }
7094
7095 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7096 After we have decided to use this list, finish implementing
7097 all changes to the current function as required. */
7098
7099 static void
7100 s390_chunkify_finish (struct constant_pool *pool_list)
7101 {
7102 struct constant_pool *curr_pool = NULL;
7103 rtx insn;
7104
7105
7106 /* Replace all literal pool references. */
7107
7108 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7109 {
7110 if (INSN_P (insn))
7111 replace_ltrel_base (&PATTERN (insn));
7112
7113 curr_pool = s390_find_pool (pool_list, insn);
7114 if (!curr_pool)
7115 continue;
7116
7117 if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
7118 {
7119 rtx addr, pool_ref = NULL_RTX;
7120 find_constant_pool_ref (PATTERN (insn), &pool_ref);
7121 if (pool_ref)
7122 {
7123 if (s390_execute_label (insn))
7124 addr = s390_find_execute (curr_pool, insn);
7125 else
7126 addr = s390_find_constant (curr_pool,
7127 get_pool_constant (pool_ref),
7128 get_pool_mode (pool_ref));
7129
7130 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
7131 INSN_CODE (insn) = -1;
7132 }
7133 }
7134 }
7135
7136 /* Dump out all literal pools. */
7137
7138 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7139 s390_dump_pool (curr_pool, 0);
7140
7141 /* Free pool list. */
7142
7143 while (pool_list)
7144 {
7145 struct constant_pool *next = pool_list->next;
7146 s390_free_pool (pool_list);
7147 pool_list = next;
7148 }
7149 }
7150
7151 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7152 We have decided we cannot use this list, so revert all changes
7153 to the current function that were done by s390_chunkify_start. */
7154
7155 static void
7156 s390_chunkify_cancel (struct constant_pool *pool_list)
7157 {
7158 struct constant_pool *curr_pool = NULL;
7159 rtx insn;
7160
7161 /* Remove all pool placeholder insns. */
7162
7163 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7164 {
7165 /* Did we insert an extra barrier? Remove it. */
7166 rtx barrier = PREV_INSN (curr_pool->pool_insn);
7167 rtx jump = barrier? PREV_INSN (barrier) : NULL_RTX;
7168 rtx label = NEXT_INSN (curr_pool->pool_insn);
7169
7170 if (jump && GET_CODE (jump) == JUMP_INSN
7171 && barrier && GET_CODE (barrier) == BARRIER
7172 && label && GET_CODE (label) == CODE_LABEL
7173 && GET_CODE (PATTERN (jump)) == SET
7174 && SET_DEST (PATTERN (jump)) == pc_rtx
7175 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
7176 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
7177 {
7178 remove_insn (jump);
7179 remove_insn (barrier);
7180 remove_insn (label);
7181 }
7182
7183 remove_insn (curr_pool->pool_insn);
7184 }
7185
7186 /* Remove all base register reload insns. */
7187
7188 for (insn = get_insns (); insn; )
7189 {
7190 rtx next_insn = NEXT_INSN (insn);
7191
7192 if (GET_CODE (insn) == INSN
7193 && GET_CODE (PATTERN (insn)) == SET
7194 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
7195 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
7196 remove_insn (insn);
7197
7198 insn = next_insn;
7199 }
7200
7201 /* Free pool list. */
7202
7203 while (pool_list)
7204 {
7205 struct constant_pool *next = pool_list->next;
7206 s390_free_pool (pool_list);
7207 pool_list = next;
7208 }
7209 }
7210
7211 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
7212
7213 void
7214 s390_output_pool_entry (rtx exp, enum machine_mode mode, unsigned int align)
7215 {
7216 REAL_VALUE_TYPE r;
7217
7218 switch (GET_MODE_CLASS (mode))
7219 {
7220 case MODE_FLOAT:
7221 case MODE_DECIMAL_FLOAT:
7222 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
7223
7224 REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
7225 assemble_real (r, mode, align);
7226 break;
7227
7228 case MODE_INT:
7229 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
7230 mark_symbol_refs_as_used (exp);
7231 break;
7232
7233 default:
7234 gcc_unreachable ();
7235 }
7236 }
7237
7238
7239 /* Return an RTL expression representing the value of the return address
7240 for the frame COUNT steps up from the current frame. FRAME is the
7241 frame pointer of that frame. */
7242
7243 rtx
7244 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
7245 {
7246 int offset;
7247 rtx addr;
7248
7249 /* Without backchain, we fail for all but the current frame. */
7250
7251 if (!TARGET_BACKCHAIN && count > 0)
7252 return NULL_RTX;
7253
7254 /* For the current frame, we need to make sure the initial
7255 value of RETURN_REGNUM is actually saved. */
7256
7257 if (count == 0)
7258 {
7259 /* On non-z architectures branch splitting could overwrite r14. */
7260 if (TARGET_CPU_ZARCH)
7261 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
7262 else
7263 {
7264 cfun_frame_layout.save_return_addr_p = true;
7265 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
7266 }
7267 }
7268
7269 if (TARGET_PACKED_STACK)
7270 offset = -2 * UNITS_PER_LONG;
7271 else
7272 offset = RETURN_REGNUM * UNITS_PER_LONG;
7273
7274 addr = plus_constant (Pmode, frame, offset);
7275 addr = memory_address (Pmode, addr);
7276 return gen_rtx_MEM (Pmode, addr);
7277 }
7278
7279 /* Return an RTL expression representing the back chain stored in
7280 the current stack frame. */
7281
7282 rtx
7283 s390_back_chain_rtx (void)
7284 {
7285 rtx chain;
7286
7287 gcc_assert (TARGET_BACKCHAIN);
7288
7289 if (TARGET_PACKED_STACK)
7290 chain = plus_constant (Pmode, stack_pointer_rtx,
7291 STACK_POINTER_OFFSET - UNITS_PER_LONG);
7292 else
7293 chain = stack_pointer_rtx;
7294
7295 chain = gen_rtx_MEM (Pmode, chain);
7296 return chain;
7297 }
7298
7299 /* Find the first call-clobbered register unused in the current function.
7300 It could be used as a base register in a leaf function
7301 or for holding the return address before the epilogue. */
7302
7303 static int
7304 find_unused_clobbered_reg (void)
7305 {
7306 int i;
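/* GPRs 0-5 are the call-clobbered registers in the s390 ABI.  */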
7307 for (i = 0; i < 6; i++)
7308 if (!df_regs_ever_live_p (i))
7309 return i;
7310 return 0;
7311 }
7312
7313
7314 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
7315 clobbered hard regs in SETREG. */
7316
7317 static void
7318 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
7319 {
7320 int *regs_ever_clobbered = (int *)data;
7321 unsigned int i, regno;
7322 enum machine_mode mode = GET_MODE (setreg);
7323
7324 if (GET_CODE (setreg) == SUBREG)
7325 {
7326 rtx inner = SUBREG_REG (setreg);
7327 if (!GENERAL_REG_P (inner))
7328 return;
7329 regno = subreg_regno (setreg);
7330 }
7331 else if (GENERAL_REG_P (setreg))
7332 regno = REGNO (setreg);
7333 else
7334 return;
7335
7336 for (i = regno;
7337 i < regno + HARD_REGNO_NREGS (regno, mode);
7338 i++)
7339 regs_ever_clobbered[i] = 1;
7340 }
7341
7342 /* Walks through all basic blocks of the current function looking
7343 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
7344 of the passed integer array REGS_EVER_CLOBBERED are set to one for
7345 each of those regs. */
7346
7347 static void
7348 s390_regs_ever_clobbered (int *regs_ever_clobbered)
7349 {
7350 basic_block cur_bb;
7351 rtx cur_insn;
7352 unsigned int i;
7353
7354 memset (regs_ever_clobbered, 0, 16 * sizeof (int));
7355
7356 /* For non-leaf functions we have to consider all call clobbered regs to be
7357 clobbered. */
7358 if (!crtl->is_leaf)
7359 {
7360 for (i = 0; i < 16; i++)
7361 regs_ever_clobbered[i] = call_really_used_regs[i];
7362 }
7363
7364 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
7365 this work is done by liveness analysis (mark_regs_live_at_end).
7366 Special care is needed for functions containing landing pads. Landing pads
7367 may use the eh registers, but the code which sets these registers is not
7368 contained in that function. Hence s390_regs_ever_clobbered is not able to
7369 deal with this automatically. */
7370 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
7371 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
7372 if (crtl->calls_eh_return
7373 || (cfun->machine->has_landing_pad_p
7374 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
7375 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
7376
7377 /* For nonlocal gotos all call-saved registers have to be saved.
7378 This flag is also set for the unwinding code in libgcc.
7379 See expand_builtin_unwind_init. For regs_ever_live this is done by
7380 reload. */
7381 if (cfun->has_nonlocal_label)
7382 for (i = 0; i < 16; i++)
7383 if (!call_really_used_regs[i])
7384 regs_ever_clobbered[i] = 1;
7385
7386 FOR_EACH_BB (cur_bb)
7387 {
7388 FOR_BB_INSNS (cur_bb, cur_insn)
7389 {
7390 if (INSN_P (cur_insn))
7391 note_stores (PATTERN (cur_insn),
7392 s390_reg_clobbered_rtx,
7393 regs_ever_clobbered);
7394 }
7395 }
7396 }
7397
7398 /* Determine the frame area which actually has to be accessed
7399 in the function epilogue. The values are stored at the
7400 given pointers AREA_BOTTOM (address of the lowest used stack
7401 address) and AREA_TOP (address of the first item which does
7402 not belong to the stack frame). */
7403
7404 static void
7405 s390_frame_area (int *area_bottom, int *area_top)
7406 {
7407 int b, t;
7408 int i;
7409
7410 b = INT_MAX;
7411 t = INT_MIN;
7412
7413 if (cfun_frame_layout.first_restore_gpr != -1)
7414 {
7415 b = (cfun_frame_layout.gprs_offset
7416 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
7417 t = b + (cfun_frame_layout.last_restore_gpr
7418 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
7419 }
7420
7421 if (TARGET_64BIT && cfun_save_high_fprs_p)
7422 {
7423 b = MIN (b, cfun_frame_layout.f8_offset);
7424 t = MAX (t, (cfun_frame_layout.f8_offset
7425 + cfun_frame_layout.high_fprs * 8));
7426 }
7427
7428 if (!TARGET_64BIT)
7429 for (i = 2; i < 4; i++)
7430 if (cfun_fpr_bit_p (i))
7431 {
7432 b = MIN (b, cfun_frame_layout.f4_offset + (i - 2) * 8);
7433 t = MAX (t, cfun_frame_layout.f4_offset + (i - 1) * 8);
7434 }
7435
7436 *area_bottom = b;
7437 *area_top = t;
7438 }
7439
7440 /* Fill cfun->machine with info about register usage of current function.
7441 Return in CLOBBERED_REGS which GPRs are currently considered set. */
7442
7443 static void
7444 s390_register_info (int clobbered_regs[])
7445 {
7446 int i, j;
7447
7448 /* FPRs 8-15 are call-saved in the 64-bit ABI. */
7449 cfun_frame_layout.fpr_bitmap = 0;
7450 cfun_frame_layout.high_fprs = 0;
7451 if (TARGET_64BIT)
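/* Hard registers 24-31 hold the call-saved FPRs f8-f15.  */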
7452 for (i = 24; i < 32; i++)
7453 if (df_regs_ever_live_p (i) && !global_regs[i])
7454 {
7455 cfun_set_fpr_bit (i - 16);
7456 cfun_frame_layout.high_fprs++;
7457 }
7458
7459 /* Find first and last gpr to be saved. We trust regs_ever_live
7460 data, except that we don't save and restore global registers.
7461
7462 Also, all registers with special meaning to the compiler need
7463 to be handled specially. */
7464
7465 s390_regs_ever_clobbered (clobbered_regs);
7466
7467 for (i = 0; i < 16; i++)
7468 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i] && !fixed_regs[i];
7469
7470 if (frame_pointer_needed)
7471 clobbered_regs[HARD_FRAME_POINTER_REGNUM] = 1;
7472
7473 if (flag_pic)
7474 clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
7475 |= df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7476
7477 clobbered_regs[BASE_REGNUM]
7478 |= (cfun->machine->base_reg
7479 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
7480
7481 clobbered_regs[RETURN_REGNUM]
7482 |= (!crtl->is_leaf
7483 || TARGET_TPF_PROFILING
7484 || cfun->machine->split_branches_pending_p
7485 || cfun_frame_layout.save_return_addr_p
7486 || crtl->calls_eh_return
7487 || cfun->stdarg);
7488
7489 clobbered_regs[STACK_POINTER_REGNUM]
7490 |= (!crtl->is_leaf
7491 || TARGET_TPF_PROFILING
7492 || cfun_save_high_fprs_p
7493 || get_frame_size () > 0
7494 || cfun->calls_alloca
7495 || cfun->stdarg);
7496
7497 for (i = 6; i < 16; i++)
7498 if (df_regs_ever_live_p (i) || clobbered_regs[i])
7499 break;
7500 for (j = 15; j > i; j--)
7501 if (df_regs_ever_live_p (j) || clobbered_regs[j])
7502 break;
7503
7504 if (i == 16)
7505 {
7506 /* Nothing to save/restore. */
7507 cfun_frame_layout.first_save_gpr_slot = -1;
7508 cfun_frame_layout.last_save_gpr_slot = -1;
7509 cfun_frame_layout.first_save_gpr = -1;
7510 cfun_frame_layout.first_restore_gpr = -1;
7511 cfun_frame_layout.last_save_gpr = -1;
7512 cfun_frame_layout.last_restore_gpr = -1;
7513 }
7514 else
7515 {
7516 /* Save slots for gprs from i to j. */
7517 cfun_frame_layout.first_save_gpr_slot = i;
7518 cfun_frame_layout.last_save_gpr_slot = j;
7519
7520 for (i = cfun_frame_layout.first_save_gpr_slot;
7521 i < cfun_frame_layout.last_save_gpr_slot + 1;
7522 i++)
7523 if (clobbered_regs[i])
7524 break;
7525
7526 for (j = cfun_frame_layout.last_save_gpr_slot; j > i; j--)
7527 if (clobbered_regs[j])
7528 break;
7529
7530 if (i == cfun_frame_layout.last_save_gpr_slot + 1)
7531 {
7532 /* Nothing to save/restore. */
7533 cfun_frame_layout.first_save_gpr = -1;
7534 cfun_frame_layout.first_restore_gpr = -1;
7535 cfun_frame_layout.last_save_gpr = -1;
7536 cfun_frame_layout.last_restore_gpr = -1;
7537 }
7538 else
7539 {
7540 /* Save / Restore from gpr i to j. */
7541 cfun_frame_layout.first_save_gpr = i;
7542 cfun_frame_layout.first_restore_gpr = i;
7543 cfun_frame_layout.last_save_gpr = j;
7544 cfun_frame_layout.last_restore_gpr = j;
7545 }
7546 }
7547
7548 if (cfun->stdarg)
7549 {
7550 /* Varargs functions need to save gprs 2 to 6. */
7551 if (cfun->va_list_gpr_size
7552 && crtl->args.info.gprs < GP_ARG_NUM_REG)
7553 {
7554 int min_gpr = crtl->args.info.gprs;
7555 int max_gpr = min_gpr + cfun->va_list_gpr_size;
7556 if (max_gpr > GP_ARG_NUM_REG)
7557 max_gpr = GP_ARG_NUM_REG;
7558
7559 if (cfun_frame_layout.first_save_gpr == -1
7560 || cfun_frame_layout.first_save_gpr > 2 + min_gpr)
7561 {
7562 cfun_frame_layout.first_save_gpr = 2 + min_gpr;
7563 cfun_frame_layout.first_save_gpr_slot = 2 + min_gpr;
7564 }
7565
7566 if (cfun_frame_layout.last_save_gpr == -1
7567 || cfun_frame_layout.last_save_gpr < 2 + max_gpr - 1)
7568 {
7569 cfun_frame_layout.last_save_gpr = 2 + max_gpr - 1;
7570 cfun_frame_layout.last_save_gpr_slot = 2 + max_gpr - 1;
7571 }
7572 }
7573
7574 /* Mark f0, f2 for 31 bit and f0-f4 for 64 bit to be saved. */
7575 if (TARGET_HARD_FLOAT && cfun->va_list_fpr_size
7576 && crtl->args.info.fprs < FP_ARG_NUM_REG)
7577 {
7578 int min_fpr = crtl->args.info.fprs;
7579 int max_fpr = min_fpr + cfun->va_list_fpr_size;
7580 if (max_fpr > FP_ARG_NUM_REG)
7581 max_fpr = FP_ARG_NUM_REG;
7582
7583 /* ??? This is currently required to ensure proper location
7584 of the fpr save slots within the va_list save area. */
7585 if (TARGET_PACKED_STACK)
7586 min_fpr = 0;
7587
7588 for (i = min_fpr; i < max_fpr; i++)
7589 cfun_set_fpr_bit (i);
7590 }
7591 }
7592
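/* In the 31 bit ABI f4 and f6 (fpr bits 2 and 3) are call-saved and need a save slot when used.  */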
7593 if (!TARGET_64BIT)
7594 for (i = 2; i < 4; i++)
7595 if (df_regs_ever_live_p (i + 16) && !global_regs[i + 16])
7596 cfun_set_fpr_bit (i);
7597 }
7598
7599 /* Fill cfun->machine with info about frame of current function. */
7600
7601 static void
7602 s390_frame_info (void)
7603 {
7604 int i;
7605
7606 cfun_frame_layout.frame_size = get_frame_size ();
7607 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
7608 fatal_error ("total size of local variables exceeds architecture limit");
7609
7610 if (!TARGET_PACKED_STACK)
7611 {
7612 cfun_frame_layout.backchain_offset = 0;
7613 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
7614 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
7615 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
7616 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
7617 * UNITS_PER_LONG);
7618 }
7619 else if (TARGET_BACKCHAIN) /* kernel stack layout */
7620 {
7621 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
7622 - UNITS_PER_LONG);
7623 cfun_frame_layout.gprs_offset
7624 = (cfun_frame_layout.backchain_offset
7625 - (STACK_POINTER_REGNUM - cfun_frame_layout.first_save_gpr_slot + 1)
7626 * UNITS_PER_LONG);
7627
7628 if (TARGET_64BIT)
7629 {
7630 cfun_frame_layout.f4_offset
7631 = (cfun_frame_layout.gprs_offset
7632 - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
7633
7634 cfun_frame_layout.f0_offset
7635 = (cfun_frame_layout.f4_offset
7636 - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
7637 }
7638 else
7639 {
7640 /* On 31 bit we have to take care of the alignment of the
7641 floating point regs to provide the fastest access. */
7642 cfun_frame_layout.f0_offset
7643 = ((cfun_frame_layout.gprs_offset
7644 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1))
7645 - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
7646
7647 cfun_frame_layout.f4_offset
7648 = (cfun_frame_layout.f0_offset
7649 - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
7650 }
7651 }
7652 else /* no backchain */
7653 {
7654 cfun_frame_layout.f4_offset
7655 = (STACK_POINTER_OFFSET
7656 - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
7657
7658 cfun_frame_layout.f0_offset
7659 = (cfun_frame_layout.f4_offset
7660 - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
7661
7662 cfun_frame_layout.gprs_offset
7663 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
7664 }
7665
7666 if (crtl->is_leaf
7667 && !TARGET_TPF_PROFILING
7668 && cfun_frame_layout.frame_size == 0
7669 && !cfun_save_high_fprs_p
7670 && !cfun->calls_alloca
7671 && !cfun->stdarg)
7672 return;
7673
7674 if (!TARGET_PACKED_STACK)
7675 cfun_frame_layout.frame_size += (STACK_POINTER_OFFSET
7676 + crtl->outgoing_args_size
7677 + cfun_frame_layout.high_fprs * 8);
7678 else
7679 {
7680 if (TARGET_BACKCHAIN)
7681 cfun_frame_layout.frame_size += UNITS_PER_LONG;
7682
7683 /* No alignment trouble here because f8-f15 are only saved under
7684 64 bit. */
7685 cfun_frame_layout.f8_offset = (MIN (MIN (cfun_frame_layout.f0_offset,
7686 cfun_frame_layout.f4_offset),
7687 cfun_frame_layout.gprs_offset)
7688 - cfun_frame_layout.high_fprs * 8);
7689
7690 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
7691
7692 for (i = 0; i < 8; i++)
7693 if (cfun_fpr_bit_p (i))
7694 cfun_frame_layout.frame_size += 8;
7695
7696 cfun_frame_layout.frame_size += cfun_gprs_save_area_size;
7697
7698 /* If an odd number of gprs has to be saved under 31 bit, we have to adjust
7699 the frame size to maintain 8 byte alignment of stack frames. */
7700 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
7701 STACK_BOUNDARY / BITS_PER_UNIT - 1)
7702 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
7703
7704 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
7705 }
7706 }
7707
7708 /* Generate frame layout. Fills in register and frame data for the current
7709 function in cfun->machine. This routine can be called multiple times;
7710 it will re-do the complete frame layout every time. */
7711
7712 static void
7713 s390_init_frame_layout (void)
7714 {
7715 HOST_WIDE_INT frame_size;
7716 int base_used;
7717 int clobbered_regs[16];
7718
7719 /* On S/390 machines, we may need to perform branch splitting, which
7720 will require both the base and the return address register. We have no
7721 choice but to assume we're going to need them until right at the
7722 end of the machine dependent reorg phase. */
7723 if (!TARGET_CPU_ZARCH)
7724 cfun->machine->split_branches_pending_p = true;
7725
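/* Iterate until the frame size converges: whether the literal pool base register is needed depends on the frame size, and the frame size in turn depends on the register save area computed below.  */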
7726 do
7727 {
7728 frame_size = cfun_frame_layout.frame_size;
7729
7730 /* Try to predict whether we'll need the base register. */
7731 base_used = cfun->machine->split_branches_pending_p
7732 || crtl->uses_const_pool
7733 || (!DISP_IN_RANGE (frame_size)
7734 && !CONST_OK_FOR_K (frame_size));
7735
7736 /* Decide which register to use as literal pool base. In small
7737 leaf functions, try to use an unused call-clobbered register
7738 as base register to avoid save/restore overhead. */
7739 if (!base_used)
7740 cfun->machine->base_reg = NULL_RTX;
7741 else if (crtl->is_leaf && !df_regs_ever_live_p (5))
7742 cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
7743 else
7744 cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
7745
7746 s390_register_info (clobbered_regs);
7747 s390_frame_info ();
7748 }
7749 while (frame_size != cfun_frame_layout.frame_size);
7750 }
7751
7752 /* Update frame layout. Recompute actual register save data based on
7753 current info and update regs_ever_live for the special registers.
7754 May be called multiple times, but may never cause *more* registers
7755 to be saved than s390_init_frame_layout allocated room for. */
7756
7757 static void
7758 s390_update_frame_layout (void)
7759 {
7760 int clobbered_regs[16];
7761
7762 s390_register_info (clobbered_regs);
7763
7764 df_set_regs_ever_live (BASE_REGNUM,
7765 clobbered_regs[BASE_REGNUM] ? true : false);
7766 df_set_regs_ever_live (RETURN_REGNUM,
7767 clobbered_regs[RETURN_REGNUM] ? true : false);
7768 df_set_regs_ever_live (STACK_POINTER_REGNUM,
7769 clobbered_regs[STACK_POINTER_REGNUM] ? true : false);
7770
7771 if (cfun->machine->base_reg)
7772 df_set_regs_ever_live (REGNO (cfun->machine->base_reg), true);
7773 }
7774
7775 /* Return true if it is legal to put a value with MODE into REGNO. */
7776
7777 bool
7778 s390_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
7779 {
7780 switch (REGNO_REG_CLASS (regno))
7781 {
7782 case FP_REGS:
7783 if (REGNO_PAIR_OK (regno, mode))
7784 {
7785 if (mode == SImode || mode == DImode)
7786 return true;
7787
7788 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
7789 return true;
7790 }
7791 break;
7792 case ADDR_REGS:
7793 if (FRAME_REGNO_P (regno) && mode == Pmode)
7794 return true;
7795
7796 /* fallthrough */
7797 case GENERAL_REGS:
7798 if (REGNO_PAIR_OK (regno, mode))
7799 {
7800 if (TARGET_ZARCH
7801 || (mode != TFmode && mode != TCmode && mode != TDmode))
7802 return true;
7803 }
7804 break;
7805 case CC_REGS:
7806 if (GET_MODE_CLASS (mode) == MODE_CC)
7807 return true;
7808 break;
7809 case ACCESS_REGS:
7810 if (REGNO_PAIR_OK (regno, mode))
7811 {
7812 if (mode == SImode || mode == Pmode)
7813 return true;
7814 }
7815 break;
7816 default:
7817 return false;
7818 }
7819
7820 return false;
7821 }
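
/* Illustrative examples for the rules above (added comment, not part of
   the original source): SImode, DImode and the scalar floating-point
   modes are allowed in floating-point registers; TFmode, TCmode and
   TDmode values are only allowed in general registers under TARGET_ZARCH;
   and only MODE_CC values may live in the condition code register.  */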
7822
7823 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
7824
7825 bool
7826 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
7827 {
7828 /* Once we've decided upon a register to use as base register, it must
7829 no longer be used for any other purpose. */
7830 if (cfun->machine->base_reg)
7831 if (REGNO (cfun->machine->base_reg) == old_reg
7832 || REGNO (cfun->machine->base_reg) == new_reg)
7833 return false;
7834
7835 return true;
7836 }
7837
7838 /* Maximum number of registers to represent a value of mode MODE
7839 in a register of class RCLASS. */
7840
7841 int
7842 s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
7843 {
7844 switch (rclass)
7845 {
7846 case FP_REGS:
7847 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7848 return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8);
7849 else
7850 return (GET_MODE_SIZE (mode) + 8 - 1) / 8;
7851 case ACCESS_REGS:
7852 return (GET_MODE_SIZE (mode) + 4 - 1) / 4;
7853 default:
7854 break;
7855 }
7856 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7857 }
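
/* Worked examples (added, illustrative): with 8-byte FPRs a DFmode value
   occupies one FP register and a TFmode value (16 bytes) occupies two;
   a complex DCmode value (2 x 8 bytes) likewise needs two.  For
   ACCESS_REGS the formula above yields (size + 3) / 4 of the 4-byte
   access registers.  */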
7858
7859 /* Return true if register FROM can be eliminated via register TO. */
7860
7861 static bool
7862 s390_can_eliminate (const int from, const int to)
7863 {
7864 /* On zSeries machines, we have not marked the base register as fixed.
7865 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
7866 If a function requires the base register, we say here that this
7867 elimination cannot be performed. This will cause reload to free
7868 up the base register (as if it were fixed). On the other hand,
7869 if the current function does *not* require the base register, we
7870 say here the elimination succeeds, which in turn allows reload
7871 to allocate the base register for any other purpose. */
7872 if (from == BASE_REGNUM && to == BASE_REGNUM)
7873 {
7874 if (TARGET_CPU_ZARCH)
7875 {
7876 s390_init_frame_layout ();
7877 return cfun->machine->base_reg == NULL_RTX;
7878 }
7879
7880 return false;
7881 }
7882
7883 /* Everything else must point into the stack frame. */
7884 gcc_assert (to == STACK_POINTER_REGNUM
7885 || to == HARD_FRAME_POINTER_REGNUM);
7886
7887 gcc_assert (from == FRAME_POINTER_REGNUM
7888 || from == ARG_POINTER_REGNUM
7889 || from == RETURN_ADDRESS_POINTER_REGNUM);
7890
7891 /* Make sure we actually saved the return address. */
7892 if (from == RETURN_ADDRESS_POINTER_REGNUM)
7893 if (!crtl->calls_eh_return
7894 && !cfun->stdarg
7895 && !cfun_frame_layout.save_return_addr_p)
7896 return false;
7897
7898 return true;
7899 }
7900
7901 /* Return the offset between register FROM and TO initially after the prologue.  */
7902
7903 HOST_WIDE_INT
7904 s390_initial_elimination_offset (int from, int to)
7905 {
7906 HOST_WIDE_INT offset;
7907 int index;
7908
7909 /* ??? Why are we called for non-eliminable pairs? */
7910 if (!s390_can_eliminate (from, to))
7911 return 0;
7912
7913 switch (from)
7914 {
7915 case FRAME_POINTER_REGNUM:
7916 offset = (get_frame_size()
7917 + STACK_POINTER_OFFSET
7918 + crtl->outgoing_args_size);
7919 break;
7920
7921 case ARG_POINTER_REGNUM:
7922 s390_init_frame_layout ();
7923 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
7924 break;
7925
7926 case RETURN_ADDRESS_POINTER_REGNUM:
7927 s390_init_frame_layout ();
7928 index = RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot;
7929 gcc_assert (index >= 0);
7930 offset = cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset;
7931 offset += index * UNITS_PER_LONG;
7932 break;
7933
7934 case BASE_REGNUM:
7935 offset = 0;
7936 break;
7937
7938 default:
7939 gcc_unreachable ();
7940 }
7941
7942 return offset;
7943 }
7944
7945 /* Emit insn to save fpr REGNUM at offset OFFSET relative
7946 to register BASE. Return generated insn. */
7947
7948 static rtx
7949 save_fpr (rtx base, int offset, int regnum)
7950 {
7951 rtx addr;
7952 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
7953
7954 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
7955 set_mem_alias_set (addr, get_varargs_alias_set ());
7956 else
7957 set_mem_alias_set (addr, get_frame_alias_set ());
7958
7959 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
7960 }
7961
7962 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
7963 to register BASE. Return generated insn. */
7964
7965 static rtx
7966 restore_fpr (rtx base, int offset, int regnum)
7967 {
7968 rtx addr;
7969 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
7970 set_mem_alias_set (addr, get_frame_alias_set ());
7971
7972 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
7973 }
7974
7975 /* Return true if REGNO is a global register, but not one
7976    of the special ones that need to be saved/restored anyway.  */
7977
7978 static inline bool
7979 global_not_special_regno_p (int regno)
7980 {
7981 return (global_regs[regno]
7982 /* These registers are special and need to be
7983 restored in any case. */
7984 && !(regno == STACK_POINTER_REGNUM
7985 || regno == RETURN_REGNUM
7986 || regno == BASE_REGNUM
7987 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
7988 }
7989
7990 /* Generate insn to save registers FIRST to LAST into
7991 the register save area located at offset OFFSET
7992 relative to register BASE. */
7993
7994 static rtx
7995 save_gprs (rtx base, int offset, int first, int last)
7996 {
7997 rtx addr, insn, note;
7998 int i;
7999
8000 addr = plus_constant (Pmode, base, offset);
8001 addr = gen_rtx_MEM (Pmode, addr);
8002
8003 set_mem_alias_set (addr, get_frame_alias_set ());
8004
8005 /* Special-case single register. */
8006 if (first == last)
8007 {
8008 if (TARGET_64BIT)
8009 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
8010 else
8011 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
8012
8013 if (!global_not_special_regno_p (first))
8014 RTX_FRAME_RELATED_P (insn) = 1;
8015 return insn;
8016 }
8017
8018
8019 insn = gen_store_multiple (addr,
8020 gen_rtx_REG (Pmode, first),
8021 GEN_INT (last - first + 1));
8022
8023 if (first <= 6 && cfun->stdarg)
8024 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8025 {
8026 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
8027
8028 if (first + i <= 6)
8029 set_mem_alias_set (mem, get_varargs_alias_set ());
8030 }
8031
8032 /* We need to set the FRAME_RELATED flag on all SETs
8033 inside the store-multiple pattern.
8034
8035 However, we must not emit DWARF records for registers 2..5
8036 if they are stored for use by variable arguments ...
8037
8038      ??? Unfortunately, it is not enough to simply not set the
8039 FRAME_RELATED flags for those SETs, because the first SET
8040 of the PARALLEL is always treated as if it had the flag
8041 set, even if it does not. Therefore we emit a new pattern
8042 without those registers as REG_FRAME_RELATED_EXPR note. */
8043
8044 if (first >= 6 && !global_not_special_regno_p (first))
8045 {
8046 rtx pat = PATTERN (insn);
8047
8048 for (i = 0; i < XVECLEN (pat, 0); i++)
8049 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
8050 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
8051 0, i)))))
8052 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
8053
8054 RTX_FRAME_RELATED_P (insn) = 1;
8055 }
8056 else if (last >= 6)
8057 {
8058 int start;
8059
8060 for (start = first >= 6 ? first : 6; start <= last; start++)
8061 if (!global_not_special_regno_p (start))
8062 break;
8063
8064 if (start > last)
8065 return insn;
8066
8067 addr = plus_constant (Pmode, base,
8068 offset + (start - first) * UNITS_PER_LONG);
8069 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
8070 gen_rtx_REG (Pmode, start),
8071 GEN_INT (last - start + 1));
8072 note = PATTERN (note);
8073
8074 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
8075
8076 for (i = 0; i < XVECLEN (note, 0); i++)
8077 if (GET_CODE (XVECEXP (note, 0, i)) == SET
8078 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
8079 0, i)))))
8080 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
8081
8082 RTX_FRAME_RELATED_P (insn) = 1;
8083 }
8084
8085 return insn;
8086 }
8087
8088 /* Generate insn to restore registers FIRST to LAST from
8089 the register save area located at offset OFFSET
8090 relative to register BASE. */
8091
8092 static rtx
8093 restore_gprs (rtx base, int offset, int first, int last)
8094 {
8095 rtx addr, insn;
8096
8097 addr = plus_constant (Pmode, base, offset);
8098 addr = gen_rtx_MEM (Pmode, addr);
8099 set_mem_alias_set (addr, get_frame_alias_set ());
8100
8101 /* Special-case single register. */
8102 if (first == last)
8103 {
8104 if (TARGET_64BIT)
8105 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
8106 else
8107 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
8108
8109 return insn;
8110 }
8111
8112 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
8113 addr,
8114 GEN_INT (last - first + 1));
8115 return insn;
8116 }
8117
8118 /* Return insn sequence to load the GOT register. */
8119
8120 static GTY(()) rtx got_symbol;
8121 rtx
8122 s390_load_got (void)
8123 {
8124 rtx insns;
8125
8126   /* We cannot use pic_offset_table_rtx here since this function is
8127      also used for non-pic code if __tls_get_offset is called, and in
8128      that case neither PIC_OFFSET_TABLE_REGNUM nor pic_offset_table_rtx
8129      is usable.  */
8130 rtx got_rtx = gen_rtx_REG (Pmode, 12);
8131
8132 if (!got_symbol)
8133 {
8134 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8135 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
8136 }
8137
8138 start_sequence ();
8139
8140 if (TARGET_CPU_ZARCH)
8141 {
8142 emit_move_insn (got_rtx, got_symbol);
8143 }
8144 else
8145 {
8146 rtx offset;
8147
8148 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
8149 UNSPEC_LTREL_OFFSET);
8150 offset = gen_rtx_CONST (Pmode, offset);
8151 offset = force_const_mem (Pmode, offset);
8152
8153 emit_move_insn (got_rtx, offset);
8154
8155 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
8156 UNSPEC_LTREL_BASE);
8157 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
8158
8159 emit_move_insn (got_rtx, offset);
8160 }
8161
8162 insns = get_insns ();
8163 end_sequence ();
8164 return insns;
8165 }
8166
8167 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
8168 and the change to the stack pointer. */
8169
8170 static void
8171 s390_emit_stack_tie (void)
8172 {
8173 rtx mem = gen_frame_mem (BLKmode,
8174 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
8175
8176 emit_insn (gen_stack_tie (mem));
8177 }
8178
8179 /* Expand the prologue into a bunch of separate insns. */
8180
8181 void
8182 s390_emit_prologue (void)
8183 {
8184 rtx insn, addr;
8185 rtx temp_reg;
8186 int i;
8187 int offset;
8188 int next_fpr = 0;
8189
8190 /* Complete frame layout. */
8191
8192 s390_update_frame_layout ();
8193
8194 /* Annotate all constant pool references to let the scheduler know
8195 they implicitly use the base register. */
8196
8197 push_topmost_sequence ();
8198
8199 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8200 if (INSN_P (insn))
8201 {
8202 annotate_constant_pool_refs (&PATTERN (insn));
8203 df_insn_rescan (insn);
8204 }
8205
8206 pop_topmost_sequence ();
8207
8208   /* Choose the best register to use as a temporary within the prologue.
8209      See below for why TPF must use register 1.  */
8210
8211 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
8212 && !crtl->is_leaf
8213 && !TARGET_TPF_PROFILING)
8214 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8215 else
8216 temp_reg = gen_rtx_REG (Pmode, 1);
8217
8218 /* Save call saved gprs. */
8219 if (cfun_frame_layout.first_save_gpr != -1)
8220 {
8221 insn = save_gprs (stack_pointer_rtx,
8222 cfun_frame_layout.gprs_offset +
8223 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
8224 - cfun_frame_layout.first_save_gpr_slot),
8225 cfun_frame_layout.first_save_gpr,
8226 cfun_frame_layout.last_save_gpr);
8227 emit_insn (insn);
8228 }
8229
8230 /* Dummy insn to mark literal pool slot. */
8231
8232 if (cfun->machine->base_reg)
8233 emit_insn (gen_main_pool (cfun->machine->base_reg));
8234
8235 offset = cfun_frame_layout.f0_offset;
8236
8237 /* Save f0 and f2. */
8238 for (i = 0; i < 2; i++)
8239 {
8240 if (cfun_fpr_bit_p (i))
8241 {
8242 save_fpr (stack_pointer_rtx, offset, i + 16);
8243 offset += 8;
8244 }
8245 else if (!TARGET_PACKED_STACK)
8246 offset += 8;
8247 }
8248
8249 /* Save f4 and f6. */
8250 offset = cfun_frame_layout.f4_offset;
8251 for (i = 2; i < 4; i++)
8252 {
8253 if (cfun_fpr_bit_p (i))
8254 {
8255 insn = save_fpr (stack_pointer_rtx, offset, i + 16);
8256 offset += 8;
8257
8258 	  /* If f4 and f6 are call clobbered, they are saved due to stdargs and
8259 	     therefore are not frame related.  */
8260 if (!call_really_used_regs[i + 16])
8261 RTX_FRAME_RELATED_P (insn) = 1;
8262 }
8263 else if (!TARGET_PACKED_STACK)
8264 offset += 8;
8265 }
8266
8267 if (TARGET_PACKED_STACK
8268 && cfun_save_high_fprs_p
8269 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
8270 {
8271 offset = (cfun_frame_layout.f8_offset
8272 + (cfun_frame_layout.high_fprs - 1) * 8);
8273
8274 for (i = 15; i > 7 && offset >= 0; i--)
8275 if (cfun_fpr_bit_p (i))
8276 {
8277 insn = save_fpr (stack_pointer_rtx, offset, i + 16);
8278
8279 RTX_FRAME_RELATED_P (insn) = 1;
8280 offset -= 8;
8281 }
8282 if (offset >= cfun_frame_layout.f8_offset)
8283 next_fpr = i + 16;
8284 }
8285
8286 if (!TARGET_PACKED_STACK)
8287 next_fpr = cfun_save_high_fprs_p ? 31 : 0;
8288
8289 if (flag_stack_usage_info)
8290 current_function_static_stack_size = cfun_frame_layout.frame_size;
8291
8292 /* Decrement stack pointer. */
8293
8294 if (cfun_frame_layout.frame_size > 0)
8295 {
8296 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8297 rtx real_frame_off;
8298
8299 if (s390_stack_size)
8300 {
8301 HOST_WIDE_INT stack_guard;
8302
8303 if (s390_stack_guard)
8304 stack_guard = s390_stack_guard;
8305 else
8306 {
8307 	      /* If no value for the stack guard is provided, the smallest power
8308 		 of 2 not smaller than the current frame size is chosen.  */
8309 stack_guard = 1;
8310 while (stack_guard < cfun_frame_layout.frame_size)
8311 stack_guard <<= 1;
8312 }
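
	  /* Added illustrative example: for a frame size of 6000 bytes and no
	     -mstack-guard value, the loop above ends up with stack_guard ==
	     8192, the smallest power of two not below the frame size.  */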
8313
8314 if (cfun_frame_layout.frame_size >= s390_stack_size)
8315 {
8316 warning (0, "frame size of function %qs is %wd"
8317 " bytes exceeding user provided stack limit of "
8318 "%d bytes. "
8319 "An unconditional trap is added.",
8320 current_function_name(), cfun_frame_layout.frame_size,
8321 s390_stack_size);
8322 emit_insn (gen_trap ());
8323 }
8324 else
8325 {
8326 	      /* stack_guard has to be smaller than s390_stack_size.
8327 		 Otherwise we would emit an AND with zero, which would
8328 		 not match the test-under-mask pattern.  */
8329 if (stack_guard >= s390_stack_size)
8330 {
8331 warning (0, "frame size of function %qs is %wd"
8332 " bytes which is more than half the stack size. "
8333 "The dynamic check would not be reliable. "
8334 "No check emitted for this function.",
8335 current_function_name(),
8336 cfun_frame_layout.frame_size);
8337 }
8338 else
8339 {
8340 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
8341 & ~(stack_guard - 1));
8342
8343 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
8344 GEN_INT (stack_check_mask));
8345 if (TARGET_64BIT)
8346 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
8347 t, const0_rtx),
8348 t, const0_rtx, const0_rtx));
8349 else
8350 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
8351 t, const0_rtx),
8352 t, const0_rtx, const0_rtx));
8353 }
8354 }
8355 }
8356
8357 if (s390_warn_framesize > 0
8358 && cfun_frame_layout.frame_size >= s390_warn_framesize)
8359 warning (0, "frame size of %qs is %wd bytes",
8360 current_function_name (), cfun_frame_layout.frame_size);
8361
8362 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
8363 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
8364
8365 /* Save incoming stack pointer into temp reg. */
8366 if (TARGET_BACKCHAIN || next_fpr)
8367 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
8368
8369 /* Subtract frame size from stack pointer. */
8370
8371 if (DISP_IN_RANGE (INTVAL (frame_off)))
8372 {
8373 insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8374 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8375 frame_off));
8376 insn = emit_insn (insn);
8377 }
8378 else
8379 {
8380 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
8381 frame_off = force_const_mem (Pmode, frame_off);
8382
8383 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
8384 annotate_constant_pool_refs (&PATTERN (insn));
8385 }
8386
8387 RTX_FRAME_RELATED_P (insn) = 1;
8388 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8389 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8390 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8391 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8392 real_frame_off)));
8393
8394 /* Set backchain. */
8395
8396 if (TARGET_BACKCHAIN)
8397 {
8398 if (cfun_frame_layout.backchain_offset)
8399 addr = gen_rtx_MEM (Pmode,
8400 plus_constant (Pmode, stack_pointer_rtx,
8401 cfun_frame_layout.backchain_offset));
8402 else
8403 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8404 set_mem_alias_set (addr, get_frame_alias_set ());
8405 insn = emit_insn (gen_move_insn (addr, temp_reg));
8406 }
8407
8408 /* If we support non-call exceptions (e.g. for Java),
8409 we need to make sure the backchain pointer is set up
8410 before any possibly trapping memory access. */
8411 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
8412 {
8413 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8414 emit_clobber (addr);
8415 }
8416 }
8417
8418 /* Save fprs 8 - 15 (64 bit ABI). */
8419
8420 if (cfun_save_high_fprs_p && next_fpr)
8421 {
8422 /* If the stack might be accessed through a different register
8423 we have to make sure that the stack pointer decrement is not
8424 moved below the use of the stack slots. */
8425 s390_emit_stack_tie ();
8426
8427 insn = emit_insn (gen_add2_insn (temp_reg,
8428 GEN_INT (cfun_frame_layout.f8_offset)));
8429
8430 offset = 0;
8431
8432 for (i = 24; i <= next_fpr; i++)
8433 if (cfun_fpr_bit_p (i - 16))
8434 {
8435 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
8436 cfun_frame_layout.frame_size
8437 + cfun_frame_layout.f8_offset
8438 + offset);
8439
8440 insn = save_fpr (temp_reg, offset, i);
8441 offset += 8;
8442 RTX_FRAME_RELATED_P (insn) = 1;
8443 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8444 gen_rtx_SET (VOIDmode,
8445 gen_rtx_MEM (DFmode, addr),
8446 gen_rtx_REG (DFmode, i)));
8447 }
8448 }
8449
8450 /* Set frame pointer, if needed. */
8451
8452 if (frame_pointer_needed)
8453 {
8454 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8455 RTX_FRAME_RELATED_P (insn) = 1;
8456 }
8457
8458 /* Set up got pointer, if needed. */
8459
8460 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
8461 {
8462 rtx insns = s390_load_got ();
8463
8464 for (insn = insns; insn; insn = NEXT_INSN (insn))
8465 annotate_constant_pool_refs (&PATTERN (insn));
8466
8467 emit_insn (insns);
8468 }
8469
8470 if (TARGET_TPF_PROFILING)
8471 {
8472 /* Generate a BAS instruction to serve as a function
8473 entry intercept to facilitate the use of tracing
8474 algorithms located at the branch target. */
8475 emit_insn (gen_prologue_tpf ());
8476
8477 /* Emit a blockage here so that all code
8478 lies between the profiling mechanisms. */
8479 emit_insn (gen_blockage ());
8480 }
8481 }
8482
8483 /* Expand the epilogue into a bunch of separate insns. */
8484
8485 void
8486 s390_emit_epilogue (bool sibcall)
8487 {
8488 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
8489 int area_bottom, area_top, offset = 0;
8490 int next_offset;
8491 rtvec p;
8492 int i;
8493
8494 if (TARGET_TPF_PROFILING)
8495 {
8496
8497 /* Generate a BAS instruction to serve as a function
8498 entry intercept to facilitate the use of tracing
8499 algorithms located at the branch target. */
8500
8501 /* Emit a blockage here so that all code
8502 lies between the profiling mechanisms. */
8503 emit_insn (gen_blockage ());
8504
8505 emit_insn (gen_epilogue_tpf ());
8506 }
8507
8508 /* Check whether to use frame or stack pointer for restore. */
8509
8510 frame_pointer = (frame_pointer_needed
8511 ? hard_frame_pointer_rtx : stack_pointer_rtx);
8512
8513 s390_frame_area (&area_bottom, &area_top);
8514
8515 /* Check whether we can access the register save area.
8516 If not, increment the frame pointer as required. */
8517
8518 if (area_top <= area_bottom)
8519 {
8520 /* Nothing to restore. */
8521 }
8522 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
8523 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
8524 {
8525 /* Area is in range. */
8526 offset = cfun_frame_layout.frame_size;
8527 }
8528 else
8529 {
8530 rtx insn, frame_off, cfa;
8531
8532 offset = area_bottom < 0 ? -area_bottom : 0;
8533 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
8534
8535 cfa = gen_rtx_SET (VOIDmode, frame_pointer,
8536 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
8537 if (DISP_IN_RANGE (INTVAL (frame_off)))
8538 {
8539 insn = gen_rtx_SET (VOIDmode, frame_pointer,
8540 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
8541 insn = emit_insn (insn);
8542 }
8543 else
8544 {
8545 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
8546 frame_off = force_const_mem (Pmode, frame_off);
8547
8548 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
8549 annotate_constant_pool_refs (&PATTERN (insn));
8550 }
8551 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
8552 RTX_FRAME_RELATED_P (insn) = 1;
8553 }
8554
8555 /* Restore call saved fprs. */
8556
8557 if (TARGET_64BIT)
8558 {
8559 if (cfun_save_high_fprs_p)
8560 {
8561 next_offset = cfun_frame_layout.f8_offset;
8562 for (i = 24; i < 32; i++)
8563 {
8564 if (cfun_fpr_bit_p (i - 16))
8565 {
8566 restore_fpr (frame_pointer,
8567 offset + next_offset, i);
8568 cfa_restores
8569 = alloc_reg_note (REG_CFA_RESTORE,
8570 gen_rtx_REG (DFmode, i), cfa_restores);
8571 next_offset += 8;
8572 }
8573 }
8574 }
8575
8576 }
8577 else
8578 {
8579 next_offset = cfun_frame_layout.f4_offset;
8580 for (i = 18; i < 20; i++)
8581 {
8582 if (cfun_fpr_bit_p (i - 16))
8583 {
8584 restore_fpr (frame_pointer,
8585 offset + next_offset, i);
8586 cfa_restores
8587 = alloc_reg_note (REG_CFA_RESTORE,
8588 gen_rtx_REG (DFmode, i), cfa_restores);
8589 next_offset += 8;
8590 }
8591 else if (!TARGET_PACKED_STACK)
8592 next_offset += 8;
8593 }
8594
8595 }
8596
8597 /* Return register. */
8598
8599 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8600
8601 /* Restore call saved gprs. */
8602
8603 if (cfun_frame_layout.first_restore_gpr != -1)
8604 {
8605 rtx insn, addr;
8606 int i;
8607
8608       /* Check for global registers and save them to the
8609 	 stack locations from which they get restored.  */
8610
8611 for (i = cfun_frame_layout.first_restore_gpr;
8612 i <= cfun_frame_layout.last_restore_gpr;
8613 i++)
8614 {
8615 if (global_not_special_regno_p (i))
8616 {
8617 addr = plus_constant (Pmode, frame_pointer,
8618 offset + cfun_frame_layout.gprs_offset
8619 + (i - cfun_frame_layout.first_save_gpr_slot)
8620 * UNITS_PER_LONG);
8621 addr = gen_rtx_MEM (Pmode, addr);
8622 set_mem_alias_set (addr, get_frame_alias_set ());
8623 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
8624 }
8625 else
8626 cfa_restores
8627 = alloc_reg_note (REG_CFA_RESTORE,
8628 gen_rtx_REG (Pmode, i), cfa_restores);
8629 }
8630
8631 if (! sibcall)
8632 {
8633 	  /* Fetch the return address from the stack before the load multiple;
8634 	     this is good for scheduling.  */
8635
8636 if (cfun_frame_layout.save_return_addr_p
8637 || (cfun_frame_layout.first_restore_gpr < BASE_REGNUM
8638 && cfun_frame_layout.last_restore_gpr > RETURN_REGNUM))
8639 {
8640 int return_regnum = find_unused_clobbered_reg();
8641 if (!return_regnum)
8642 return_regnum = 4;
8643 return_reg = gen_rtx_REG (Pmode, return_regnum);
8644
8645 addr = plus_constant (Pmode, frame_pointer,
8646 offset + cfun_frame_layout.gprs_offset
8647 + (RETURN_REGNUM
8648 - cfun_frame_layout.first_save_gpr_slot)
8649 * UNITS_PER_LONG);
8650 addr = gen_rtx_MEM (Pmode, addr);
8651 set_mem_alias_set (addr, get_frame_alias_set ());
8652 emit_move_insn (return_reg, addr);
8653 }
8654 }
8655
8656 insn = restore_gprs (frame_pointer,
8657 offset + cfun_frame_layout.gprs_offset
8658 + (cfun_frame_layout.first_restore_gpr
8659 - cfun_frame_layout.first_save_gpr_slot)
8660 * UNITS_PER_LONG,
8661 cfun_frame_layout.first_restore_gpr,
8662 cfun_frame_layout.last_restore_gpr);
8663 insn = emit_insn (insn);
8664 REG_NOTES (insn) = cfa_restores;
8665 add_reg_note (insn, REG_CFA_DEF_CFA,
8666 plus_constant (Pmode, stack_pointer_rtx,
8667 STACK_POINTER_OFFSET));
8668 RTX_FRAME_RELATED_P (insn) = 1;
8669 }
8670
8671 if (! sibcall)
8672 {
8673
8674 /* Return to caller. */
8675
8676 p = rtvec_alloc (2);
8677
8678 RTVEC_ELT (p, 0) = ret_rtx;
8679 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
8680 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
8681 }
8682 }
8683
8684
8685 /* Return the size in bytes of a function argument of
8686 type TYPE and/or mode MODE. At least one of TYPE or
8687 MODE must be specified. */
8688
8689 static int
8690 s390_function_arg_size (enum machine_mode mode, const_tree type)
8691 {
8692 if (type)
8693 return int_size_in_bytes (type);
8694
8695 /* No type info available for some library calls ... */
8696 if (mode != BLKmode)
8697 return GET_MODE_SIZE (mode);
8698
8699   /* If we have neither type nor mode, abort.  */
8700 gcc_unreachable ();
8701 }
8702
8703 /* Return true if a function argument of type TYPE and mode MODE
8704 is to be passed in a floating-point register, if available. */
8705
8706 static bool
8707 s390_function_arg_float (enum machine_mode mode, const_tree type)
8708 {
8709 int size = s390_function_arg_size (mode, type);
8710 if (size > 8)
8711 return false;
8712
8713 /* Soft-float changes the ABI: no floating-point registers are used. */
8714 if (TARGET_SOFT_FLOAT)
8715 return false;
8716
8717 /* No type info available for some library calls ... */
8718 if (!type)
8719 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
8720
8721 /* The ABI says that record types with a single member are treated
8722 just like that member would be. */
8723 while (TREE_CODE (type) == RECORD_TYPE)
8724 {
8725 tree field, single = NULL_TREE;
8726
8727 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8728 {
8729 if (TREE_CODE (field) != FIELD_DECL)
8730 continue;
8731
8732 if (single == NULL_TREE)
8733 single = TREE_TYPE (field);
8734 else
8735 return false;
8736 }
8737
8738 if (single == NULL_TREE)
8739 return false;
8740 else
8741 type = single;
8742 }
8743
8744 return TREE_CODE (type) == REAL_TYPE;
8745 }
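
/* Illustrative example for the single-member record rule above (added
   comment): a hypothetical

     struct wrapper { double d; };

   is passed exactly like a plain 'double', i.e. in a floating-point
   register if one is still free, whereas a struct with two members is
   not considered for FP registers.  */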
8746
8747 /* Return true if a function argument of type TYPE and mode MODE
8748 is to be passed in an integer register, or a pair of integer
8749 registers, if available. */
8750
8751 static bool
8752 s390_function_arg_integer (enum machine_mode mode, const_tree type)
8753 {
8754 int size = s390_function_arg_size (mode, type);
8755 if (size > 8)
8756 return false;
8757
8758 /* No type info available for some library calls ... */
8759 if (!type)
8760 return GET_MODE_CLASS (mode) == MODE_INT
8761 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
8762
8763 /* We accept small integral (and similar) types. */
8764 if (INTEGRAL_TYPE_P (type)
8765 || POINTER_TYPE_P (type)
8766 || TREE_CODE (type) == NULLPTR_TYPE
8767 || TREE_CODE (type) == OFFSET_TYPE
8768 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
8769 return true;
8770
8771 /* We also accept structs of size 1, 2, 4, 8 that are not
8772 passed in floating-point registers. */
8773 if (AGGREGATE_TYPE_P (type)
8774 && exact_log2 (size) >= 0
8775 && !s390_function_arg_float (mode, type))
8776 return true;
8777
8778 return false;
8779 }
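
/* Illustrative example (added comment): a hypothetical 4-byte

     struct pair { short a; short b; };

   qualifies for integer registers because its size is a power of two
   and it is not a float candidate, while a 6-byte struct fails the
   exact_log2 test here and ends up being passed by reference instead.  */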
8780
8781 /* Return 1 if a function argument of type TYPE and mode MODE
8782 is to be passed by reference. The ABI specifies that only
8783 structures of size 1, 2, 4, or 8 bytes are passed by value,
8784 all other structures (and complex numbers) are passed by
8785 reference. */
8786
8787 static bool
8788 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
8789 enum machine_mode mode, const_tree type,
8790 bool named ATTRIBUTE_UNUSED)
8791 {
8792 int size = s390_function_arg_size (mode, type);
8793 if (size > 8)
8794 return true;
8795
8796 if (type)
8797 {
8798 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
8799 return 1;
8800
8801 if (TREE_CODE (type) == COMPLEX_TYPE
8802 || TREE_CODE (type) == VECTOR_TYPE)
8803 return 1;
8804 }
8805
8806 return 0;
8807 }
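
/* Illustrative examples (added comment): any struct whose size is not
   1, 2, 4 or 8 bytes is passed by reference, and so is every complex
   or vector value -- e.g. a '_Complex float' goes by reference even
   though it would fit into 8 bytes.  */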
8808
8809 /* Update the data in CUM to advance over an argument of mode MODE and
8810 data type TYPE. (TYPE is null for libcalls where that information
8811    may not be available.)  The boolean NAMED specifies whether the
8812 argument is a named argument (as opposed to an unnamed argument
8813 matching an ellipsis). */
8814
8815 static void
8816 s390_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
8817 const_tree type, bool named ATTRIBUTE_UNUSED)
8818 {
8819 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8820
8821 if (s390_function_arg_float (mode, type))
8822 {
8823 cum->fprs += 1;
8824 }
8825 else if (s390_function_arg_integer (mode, type))
8826 {
8827 int size = s390_function_arg_size (mode, type);
8828 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
8829 }
8830 else
8831 gcc_unreachable ();
8832 }
8833
8834 /* Define where to put the arguments to a function.
8835 Value is zero to push the argument on the stack,
8836 or a hard register in which to store the argument.
8837
8838 MODE is the argument's machine mode.
8839 TYPE is the data type of the argument (as a tree).
8840 This is null for libcalls where that information may
8841 not be available.
8842 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8843 the preceding args and about the function being called.
8844 NAMED is nonzero if this argument is a named parameter
8845 (otherwise it is an extra parameter matching an ellipsis).
8846
8847 On S/390, we use general purpose registers 2 through 6 to
8848 pass integer, pointer, and certain structure arguments, and
8849 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
8850 to pass floating point arguments. All remaining arguments
8851 are pushed to the stack. */
8852
8853 static rtx
8854 s390_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
8855 const_tree type, bool named ATTRIBUTE_UNUSED)
8856 {
8857 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8858
8859 if (s390_function_arg_float (mode, type))
8860 {
8861 if (cum->fprs + 1 > FP_ARG_NUM_REG)
8862 return 0;
8863 else
8864 return gen_rtx_REG (mode, cum->fprs + 16);
8865 }
8866 else if (s390_function_arg_integer (mode, type))
8867 {
8868 int size = s390_function_arg_size (mode, type);
8869 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
8870
8871 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
8872 return 0;
8873 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
8874 return gen_rtx_REG (mode, cum->gprs + 2);
8875 else if (n_gprs == 2)
8876 {
8877 rtvec p = rtvec_alloc (2);
8878
8879 RTVEC_ELT (p, 0)
8880 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
8881 const0_rtx);
8882 RTVEC_ELT (p, 1)
8883 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
8884 GEN_INT (4));
8885
8886 return gen_rtx_PARALLEL (mode, p);
8887 }
8888 }
8889
8890 /* After the real arguments, expand_call calls us once again
8891 with a void_type_node type. Whatever we return here is
8892 passed as operand 2 to the call expanders.
8893
8894 We don't need this feature ... */
8895 else if (type == void_type_node)
8896 return const0_rtx;
8897
8898 gcc_unreachable ();
8899 }
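
/* Illustrative example (added comment, under the usual 31-bit ABI
   assumptions): for a call like

     void f (int a, double b, long long c);

   'a' is passed in %r2, 'b' in %f0, and the 8-byte 'c' lands in the
   %r3/%r4 pair -- either directly as a DImode value occupying that pair
   or, with -m31 -mzarch, as the two-element PARALLEL built above.  */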
8900
8901 /* Return true if return values of type TYPE should be returned
8902 in a memory buffer whose address is passed by the caller as
8903 hidden first argument. */
8904
8905 static bool
8906 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
8907 {
8908 /* We accept small integral (and similar) types. */
8909 if (INTEGRAL_TYPE_P (type)
8910 || POINTER_TYPE_P (type)
8911 || TREE_CODE (type) == OFFSET_TYPE
8912 || TREE_CODE (type) == REAL_TYPE)
8913 return int_size_in_bytes (type) > 8;
8914
8915 /* Aggregates and similar constructs are always returned
8916 in memory. */
8917 if (AGGREGATE_TYPE_P (type)
8918 || TREE_CODE (type) == COMPLEX_TYPE
8919 || TREE_CODE (type) == VECTOR_TYPE)
8920 return true;
8921
8922 /* ??? We get called on all sorts of random stuff from
8923 aggregate_value_p. We can't abort, but it's not clear
8924 what's safe to return. Pretend it's a struct I guess. */
8925 return true;
8926 }
8927
8928 /* Function arguments and return values are promoted to word size. */
8929
8930 static enum machine_mode
8931 s390_promote_function_mode (const_tree type, enum machine_mode mode,
8932 int *punsignedp,
8933 const_tree fntype ATTRIBUTE_UNUSED,
8934 int for_return ATTRIBUTE_UNUSED)
8935 {
8936 if (INTEGRAL_MODE_P (mode)
8937 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
8938 {
8939 if (type != NULL_TREE && POINTER_TYPE_P (type))
8940 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8941 return Pmode;
8942 }
8943
8944 return mode;
8945 }
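
/* Illustrative example (added comment): on 64 bit an 'int' or 'short'
   argument is widened to DImode before being passed or returned, with
   the signedness for pointer types taken from POINTERS_EXTEND_UNSIGNED;
   on 31 bit an 'int' already has the width of UNITS_PER_LONG and is
   left alone.  */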
8946
8947 /* Define where to return a (scalar) value of type RET_TYPE.
8948 If RET_TYPE is null, define where to return a (scalar)
8949 value of mode MODE from a libcall. */
8950
8951 static rtx
8952 s390_function_and_libcall_value (enum machine_mode mode,
8953 const_tree ret_type,
8954 const_tree fntype_or_decl,
8955 bool outgoing ATTRIBUTE_UNUSED)
8956 {
8957 /* For normal functions perform the promotion as
8958 promote_function_mode would do. */
8959 if (ret_type)
8960 {
8961 int unsignedp = TYPE_UNSIGNED (ret_type);
8962 mode = promote_function_mode (ret_type, mode, &unsignedp,
8963 fntype_or_decl, 1);
8964 }
8965
8966 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode));
8967 gcc_assert (GET_MODE_SIZE (mode) <= 8);
8968
8969 if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
8970 return gen_rtx_REG (mode, 16);
8971 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
8972 || UNITS_PER_LONG == UNITS_PER_WORD)
8973 return gen_rtx_REG (mode, 2);
8974 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
8975 {
8976 /* This case is triggered when returning a 64 bit value with
8977 -m31 -mzarch. Although the value would fit into a single
8978 register it has to be forced into a 32 bit register pair in
8979 order to match the ABI. */
8980 rtvec p = rtvec_alloc (2);
8981
8982 RTVEC_ELT (p, 0)
8983 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
8984 RTVEC_ELT (p, 1)
8985 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
8986
8987 return gen_rtx_PARALLEL (mode, p);
8988 }
8989
8990 gcc_unreachable ();
8991 }
8992
8993 /* Define where to return a scalar return value of type RET_TYPE. */
8994
8995 static rtx
8996 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
8997 bool outgoing)
8998 {
8999 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
9000 fn_decl_or_type, outgoing);
9001 }
9002
9003 /* Define where to return a scalar libcall return value of mode
9004 MODE. */
9005
9006 static rtx
9007 s390_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9008 {
9009 return s390_function_and_libcall_value (mode, NULL_TREE,
9010 NULL_TREE, true);
9011 }
9012
9013
9014 /* Create and return the va_list datatype.
9015
9016 On S/390, va_list is an array type equivalent to
9017
9018 typedef struct __va_list_tag
9019 {
9020 long __gpr;
9021 long __fpr;
9022 void *__overflow_arg_area;
9023 void *__reg_save_area;
9024 } va_list[1];
9025
9026 where __gpr and __fpr hold the number of general purpose
9027 or floating point arguments used up to now, respectively,
9028 __overflow_arg_area points to the stack location of the
9029 next argument passed on the stack, and __reg_save_area
9030 always points to the start of the register area in the
9031 call frame of the current function. The function prologue
9032 saves all registers used for argument passing into this
9033 area if the function uses variable arguments. */
9034
9035 static tree
9036 s390_build_builtin_va_list (void)
9037 {
9038 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9039
9040 record = lang_hooks.types.make_type (RECORD_TYPE);
9041
9042 type_decl =
9043 build_decl (BUILTINS_LOCATION,
9044 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9045
9046 f_gpr = build_decl (BUILTINS_LOCATION,
9047 FIELD_DECL, get_identifier ("__gpr"),
9048 long_integer_type_node);
9049 f_fpr = build_decl (BUILTINS_LOCATION,
9050 FIELD_DECL, get_identifier ("__fpr"),
9051 long_integer_type_node);
9052 f_ovf = build_decl (BUILTINS_LOCATION,
9053 FIELD_DECL, get_identifier ("__overflow_arg_area"),
9054 ptr_type_node);
9055 f_sav = build_decl (BUILTINS_LOCATION,
9056 FIELD_DECL, get_identifier ("__reg_save_area"),
9057 ptr_type_node);
9058
9059 va_list_gpr_counter_field = f_gpr;
9060 va_list_fpr_counter_field = f_fpr;
9061
9062 DECL_FIELD_CONTEXT (f_gpr) = record;
9063 DECL_FIELD_CONTEXT (f_fpr) = record;
9064 DECL_FIELD_CONTEXT (f_ovf) = record;
9065 DECL_FIELD_CONTEXT (f_sav) = record;
9066
9067 TYPE_STUB_DECL (record) = type_decl;
9068 TYPE_NAME (record) = type_decl;
9069 TYPE_FIELDS (record) = f_gpr;
9070 DECL_CHAIN (f_gpr) = f_fpr;
9071 DECL_CHAIN (f_fpr) = f_ovf;
9072 DECL_CHAIN (f_ovf) = f_sav;
9073
9074 layout_type (record);
9075
9076 /* The correct type is an array type of one element. */
9077 return build_array_type (record, build_index_type (size_zero_node));
9078 }
9079
9080 /* Implement va_start by filling the va_list structure VALIST.
9081 STDARG_P is always true, and ignored.
9082 NEXTARG points to the first anonymous stack argument.
9083
9084 The following global variables are used to initialize
9085 the va_list structure:
9086
9087 crtl->args.info:
9088 holds number of gprs and fprs used for named arguments.
9089 crtl->args.arg_offset_rtx:
9090 holds the offset of the first anonymous stack argument
9091 (relative to the virtual arg pointer). */
9092
9093 static void
9094 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
9095 {
9096 HOST_WIDE_INT n_gpr, n_fpr;
9097 int off;
9098 tree f_gpr, f_fpr, f_ovf, f_sav;
9099 tree gpr, fpr, ovf, sav, t;
9100
9101 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9102 f_fpr = DECL_CHAIN (f_gpr);
9103 f_ovf = DECL_CHAIN (f_fpr);
9104 f_sav = DECL_CHAIN (f_ovf);
9105
9106 valist = build_simple_mem_ref (valist);
9107 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9108 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9109 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9110 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9111
9112 /* Count number of gp and fp argument registers used. */
9113
9114 n_gpr = crtl->args.info.gprs;
9115 n_fpr = crtl->args.info.fprs;
9116
9117 if (cfun->va_list_gpr_size)
9118 {
9119 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
9120 build_int_cst (NULL_TREE, n_gpr));
9121 TREE_SIDE_EFFECTS (t) = 1;
9122 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9123 }
9124
9125 if (cfun->va_list_fpr_size)
9126 {
9127 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
9128 build_int_cst (NULL_TREE, n_fpr));
9129 TREE_SIDE_EFFECTS (t) = 1;
9130 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9131 }
9132
9133 /* Find the overflow area. */
9134 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
9135 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG)
9136 {
9137 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
9138
9139 off = INTVAL (crtl->args.arg_offset_rtx);
9140 off = off < 0 ? 0 : off;
9141 if (TARGET_DEBUG_ARG)
9142 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
9143 (int)n_gpr, (int)n_fpr, off);
9144
9145 t = fold_build_pointer_plus_hwi (t, off);
9146
9147 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
9148 TREE_SIDE_EFFECTS (t) = 1;
9149 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9150 }
9151
9152 /* Find the register save area. */
9153 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
9154 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
9155 {
9156 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
9157 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
9158
9159 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
9160 TREE_SIDE_EFFECTS (t) = 1;
9161 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9162 }
9163 }
9164
9165 /* Implement va_arg by updating the va_list structure
9166 VALIST as required to retrieve an argument of type
9167 TYPE, and returning that argument.
9168
9169 Generates code equivalent to:
9170
9171 if (integral value) {
9172 if (size <= 4 && args.gpr < 5 ||
9173 size > 4 && args.gpr < 4 )
9174 ret = args.reg_save_area[args.gpr+8]
9175 else
9176 ret = *args.overflow_arg_area++;
9177 } else if (float value) {
9178 if (args.fgpr < 2)
9179 ret = args.reg_save_area[args.fpr+64]
9180 else
9181 ret = *args.overflow_arg_area++;
9182 } else if (aggregate value) {
9183 if (args.gpr < 5)
9184 ret = *args.reg_save_area[args.gpr]
9185 else
9186 ret = **args.overflow_arg_area++;
9187 } */
9188
9189 static tree
9190 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9191 gimple_seq *post_p ATTRIBUTE_UNUSED)
9192 {
9193 tree f_gpr, f_fpr, f_ovf, f_sav;
9194 tree gpr, fpr, ovf, sav, reg, t, u;
9195 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
9196 tree lab_false, lab_over, addr;
9197
9198 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9199 f_fpr = DECL_CHAIN (f_gpr);
9200 f_ovf = DECL_CHAIN (f_fpr);
9201 f_sav = DECL_CHAIN (f_ovf);
9202
9203 valist = build_va_arg_indirect_ref (valist);
9204 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9205 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9206 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9207
9208 /* The tree for args* cannot be shared between gpr/fpr and ovf since
9209 both appear on a lhs. */
9210 valist = unshare_expr (valist);
9211 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9212
9213 size = int_size_in_bytes (type);
9214
9215 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
9216 {
9217 if (TARGET_DEBUG_ARG)
9218 {
9219 fprintf (stderr, "va_arg: aggregate type");
9220 debug_tree (type);
9221 }
9222
9223 /* Aggregates are passed by reference. */
9224 indirect_p = 1;
9225 reg = gpr;
9226 n_reg = 1;
9227
9228       /* Kernel stack layout on 31 bit: it is assumed here that no padding
9229 	 will be added by s390_frame_info because for va_args an even number
9230 	 of gprs always has to be saved (r15-r2 = 14 regs).  */
9231 sav_ofs = 2 * UNITS_PER_LONG;
9232 sav_scale = UNITS_PER_LONG;
9233 size = UNITS_PER_LONG;
9234 max_reg = GP_ARG_NUM_REG - n_reg;
9235 }
9236 else if (s390_function_arg_float (TYPE_MODE (type), type))
9237 {
9238 if (TARGET_DEBUG_ARG)
9239 {
9240 fprintf (stderr, "va_arg: float type");
9241 debug_tree (type);
9242 }
9243
9244 /* FP args go in FP registers, if present. */
9245 indirect_p = 0;
9246 reg = fpr;
9247 n_reg = 1;
9248 sav_ofs = 16 * UNITS_PER_LONG;
9249 sav_scale = 8;
9250 max_reg = FP_ARG_NUM_REG - n_reg;
9251 }
9252 else
9253 {
9254 if (TARGET_DEBUG_ARG)
9255 {
9256 fprintf (stderr, "va_arg: other type");
9257 debug_tree (type);
9258 }
9259
9260 /* Otherwise into GP registers. */
9261 indirect_p = 0;
9262 reg = gpr;
9263 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
9264
9265       /* Kernel stack layout on 31 bit: it is assumed here that no padding
9266 	 will be added by s390_frame_info because for va_args an even number
9267 	 of gprs always has to be saved (r15-r2 = 14 regs).  */
9268 sav_ofs = 2 * UNITS_PER_LONG;
9269
9270 if (size < UNITS_PER_LONG)
9271 sav_ofs += UNITS_PER_LONG - size;
9272
9273 sav_scale = UNITS_PER_LONG;
9274 max_reg = GP_ARG_NUM_REG - n_reg;
9275 }
9276
9277 /* Pull the value out of the saved registers ... */
9278
9279 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9280 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9281 addr = create_tmp_var (ptr_type_node, "addr");
9282
9283 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
9284 t = build2 (GT_EXPR, boolean_type_node, reg, t);
9285 u = build1 (GOTO_EXPR, void_type_node, lab_false);
9286 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
9287 gimplify_and_add (t, pre_p);
9288
9289 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
9290 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
9291 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
9292 t = fold_build_pointer_plus (t, u);
9293
9294 gimplify_assign (addr, t, pre_p);
9295
9296 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9297
9298 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9299
9300
9301 /* ... Otherwise out of the overflow area. */
9302
9303 t = ovf;
9304 if (size < UNITS_PER_LONG)
9305 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
9306
9307 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9308
9309 gimplify_assign (addr, t, pre_p);
9310
9311 t = fold_build_pointer_plus_hwi (t, size);
9312 gimplify_assign (ovf, t, pre_p);
9313
9314 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9315
9316
9317 /* Increment register save count. */
9318
9319 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
9320 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
9321 gimplify_and_add (u, pre_p);
9322
9323 if (indirect_p)
9324 {
9325 t = build_pointer_type_for_mode (build_pointer_type (type),
9326 ptr_mode, true);
9327 addr = fold_convert (t, addr);
9328 addr = build_va_arg_indirect_ref (addr);
9329 }
9330 else
9331 {
9332 t = build_pointer_type_for_mode (type, ptr_mode, true);
9333 addr = fold_convert (t, addr);
9334 }
9335
9336 return build_va_arg_indirect_ref (addr);
9337 }
9338
9339 /* Output assembly code for the trampoline template to
9340 stdio stream FILE.
9341
9342 On S/390, we use gpr 1 internally in the trampoline code;
9343 gpr 0 is used to hold the static chain. */
9344
9345 static void
9346 s390_asm_trampoline_template (FILE *file)
9347 {
9348 rtx op[2];
9349 op[0] = gen_rtx_REG (Pmode, 0);
9350 op[1] = gen_rtx_REG (Pmode, 1);
9351
9352 if (TARGET_64BIT)
9353 {
9354 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
9355 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
9356 output_asm_insn ("br\t%1", op); /* 2 byte */
9357 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
9358 }
9359 else
9360 {
9361 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
9362 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
9363 output_asm_insn ("br\t%1", op); /* 2 byte */
9364 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
9365 }
9366 }
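
/* Added explanatory note: in the 64-bit template above, basr loads the
   address of the following instruction into %r1; the lmg then fetches
   the static chain into %r0 and the target address into %r1 from the
   data words that s390_trampoline_init stores behind the code, and
   br %r1 enters the target.  The ASM_OUTPUT_SKIP pads the template to
   TRAMPOLINE_SIZE so those data slots end up at the expected offsets.  */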
9367
9368 /* Emit RTL insns to initialize the variable parts of a trampoline.
9369 FNADDR is an RTX for the address of the function's pure code.
9370 CXT is an RTX for the static chain value for the function. */
9371
9372 static void
9373 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9374 {
9375 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
9376 rtx mem;
9377
9378 emit_block_move (m_tramp, assemble_trampoline_template (),
9379 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
9380
9381 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
9382 emit_move_insn (mem, cxt);
9383 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
9384 emit_move_insn (mem, fnaddr);
9385 }
9386
9387 /* Output assembler code to FILE to increment profiler label # LABELNO
9388 for profiling a function entry. */
9389
9390 void
9391 s390_function_profiler (FILE *file, int labelno)
9392 {
9393 rtx op[7];
9394
9395 char label[128];
9396 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
9397
9398 fprintf (file, "# function profiler \n");
9399
9400 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
9401 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
9402 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
9403
9404 op[2] = gen_rtx_REG (Pmode, 1);
9405 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
9406 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
9407
9408 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
9409 if (flag_pic)
9410 {
9411 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
9412 op[4] = gen_rtx_CONST (Pmode, op[4]);
9413 }
9414
9415 if (TARGET_64BIT)
9416 {
9417 output_asm_insn ("stg\t%0,%1", op);
9418 output_asm_insn ("larl\t%2,%3", op);
9419 output_asm_insn ("brasl\t%0,%4", op);
9420 output_asm_insn ("lg\t%0,%1", op);
9421 }
9422 else if (!flag_pic)
9423 {
9424 op[6] = gen_label_rtx ();
9425
9426 output_asm_insn ("st\t%0,%1", op);
9427 output_asm_insn ("bras\t%2,%l6", op);
9428 output_asm_insn (".long\t%4", op);
9429 output_asm_insn (".long\t%3", op);
9430 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
9431 output_asm_insn ("l\t%0,0(%2)", op);
9432 output_asm_insn ("l\t%2,4(%2)", op);
9433 output_asm_insn ("basr\t%0,%0", op);
9434 output_asm_insn ("l\t%0,%1", op);
9435 }
9436 else
9437 {
9438 op[5] = gen_label_rtx ();
9439 op[6] = gen_label_rtx ();
9440
9441 output_asm_insn ("st\t%0,%1", op);
9442 output_asm_insn ("bras\t%2,%l6", op);
9443 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
9444 output_asm_insn (".long\t%4-%l5", op);
9445 output_asm_insn (".long\t%3-%l5", op);
9446 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
9447 output_asm_insn ("lr\t%0,%2", op);
9448 output_asm_insn ("a\t%0,0(%2)", op);
9449 output_asm_insn ("a\t%2,4(%2)", op);
9450 output_asm_insn ("basr\t%0,%0", op);
9451 output_asm_insn ("l\t%0,%1", op);
9452 }
9453 }
9454
9455 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
9456 into its SYMBOL_REF_FLAGS. */
9457
9458 static void
9459 s390_encode_section_info (tree decl, rtx rtl, int first)
9460 {
9461 default_encode_section_info (decl, rtl, first);
9462
9463 if (TREE_CODE (decl) == VAR_DECL)
9464 {
9465 /* If a variable has a forced alignment to < 2 bytes, mark it
9466 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as LARL
9467 operand. */
9468 if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
9469 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
9470 if (!DECL_SIZE (decl)
9471 || !DECL_ALIGN (decl)
9472 || !host_integerp (DECL_SIZE (decl), 0)
9473 || (DECL_ALIGN (decl) <= 64
9474 && DECL_ALIGN (decl) != tree_low_cst (DECL_SIZE (decl), 0)))
9475 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
9476 }
9477
9478 /* Literal pool references don't have a decl so they are handled
9479 differently here. We rely on the information in the MEM_ALIGN
9480 entry to decide upon natural alignment. */
9481 if (MEM_P (rtl)
9482 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
9483 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
9484 && (MEM_ALIGN (rtl) == 0
9485 || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
9486 || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
9487 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
9488 }
9489
9490 /* Output thunk to FILE that implements a C++ virtual function call (with
9491 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
9492 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
9493 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
9494 relative to the resulting this pointer. */
9495
9496 static void
9497 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
9498 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9499 tree function)
9500 {
9501 rtx op[10];
9502 int nonlocal = 0;
9503
9504 /* Make sure unwind info is emitted for the thunk if needed. */
9505 final_start_function (emit_barrier (), file, 1);
9506
9507 /* Operand 0 is the target function. */
9508 op[0] = XEXP (DECL_RTL (function), 0);
9509 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
9510 {
9511 nonlocal = 1;
9512 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
9513 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
9514 op[0] = gen_rtx_CONST (Pmode, op[0]);
9515 }
9516
9517 /* Operand 1 is the 'this' pointer. */
9518 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9519 op[1] = gen_rtx_REG (Pmode, 3);
9520 else
9521 op[1] = gen_rtx_REG (Pmode, 2);
9522
9523 /* Operand 2 is the delta. */
9524 op[2] = GEN_INT (delta);
9525
9526 /* Operand 3 is the vcall_offset. */
9527 op[3] = GEN_INT (vcall_offset);
9528
9529 /* Operand 4 is the temporary register. */
9530 op[4] = gen_rtx_REG (Pmode, 1);
9531
9532 /* Operands 5 to 8 can be used as labels. */
9533 op[5] = NULL_RTX;
9534 op[6] = NULL_RTX;
9535 op[7] = NULL_RTX;
9536 op[8] = NULL_RTX;
9537
9538 /* Operand 9 can be used for temporary register. */
9539 op[9] = NULL_RTX;
9540
9541 /* Generate code. */
9542 if (TARGET_64BIT)
9543 {
9544 /* Setup literal pool pointer if required. */
9545 if ((!DISP_IN_RANGE (delta)
9546 && !CONST_OK_FOR_K (delta)
9547 && !CONST_OK_FOR_Os (delta))
9548 || (!DISP_IN_RANGE (vcall_offset)
9549 && !CONST_OK_FOR_K (vcall_offset)
9550 && !CONST_OK_FOR_Os (vcall_offset)))
9551 {
9552 op[5] = gen_label_rtx ();
9553 output_asm_insn ("larl\t%4,%5", op);
9554 }
9555
9556 /* Add DELTA to this pointer. */
9557 if (delta)
9558 {
9559 if (CONST_OK_FOR_J (delta))
9560 output_asm_insn ("la\t%1,%2(%1)", op);
9561 else if (DISP_IN_RANGE (delta))
9562 output_asm_insn ("lay\t%1,%2(%1)", op);
9563 else if (CONST_OK_FOR_K (delta))
9564 output_asm_insn ("aghi\t%1,%2", op);
9565 else if (CONST_OK_FOR_Os (delta))
9566 output_asm_insn ("agfi\t%1,%2", op);
9567 else
9568 {
9569 op[6] = gen_label_rtx ();
9570 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
9571 }
9572 }
9573
9574 /* Perform vcall adjustment. */
9575 if (vcall_offset)
9576 {
9577 if (DISP_IN_RANGE (vcall_offset))
9578 {
9579 output_asm_insn ("lg\t%4,0(%1)", op);
9580 output_asm_insn ("ag\t%1,%3(%4)", op);
9581 }
9582 else if (CONST_OK_FOR_K (vcall_offset))
9583 {
9584 output_asm_insn ("lghi\t%4,%3", op);
9585 output_asm_insn ("ag\t%4,0(%1)", op);
9586 output_asm_insn ("ag\t%1,0(%4)", op);
9587 }
9588 else if (CONST_OK_FOR_Os (vcall_offset))
9589 {
9590 output_asm_insn ("lgfi\t%4,%3", op);
9591 output_asm_insn ("ag\t%4,0(%1)", op);
9592 output_asm_insn ("ag\t%1,0(%4)", op);
9593 }
9594 else
9595 {
9596 op[7] = gen_label_rtx ();
9597 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
9598 output_asm_insn ("ag\t%4,0(%1)", op);
9599 output_asm_insn ("ag\t%1,0(%4)", op);
9600 }
9601 }
9602
9603 /* Jump to target. */
9604 output_asm_insn ("jg\t%0", op);
9605
9606 /* Output literal pool if required. */
9607 if (op[5])
9608 {
9609 output_asm_insn (".align\t4", op);
9610 targetm.asm_out.internal_label (file, "L",
9611 CODE_LABEL_NUMBER (op[5]));
9612 }
9613 if (op[6])
9614 {
9615 targetm.asm_out.internal_label (file, "L",
9616 CODE_LABEL_NUMBER (op[6]));
9617 output_asm_insn (".long\t%2", op);
9618 }
9619 if (op[7])
9620 {
9621 targetm.asm_out.internal_label (file, "L",
9622 CODE_LABEL_NUMBER (op[7]));
9623 output_asm_insn (".long\t%3", op);
9624 }
9625 }
9626 else
9627 {
9628 /* Set up the base pointer if required. */
9629 if (!vcall_offset
9630 || (!DISP_IN_RANGE (delta)
9631 && !CONST_OK_FOR_K (delta)
9632 && !CONST_OK_FOR_Os (delta))
9633 || (!DISP_IN_RANGE (vcall_offset)
9634 && !CONST_OK_FOR_K (vcall_offset)
9635 && !CONST_OK_FOR_Os (vcall_offset)))
9636 {
9637 op[5] = gen_label_rtx ();
9638 output_asm_insn ("basr\t%4,0", op);
9639 targetm.asm_out.internal_label (file, "L",
9640 CODE_LABEL_NUMBER (op[5]));
9641 }
9642
9643 /* Add DELTA to this pointer. */
9644 if (delta)
9645 {
9646 if (CONST_OK_FOR_J (delta))
9647 output_asm_insn ("la\t%1,%2(%1)", op);
9648 else if (DISP_IN_RANGE (delta))
9649 output_asm_insn ("lay\t%1,%2(%1)", op);
9650 else if (CONST_OK_FOR_K (delta))
9651 output_asm_insn ("ahi\t%1,%2", op);
9652 else if (CONST_OK_FOR_Os (delta))
9653 output_asm_insn ("afi\t%1,%2", op);
9654 else
9655 {
9656 op[6] = gen_label_rtx ();
9657 output_asm_insn ("a\t%1,%6-%5(%4)", op);
9658 }
9659 }
9660
9661 /* Perform vcall adjustment. */
9662 if (vcall_offset)
9663 {
9664 if (CONST_OK_FOR_J (vcall_offset))
9665 {
9666 output_asm_insn ("l\t%4,0(%1)", op);
9667 output_asm_insn ("a\t%1,%3(%4)", op);
9668 }
9669 else if (DISP_IN_RANGE (vcall_offset))
9670 {
9671 output_asm_insn ("l\t%4,0(%1)", op);
9672 output_asm_insn ("ay\t%1,%3(%4)", op);
9673 }
9674 else if (CONST_OK_FOR_K (vcall_offset))
9675 {
9676 output_asm_insn ("lhi\t%4,%3", op);
9677 output_asm_insn ("a\t%4,0(%1)", op);
9678 output_asm_insn ("a\t%1,0(%4)", op);
9679 }
9680 else if (CONST_OK_FOR_Os (vcall_offset))
9681 {
9682 output_asm_insn ("iilf\t%4,%3", op);
9683 output_asm_insn ("a\t%4,0(%1)", op);
9684 output_asm_insn ("a\t%1,0(%4)", op);
9685 }
9686 else
9687 {
9688 op[7] = gen_label_rtx ();
9689 output_asm_insn ("l\t%4,%7-%5(%4)", op);
9690 output_asm_insn ("a\t%4,0(%1)", op);
9691 output_asm_insn ("a\t%1,0(%4)", op);
9692 }
9693
9694 /* We had to clobber the base pointer register.
9695 Set up the base pointer again (with a different base). */
9696 op[5] = gen_label_rtx ();
9697 output_asm_insn ("basr\t%4,0", op);
9698 targetm.asm_out.internal_label (file, "L",
9699 CODE_LABEL_NUMBER (op[5]));
9700 }
9701
9702 /* Jump to target. */
9703 op[8] = gen_label_rtx ();
9704
9705 if (!flag_pic)
9706 output_asm_insn ("l\t%4,%8-%5(%4)", op);
9707 else if (!nonlocal)
9708 output_asm_insn ("a\t%4,%8-%5(%4)", op);
9709 /* We cannot call through .plt, since .plt requires %r12 loaded. */
9710 else if (flag_pic == 1)
9711 {
9712 output_asm_insn ("a\t%4,%8-%5(%4)", op);
9713 output_asm_insn ("l\t%4,%0(%4)", op);
9714 }
9715 else if (flag_pic == 2)
9716 {
9717 op[9] = gen_rtx_REG (Pmode, 0);
9718 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
9719 output_asm_insn ("a\t%4,%8-%5(%4)", op);
9720 output_asm_insn ("ar\t%4,%9", op);
9721 output_asm_insn ("l\t%4,0(%4)", op);
9722 }
9723
9724 output_asm_insn ("br\t%4", op);
9725
9726 /* Output literal pool. */
9727 output_asm_insn (".align\t4", op);
9728
9729 if (nonlocal && flag_pic == 2)
9730 output_asm_insn (".long\t%0", op);
9731 if (nonlocal)
9732 {
9733 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9734 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
9735 }
9736
9737 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
9738 if (!flag_pic)
9739 output_asm_insn (".long\t%0", op);
9740 else
9741 output_asm_insn (".long\t%0-%5", op);
9742
9743 if (op[6])
9744 {
9745 targetm.asm_out.internal_label (file, "L",
9746 CODE_LABEL_NUMBER (op[6]));
9747 output_asm_insn (".long\t%2", op);
9748 }
9749 if (op[7])
9750 {
9751 targetm.asm_out.internal_label (file, "L",
9752 CODE_LABEL_NUMBER (op[7]));
9753 output_asm_insn (".long\t%3", op);
9754 }
9755 }
9756 final_end_function ();
9757 }
9758
9759 static bool
9760 s390_valid_pointer_mode (enum machine_mode mode)
9761 {
9762 return (mode == SImode || (TARGET_64BIT && mode == DImode));
9763 }
9764
9765 /* Checks whether the given CALL_EXPR would use a caller
9766 saved register. This is used to decide whether sibling call
9767 optimization could be performed on the respective function
9768 call. */
9769
9770 static bool
9771 s390_call_saved_register_used (tree call_expr)
9772 {
9773 CUMULATIVE_ARGS cum_v;
9774 cumulative_args_t cum;
9775 tree parameter;
9776 enum machine_mode mode;
9777 tree type;
9778 rtx parm_rtx;
9779 int reg, i;
9780
9781 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
9782 cum = pack_cumulative_args (&cum_v);
9783
9784 for (i = 0; i < call_expr_nargs (call_expr); i++)
9785 {
9786 parameter = CALL_EXPR_ARG (call_expr, i);
9787 gcc_assert (parameter);
9788
9789 /* For an undeclared variable passed as parameter we will get
9790 an ERROR_MARK node here. */
9791 if (TREE_CODE (parameter) == ERROR_MARK)
9792 return true;
9793
9794 type = TREE_TYPE (parameter);
9795 gcc_assert (type);
9796
9797 mode = TYPE_MODE (type);
9798 gcc_assert (mode);
9799
9800 if (pass_by_reference (&cum_v, mode, type, true))
9801 {
9802 mode = Pmode;
9803 type = build_pointer_type (type);
9804 }
9805
9806 parm_rtx = s390_function_arg (cum, mode, type, 0);
9807
9808 s390_function_arg_advance (cum, mode, type, 0);
9809
9810 if (!parm_rtx)
9811 continue;
9812
9813 if (REG_P (parm_rtx))
9814 {
9815 for (reg = 0;
9816 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
9817 reg++)
9818 if (!call_used_regs[reg + REGNO (parm_rtx)])
9819 return true;
9820 }
9821
9822 if (GET_CODE (parm_rtx) == PARALLEL)
9823 {
9824 int i;
9825
9826 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
9827 {
9828 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
9829
9830 gcc_assert (REG_P (r));
9831
9832 for (reg = 0;
9833 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
9834 reg++)
9835 if (!call_used_regs[reg + REGNO (r)])
9836 return true;
9837 }
9838 }
9839
9840 }
9841 return false;
9842 }
9843
9844 /* Return true if the given call expression can be
9845 turned into a sibling call.
9846 DECL holds the declaration of the function to be called whereas
9847 EXP is the call expression itself. */
9848
9849 static bool
9850 s390_function_ok_for_sibcall (tree decl, tree exp)
9851 {
9852 /* The TPF epilogue uses register 1. */
9853 if (TARGET_TPF_PROFILING)
9854 return false;
9855
9856 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
9857 which would have to be restored before the sibcall. */
9858 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
9859 return false;
9860
9861 /* Register 6 on s390 is available as an argument register but unfortunately
9862 "caller saved". This makes functions needing this register for arguments
9863 not suitable for sibcalls. */
9864 return !s390_call_saved_register_used (exp);
9865 }
9866
9867 /* Return the fixed registers used for condition codes. */
9868
9869 static bool
9870 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9871 {
9872 *p1 = CC_REGNUM;
9873 *p2 = INVALID_REGNUM;
9874
9875 return true;
9876 }
9877
9878 /* This function is used by the call expanders of the machine description.
9879 It emits the call insn itself together with the necessary operations
9880 to adjust the target address and returns the emitted insn.
9881 ADDR_LOCATION is the target address rtx
9882 TLS_CALL the location of the thread-local symbol
9883 RESULT_REG the register where the result of the call should be stored
9884 RETADDR_REG the register where the return address should be stored
9885 If this parameter is NULL_RTX the call is considered
9886 to be a sibling call. */
9887
9888 rtx
9889 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
9890 rtx retaddr_reg)
9891 {
9892 bool plt_call = false;
9893 rtx insn;
9894 rtx call;
9895 rtx clobber;
9896 rtvec vec;
9897
9898 /* Direct function calls need special treatment. */
9899 if (GET_CODE (addr_location) == SYMBOL_REF)
9900 {
9901 /* When calling a global routine in PIC mode, we must
9902 replace the symbol itself with the PLT stub. */
9903 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
9904 {
9905 if (retaddr_reg != NULL_RTX)
9906 {
9907 addr_location = gen_rtx_UNSPEC (Pmode,
9908 gen_rtvec (1, addr_location),
9909 UNSPEC_PLT);
9910 addr_location = gen_rtx_CONST (Pmode, addr_location);
9911 plt_call = true;
9912 }
9913 else
9914 /* For -fpic code the PLT entries might use r12 which is
9915 call-saved. Therefore we cannot do a sibcall when
9916 calling directly using a symbol ref. When reaching
9917 this point we decided (in s390_function_ok_for_sibcall)
9918 to do a sibcall for a function pointer but one of the
9919 optimizers was able to get rid of the function pointer
9920 by propagating the symbol ref into the call. This
9921 optimization is illegal for S/390, so we turn the direct
9922 call into an indirect call again. */
9923 addr_location = force_reg (Pmode, addr_location);
9924 }
9925
9926 /* Unless we can use the bras(l) insn, force the
9927 routine address into a register. */
9928 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
9929 {
9930 if (flag_pic)
9931 addr_location = legitimize_pic_address (addr_location, 0);
9932 else
9933 addr_location = force_reg (Pmode, addr_location);
9934 }
9935 }
9936
9937 /* If it is already an indirect call or the code above moved the
9938 SYMBOL_REF to somewhere else, make sure the address can be found in
9939 register 1. */
9940 if (retaddr_reg == NULL_RTX
9941 && GET_CODE (addr_location) != SYMBOL_REF
9942 && !plt_call)
9943 {
9944 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
9945 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
9946 }
9947
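  /* A CALL rtx expects its target address wrapped in a MEM; build it here. */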
9948 addr_location = gen_rtx_MEM (QImode, addr_location);
9949 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
9950
9951 if (result_reg != NULL_RTX)
9952 call = gen_rtx_SET (VOIDmode, result_reg, call);
9953
9954 if (retaddr_reg != NULL_RTX)
9955 {
9956 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
9957
9958 if (tls_call != NULL_RTX)
9959 vec = gen_rtvec (3, call, clobber,
9960 gen_rtx_USE (VOIDmode, tls_call));
9961 else
9962 vec = gen_rtvec (2, call, clobber);
9963
9964 call = gen_rtx_PARALLEL (VOIDmode, vec);
9965 }
9966
9967 insn = emit_call_insn (call);
9968
9969 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
9970 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
9971 {
9972 /* s390_function_ok_for_sibcall should
9973 have denied sibcalls in this case. */
9974 gcc_assert (retaddr_reg != NULL_RTX);
9975 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
9976 }
9977 return insn;
9978 }
9979
9980 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
9981
9982 static void
9983 s390_conditional_register_usage (void)
9984 {
9985 int i;
9986
9987 if (flag_pic)
9988 {
9989 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
9990 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
9991 }
9992 if (TARGET_CPU_ZARCH)
9993 {
9994 fixed_regs[BASE_REGNUM] = 0;
9995 call_used_regs[BASE_REGNUM] = 0;
9996 fixed_regs[RETURN_REGNUM] = 0;
9997 call_used_regs[RETURN_REGNUM] = 0;
9998 }
9999 if (TARGET_64BIT)
10000 {
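      /* In the 64-bit ABI the FPRs %f8 .. %f15 (hard regs 24 - 31) are
         call-saved. */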
10001 for (i = 24; i < 32; i++)
10002 call_used_regs[i] = call_really_used_regs[i] = 0;
10003 }
10004 else
10005 {
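      /* In the 31-bit ABI only %f4 and %f6 (hard regs 18 and 19) are
         call-saved. */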
10006 for (i = 18; i < 20; i++)
10007 call_used_regs[i] = call_really_used_regs[i] = 0;
10008 }
10009
10010 if (TARGET_SOFT_FLOAT)
10011 {
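      /* With soft-float the FPRs are not available at all; mark them fixed. */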
10012 for (i = 16; i < 32; i++)
10013 call_used_regs[i] = fixed_regs[i] = 1;
10014 }
10015 }
10016
10017 /* Corresponding function to eh_return expander. */
10018
10019 static GTY(()) rtx s390_tpf_eh_return_symbol;
10020 void
10021 s390_emit_tpf_eh_return (rtx target)
10022 {
10023 rtx insn, reg;
10024
10025 if (!s390_tpf_eh_return_symbol)
10026 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
10027
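  /* The handler address is passed to __tpf_eh_return in %r2; the value that
     call returns (also in %r2) becomes the actual EH handler address. */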
10028 reg = gen_rtx_REG (Pmode, 2);
10029
10030 emit_move_insn (reg, target);
10031 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
10032 gen_rtx_REG (Pmode, RETURN_REGNUM));
10033 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
10034
10035 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
10036 }
10037
10038 /* Rework the prologue/epilogue to avoid saving/restoring
10039 registers unnecessarily. */
10040
10041 static void
10042 s390_optimize_prologue (void)
10043 {
10044 rtx insn, new_insn, next_insn;
10045
10046 /* Do a final recompute of the frame-related data. */
10047
10048 s390_update_frame_layout ();
10049
10050 /* If all special registers are in fact used, there's nothing we
10051 can do, so no point in walking the insn list. */
10052
10053 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
10054 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
10055 && (TARGET_CPU_ZARCH
10056 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
10057 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
10058 return;
10059
10060 /* Search for prologue/epilogue insns and replace them. */
10061
10062 for (insn = get_insns (); insn; insn = next_insn)
10063 {
10064 int first, last, off;
10065 rtx set, base, offset;
10066
10067 next_insn = NEXT_INSN (insn);
10068
10069 if (GET_CODE (insn) != INSN)
10070 continue;
10071
10072 if (GET_CODE (PATTERN (insn)) == PARALLEL
10073 && store_multiple_operation (PATTERN (insn), VOIDmode))
10074 {
10075 set = XVECEXP (PATTERN (insn), 0, 0);
10076 first = REGNO (SET_SRC (set));
10077 last = first + XVECLEN (PATTERN (insn), 0) - 1;
10078 offset = const0_rtx;
10079 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10080 off = INTVAL (offset);
10081
10082 if (GET_CODE (base) != REG || off < 0)
10083 continue;
10084 if (cfun_frame_layout.first_save_gpr != -1
10085 && (cfun_frame_layout.first_save_gpr < first
10086 || cfun_frame_layout.last_save_gpr > last))
10087 continue;
10088 if (REGNO (base) != STACK_POINTER_REGNUM
10089 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10090 continue;
10091 if (first > BASE_REGNUM || last < BASE_REGNUM)
10092 continue;
10093
10094 if (cfun_frame_layout.first_save_gpr != -1)
10095 {
10096 new_insn = save_gprs (base,
10097 off + (cfun_frame_layout.first_save_gpr
10098 - first) * UNITS_PER_LONG,
10099 cfun_frame_layout.first_save_gpr,
10100 cfun_frame_layout.last_save_gpr);
10101 new_insn = emit_insn_before (new_insn, insn);
10102 INSN_ADDRESSES_NEW (new_insn, -1);
10103 }
10104
10105 remove_insn (insn);
10106 continue;
10107 }
10108
10109 if (cfun_frame_layout.first_save_gpr == -1
10110 && GET_CODE (PATTERN (insn)) == SET
10111 && GET_CODE (SET_SRC (PATTERN (insn))) == REG
10112 && (REGNO (SET_SRC (PATTERN (insn))) == BASE_REGNUM
10113 || (!TARGET_CPU_ZARCH
10114 && REGNO (SET_SRC (PATTERN (insn))) == RETURN_REGNUM))
10115 && GET_CODE (SET_DEST (PATTERN (insn))) == MEM)
10116 {
10117 set = PATTERN (insn);
10118 first = REGNO (SET_SRC (set));
10119 offset = const0_rtx;
10120 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10121 off = INTVAL (offset);
10122
10123 if (GET_CODE (base) != REG || off < 0)
10124 continue;
10125 if (REGNO (base) != STACK_POINTER_REGNUM
10126 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10127 continue;
10128
10129 remove_insn (insn);
10130 continue;
10131 }
10132
10133 if (GET_CODE (PATTERN (insn)) == PARALLEL
10134 && load_multiple_operation (PATTERN (insn), VOIDmode))
10135 {
10136 set = XVECEXP (PATTERN (insn), 0, 0);
10137 first = REGNO (SET_DEST (set));
10138 last = first + XVECLEN (PATTERN (insn), 0) - 1;
10139 offset = const0_rtx;
10140 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
10141 off = INTVAL (offset);
10142
10143 if (GET_CODE (base) != REG || off < 0)
10144 continue;
10145 if (cfun_frame_layout.first_restore_gpr != -1
10146 && (cfun_frame_layout.first_restore_gpr < first
10147 || cfun_frame_layout.last_restore_gpr > last))
10148 continue;
10149 if (REGNO (base) != STACK_POINTER_REGNUM
10150 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10151 continue;
10152 if (first > BASE_REGNUM || last < BASE_REGNUM)
10153 continue;
10154
10155 if (cfun_frame_layout.first_restore_gpr != -1)
10156 {
10157 new_insn = restore_gprs (base,
10158 off + (cfun_frame_layout.first_restore_gpr
10159 - first) * UNITS_PER_LONG,
10160 cfun_frame_layout.first_restore_gpr,
10161 cfun_frame_layout.last_restore_gpr);
10162 new_insn = emit_insn_before (new_insn, insn);
10163 INSN_ADDRESSES_NEW (new_insn, -1);
10164 }
10165
10166 remove_insn (insn);
10167 continue;
10168 }
10169
10170 if (cfun_frame_layout.first_restore_gpr == -1
10171 && GET_CODE (PATTERN (insn)) == SET
10172 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
10173 && (REGNO (SET_DEST (PATTERN (insn))) == BASE_REGNUM
10174 || (!TARGET_CPU_ZARCH
10175 && REGNO (SET_DEST (PATTERN (insn))) == RETURN_REGNUM))
10176 && GET_CODE (SET_SRC (PATTERN (insn))) == MEM)
10177 {
10178 set = PATTERN (insn);
10179 first = REGNO (SET_DEST (set));
10180 offset = const0_rtx;
10181 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
10182 off = INTVAL (offset);
10183
10184 if (GET_CODE (base) != REG || off < 0)
10185 continue;
10186 if (REGNO (base) != STACK_POINTER_REGNUM
10187 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10188 continue;
10189
10190 remove_insn (insn);
10191 continue;
10192 }
10193 }
10194 }
10195
10196 /* On z10 and later the dynamic branch prediction must see the
10197 backward jump within a certain window. If not, it falls back to
10198 the static prediction. This function rearranges the loop backward
10199 branch in a way which makes the static prediction always correct.
10200 The function returns true if it added an instruction. */
10201 static bool
10202 s390_fix_long_loop_prediction (rtx insn)
10203 {
10204 rtx set = single_set (insn);
10205 rtx code_label, label_ref, new_label;
10206 rtx uncond_jump;
10207 rtx cur_insn;
10208 rtx tmp;
10209 int distance;
10210
10211 /* This will exclude branch on count and branch on index patterns
10212 since these are correctly statically predicted. */
10213 if (!set
10214 || SET_DEST (set) != pc_rtx
10215 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
10216 return false;
10217
10218 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
10219 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
10220
10221 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
10222
10223 code_label = XEXP (label_ref, 0);
10224
10225 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
10226 || INSN_ADDRESSES (INSN_UID (insn)) == -1
10227 || (INSN_ADDRESSES (INSN_UID (insn))
10228 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
10229 return false;
10230
10231 for (distance = 0, cur_insn = PREV_INSN (insn);
10232 distance < PREDICT_DISTANCE - 6;
10233 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
10234 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
10235 return false;
10236
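  /* Emit an unconditional jump back to the original target right after INSN,
     put a new label behind that jump, and invert INSN so that it branches
     forward to the new label. The loop back edge thus becomes an
     unconditional (and correctly predicted) branch, while the conditional
     branch is only taken on loop exit. */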
10237 new_label = gen_label_rtx ();
10238 uncond_jump = emit_jump_insn_after (
10239 gen_rtx_SET (VOIDmode, pc_rtx,
10240 gen_rtx_LABEL_REF (VOIDmode, code_label)),
10241 insn);
10242 emit_label_after (new_label, uncond_jump);
10243
10244 tmp = XEXP (SET_SRC (set), 1);
10245 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
10246 XEXP (SET_SRC (set), 2) = tmp;
10247 INSN_CODE (insn) = -1;
10248
10249 XEXP (label_ref, 0) = new_label;
10250 JUMP_LABEL (insn) = new_label;
10251 JUMP_LABEL (uncond_jump) = code_label;
10252
10253 return true;
10254 }
10255
10256 /* Returns 1 if INSN reads the value of REG for purposes not related
10257 to addressing of memory, and 0 otherwise. */
10258 static int
10259 s390_non_addr_reg_read_p (rtx reg, rtx insn)
10260 {
10261 return reg_referenced_p (reg, PATTERN (insn))
10262 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
10263 }
10264
10265 /* Starting from INSN find_cond_jump looks downwards in the insn
10266 stream for a single jump insn which is the last user of the
10267 condition code set in INSN. */
10268 static rtx
10269 find_cond_jump (rtx insn)
10270 {
10271 for (; insn; insn = NEXT_INSN (insn))
10272 {
10273 rtx ite, cc;
10274
10275 if (LABEL_P (insn))
10276 break;
10277
10278 if (!JUMP_P (insn))
10279 {
10280 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
10281 break;
10282 continue;
10283 }
10284
10285 /* This will be triggered by a return. */
10286 if (GET_CODE (PATTERN (insn)) != SET)
10287 break;
10288
10289 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
10290 ite = SET_SRC (PATTERN (insn));
10291
10292 if (GET_CODE (ite) != IF_THEN_ELSE)
10293 break;
10294
10295 cc = XEXP (XEXP (ite, 0), 0);
10296 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
10297 break;
10298
10299 if (find_reg_note (insn, REG_DEAD, cc))
10300 return insn;
10301 break;
10302 }
10303
10304 return NULL_RTX;
10305 }
10306
10307 /* Swap the condition in COND and the operands in OP0 and OP1 so that
10308 the semantics does not change. If NULL_RTX is passed as COND the
10309 function tries to find the conditional jump starting with INSN. */
10310 static void
10311 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
10312 {
10313 rtx tmp = *op0;
10314
10315 if (cond == NULL_RTX)
10316 {
10317 rtx jump = find_cond_jump (NEXT_INSN (insn));
10318 jump = jump ? single_set (jump) : NULL_RTX;
10319
10320 if (jump == NULL_RTX)
10321 return;
10322
10323 cond = XEXP (XEXP (jump, 1), 0);
10324 }
10325
10326 *op0 = *op1;
10327 *op1 = tmp;
10328 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
10329 }
10330
10331 /* On z10, instructions of the compare-and-branch family have the
10332 property of accessing the register occurring as the second operand with
10333 its bits complemented. If such a compare is grouped with a second
10334 instruction that accesses the same register non-complemented, and
10335 if that register's value is delivered via a bypass, then the
10336 pipeline recycles, thereby causing significant performance decline.
10337 This function locates such situations and exchanges the two
10338 operands of the compare. The function returns true whenever it
10339 added an insn. */
10340 static bool
10341 s390_z10_optimize_cmp (rtx insn)
10342 {
10343 rtx prev_insn, next_insn;
10344 bool insn_added_p = false;
10345 rtx cond, *op0, *op1;
10346
10347 if (GET_CODE (PATTERN (insn)) == PARALLEL)
10348 {
10349 /* Handle compare and branch and branch on count
10350 instructions. */
10351 rtx pattern = single_set (insn);
10352
10353 if (!pattern
10354 || SET_DEST (pattern) != pc_rtx
10355 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
10356 return false;
10357
10358 cond = XEXP (SET_SRC (pattern), 0);
10359 op0 = &XEXP (cond, 0);
10360 op1 = &XEXP (cond, 1);
10361 }
10362 else if (GET_CODE (PATTERN (insn)) == SET)
10363 {
10364 rtx src, dest;
10365
10366 /* Handle normal compare instructions. */
10367 src = SET_SRC (PATTERN (insn));
10368 dest = SET_DEST (PATTERN (insn));
10369
10370 if (!REG_P (dest)
10371 || !CC_REGNO_P (REGNO (dest))
10372 || GET_CODE (src) != COMPARE)
10373 return false;
10374
10375 /* s390_swap_cmp will try to find the conditional
10376 jump when passing NULL_RTX as condition. */
10377 cond = NULL_RTX;
10378 op0 = &XEXP (src, 0);
10379 op1 = &XEXP (src, 1);
10380 }
10381 else
10382 return false;
10383
10384 if (!REG_P (*op0) || !REG_P (*op1))
10385 return false;
10386
10387 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
10388 return false;
10389
10390 /* Swap the COMPARE arguments and its mask if there is a
10391 conflicting access in the previous insn. */
10392 prev_insn = prev_active_insn (insn);
10393 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
10394 && reg_referenced_p (*op1, PATTERN (prev_insn)))
10395 s390_swap_cmp (cond, op0, op1, insn);
10396
10397 /* Check if there is a conflict with the next insn. If there
10398 was no conflict with the previous insn, then swap the
10399 COMPARE arguments and its mask. If we already swapped
10400 the operands, or if swapping them would cause a conflict
10401 with the previous insn, issue a NOP after the COMPARE in
10402 order to separate the two instructions. */
10403 next_insn = next_active_insn (insn);
10404 if (next_insn != NULL_RTX && INSN_P (next_insn)
10405 && s390_non_addr_reg_read_p (*op1, next_insn))
10406 {
10407 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
10408 && s390_non_addr_reg_read_p (*op0, prev_insn))
10409 {
10410 if (REGNO (*op1) == 0)
10411 emit_insn_after (gen_nop1 (), insn);
10412 else
10413 emit_insn_after (gen_nop (), insn);
10414 insn_added_p = true;
10415 }
10416 else
10417 s390_swap_cmp (cond, op0, op1, insn);
10418 }
10419 return insn_added_p;
10420 }
10421
10422 /* Perform machine-dependent processing. */
10423
10424 static void
10425 s390_reorg (void)
10426 {
10427 bool pool_overflow = false;
10428
10429 /* Make sure all splits have been performed; splits after
10430 machine_dependent_reorg might confuse insn length counts. */
10431 split_all_insns_noflow ();
10432
10433 /* Install the main literal pool and the associated base
10434 register load insns.
10435
10436 In addition, there are two problematic situations we need
10437 to correct:
10438
10439 - the literal pool might be > 4096 bytes in size, so that
10440 some of its elements cannot be directly accessed
10441
10442 - a branch target might be > 64K away from the branch, so that
10443 it is not possible to use a PC-relative instruction.
10444
10445 To fix those, we split the single literal pool into multiple
10446 pool chunks, reloading the pool base register at various
10447 points throughout the function to ensure it always points to
10448 the pool chunk the following code expects, and / or replace
10449 PC-relative branches by absolute branches.
10450
10451 However, the two problems are interdependent: splitting the
10452 literal pool can move a branch further away from its target,
10453 causing the 64K limit to overflow, and on the other hand,
10454 replacing a PC-relative branch by an absolute branch means
10455 we need to put the branch target address into the literal
10456 pool, possibly causing it to overflow.
10457
10458 So, we loop trying to fix up both problems until we manage
10459 to satisfy both conditions at the same time. Note that the
10460 loop is guaranteed to terminate as every pass of the loop
10461 strictly decreases the total number of PC-relative branches
10462 in the function. (This is not completely true as there
10463 might be branch-over-pool insns introduced by chunkify_start.
10464 Those never need to be split however.) */
10465
10466 for (;;)
10467 {
10468 struct constant_pool *pool = NULL;
10469
10470 /* Collect the literal pool. */
10471 if (!pool_overflow)
10472 {
10473 pool = s390_mainpool_start ();
10474 if (!pool)
10475 pool_overflow = true;
10476 }
10477
10478 /* If literal pool overflowed, start to chunkify it. */
10479 if (pool_overflow)
10480 pool = s390_chunkify_start ();
10481
10482 /* Split out-of-range branches. If this has created new
10483 literal pool entries, cancel current chunk list and
10484 recompute it. zSeries machines have large branch
10485 instructions, so we never need to split a branch. */
10486 if (!TARGET_CPU_ZARCH && s390_split_branches ())
10487 {
10488 if (pool_overflow)
10489 s390_chunkify_cancel (pool);
10490 else
10491 s390_mainpool_cancel (pool);
10492
10493 continue;
10494 }
10495
10496 /* If we made it up to here, both conditions are satisfied.
10497 Finish up literal pool related changes. */
10498 if (pool_overflow)
10499 s390_chunkify_finish (pool);
10500 else
10501 s390_mainpool_finish (pool);
10502
10503 /* We're done splitting branches. */
10504 cfun->machine->split_branches_pending_p = false;
10505 break;
10506 }
10507
10508 /* Generate out-of-pool execute target insns. */
10509 if (TARGET_CPU_ZARCH)
10510 {
10511 rtx insn, label, target;
10512
10513 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10514 {
10515 label = s390_execute_label (insn);
10516 if (!label)
10517 continue;
10518
10519 gcc_assert (label != const0_rtx);
10520
10521 target = emit_label (XEXP (label, 0));
10522 INSN_ADDRESSES_NEW (target, -1);
10523
10524 target = emit_insn (s390_execute_target (insn));
10525 INSN_ADDRESSES_NEW (target, -1);
10526 }
10527 }
10528
10529 /* Try to optimize prologue and epilogue further. */
10530 s390_optimize_prologue ();
10531
10532 /* Walk over the insns and do some >=z10 specific changes. */
10533 if (s390_tune == PROCESSOR_2097_Z10
10534 || s390_tune == PROCESSOR_2817_Z196
10535 || s390_tune == PROCESSOR_2827_ZEC12)
10536 {
10537 rtx insn;
10538 bool insn_added_p = false;
10539
10540 /* The insn lengths and addresses have to be up to date for the
10541 following manipulations. */
10542 shorten_branches (get_insns ());
10543
10544 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10545 {
10546 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10547 continue;
10548
10549 if (JUMP_P (insn))
10550 insn_added_p |= s390_fix_long_loop_prediction (insn);
10551
10552 if ((GET_CODE (PATTERN (insn)) == PARALLEL
10553 || GET_CODE (PATTERN (insn)) == SET)
10554 && s390_tune == PROCESSOR_2097_Z10)
10555 insn_added_p |= s390_z10_optimize_cmp (insn);
10556 }
10557
10558 /* Adjust branches if we added new instructions. */
10559 if (insn_added_p)
10560 shorten_branches (get_insns ());
10561 }
10562 }
10563
10564 /* Return true if INSN is a fp load insn writing register REGNO. */
10565 static inline bool
10566 s390_fpload_toreg (rtx insn, unsigned int regno)
10567 {
10568 rtx set;
10569 enum attr_type flag = s390_safe_attr_type (insn);
10570
10571 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
10572 return false;
10573
10574 set = single_set (insn);
10575
10576 if (set == NULL_RTX)
10577 return false;
10578
10579 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
10580 return false;
10581
10582 if (REGNO (SET_DEST (set)) != regno)
10583 return false;
10584
10585 return true;
10586 }
10587
10588 /* This value describes the distance to be avoided between an
10589 arithmetic fp instruction and an fp load writing the same register.
10590 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
10591 fine, but the exact value has to be avoided. Otherwise the FP
10592 pipeline will throw an exception causing a major penalty. */
10593 #define Z10_EARLYLOAD_DISTANCE 7
10594
10595 /* Rearrange the ready list in order to avoid the situation described
10596 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
10597 moved to the front of the ready list so that it is issued last. */
10598 static void
10599 s390_z10_prevent_earlyload_conflicts (rtx *ready, int *nready_p)
10600 {
10601 unsigned int regno;
10602 int nready = *nready_p;
10603 rtx tmp;
10604 int i;
10605 rtx insn;
10606 rtx set;
10607 enum attr_type flag;
10608 int distance;
10609
10610 /* Skip DISTANCE - 1 active insns. */
10611 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
10612 distance > 0 && insn != NULL_RTX;
10613 distance--, insn = prev_active_insn (insn))
10614 if (CALL_P (insn) || JUMP_P (insn))
10615 return;
10616
10617 if (insn == NULL_RTX)
10618 return;
10619
10620 set = single_set (insn);
10621
10622 if (set == NULL_RTX || !REG_P (SET_DEST (set))
10623 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
10624 return;
10625
10626 flag = s390_safe_attr_type (insn);
10627
10628 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
10629 return;
10630
10631 regno = REGNO (SET_DEST (set));
10632 i = nready - 1;
10633
10634 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
10635 i--;
10636
10637 if (!i)
10638 return;
10639
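  /* Move the conflicting load to index 0. The scheduler issues the insn at
     the end of the ready list first, so this delays the load as long as
     possible. */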
10640 tmp = ready[i];
10641 memmove (&ready[1], &ready[0], sizeof (rtx) * i);
10642 ready[0] = tmp;
10643 }
10644
10645
10646 /* The s390_sched_state variable tracks the state of the current or
10647 the last instruction group.
10648
10649 0,1,2 number of instructions scheduled in the current group
10650 3 the last group is complete - normal insns
10651 4 the last group was a cracked/expanded insn */
10652
10653 static int s390_sched_state;
10654
10655 #define S390_OOO_SCHED_STATE_NORMAL 3
10656 #define S390_OOO_SCHED_STATE_CRACKED 4
10657
10658 #define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1
10659 #define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2
10660 #define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4
10661 #define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
10662
10663 static unsigned int
10664 s390_get_sched_attrmask (rtx insn)
10665 {
10666 unsigned int mask = 0;
10667
10668 if (get_attr_ooo_cracked (insn))
10669 mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
10670 if (get_attr_ooo_expanded (insn))
10671 mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
10672 if (get_attr_ooo_endgroup (insn))
10673 mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
10674 if (get_attr_ooo_groupalone (insn))
10675 mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
10676 return mask;
10677 }
10678
10679 /* Return the scheduling score for INSN. The higher the score the
10680 better. The score is calculated from the OOO scheduling attributes
10681 of INSN and the scheduling state s390_sched_state. */
10682 static int
10683 s390_sched_score (rtx insn)
10684 {
10685 unsigned int mask = s390_get_sched_attrmask (insn);
10686 int score = 0;
10687
10688 switch (s390_sched_state)
10689 {
10690 case 0:
10691 /* Try to put insns into the first slot which would otherwise
10692 break a group. */
10693 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
10694 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
10695 score += 5;
10696 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
10697 score += 10;
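      /* Fall through: the slot 1 heuristics below apply to the first slot
         as well. */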
10698 case 1:
10699 /* Prefer not cracked insns while trying to put together a
10700 group. */
10701 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
10702 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
10703 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
10704 score += 10;
10705 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
10706 score += 5;
10707 break;
10708 case 2:
10709 /* Prefer not cracked insns while trying to put together a
10710 group. */
10711 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
10712 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
10713 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
10714 score += 10;
10715 /* Prefer endgroup insns in the last slot. */
10716 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
10717 score += 10;
10718 break;
10719 case S390_OOO_SCHED_STATE_NORMAL:
10720 /* Prefer not cracked insns if the last was not cracked. */
10721 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
10722 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
10723 score += 5;
10724 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
10725 score += 10;
10726 break;
10727 case S390_OOO_SCHED_STATE_CRACKED:
10728 /* Try to keep cracked insns together to prevent them from
10729 interrupting groups. */
10730 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
10731 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
10732 score += 5;
10733 break;
10734 }
10735 return score;
10736 }
10737
10738 /* This function is called via hook TARGET_SCHED_REORDER before
10739 issuing one insn from the list READY which contains *NREADYP entries.
10740 For target z10 it reorders load instructions to avoid early load
10741 conflicts in the floating point pipeline. */
10742 static int
10743 s390_sched_reorder (FILE *file, int verbose,
10744 rtx *ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
10745 {
10746 if (s390_tune == PROCESSOR_2097_Z10)
10747 if (reload_completed && *nreadyp > 1)
10748 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
10749
10750 if (s390_tune == PROCESSOR_2827_ZEC12
10751 && reload_completed
10752 && *nreadyp > 1)
10753 {
10754 int i;
10755 int last_index = *nreadyp - 1;
10756 int max_index = -1;
10757 int max_score = -1;
10758 rtx tmp;
10759
10760 /* Just move the insn with the highest score to the top (the
10761 end) of the list. A full sort is not needed since a conflict
10762 in the hazard recognition cannot happen. So the top insn in
10763 the ready list will always be taken. */
10764 for (i = last_index; i >= 0; i--)
10765 {
10766 int score;
10767
10768 if (recog_memoized (ready[i]) < 0)
10769 continue;
10770
10771 score = s390_sched_score (ready[i]);
10772 if (score > max_score)
10773 {
10774 max_score = score;
10775 max_index = i;
10776 }
10777 }
10778
10779 if (max_index != -1)
10780 {
10781 if (max_index != last_index)
10782 {
10783 tmp = ready[max_index];
10784 ready[max_index] = ready[last_index];
10785 ready[last_index] = tmp;
10786
10787 if (verbose > 5)
10788 fprintf (file,
10789 "move insn %d to the top of list\n",
10790 INSN_UID (ready[last_index]));
10791 }
10792 else if (verbose > 5)
10793 fprintf (file,
10794 "best insn %d already on top\n",
10795 INSN_UID (ready[last_index]));
10796 }
10797
10798 if (verbose > 5)
10799 {
10800 fprintf (file, "ready list ooo attributes - sched state: %d\n",
10801 s390_sched_state);
10802
10803 for (i = last_index; i >= 0; i--)
10804 {
10805 if (recog_memoized (ready[i]) < 0)
10806 continue;
10807 fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
10808 s390_sched_score (ready[i]));
10809 #define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
10810 PRINT_OOO_ATTR (ooo_cracked);
10811 PRINT_OOO_ATTR (ooo_expanded);
10812 PRINT_OOO_ATTR (ooo_endgroup);
10813 PRINT_OOO_ATTR (ooo_groupalone);
10814 #undef PRINT_OOO_ATTR
10815 fprintf (file, "\n");
10816 }
10817 }
10818 }
10819
10820 return s390_issue_rate ();
10821 }
10822
10823
10824 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
10825 the scheduler has issued INSN. It stores the last issued insn into
10826 last_scheduled_insn in order to make it available for
10827 s390_sched_reorder. */
10828 static int
10829 s390_sched_variable_issue (FILE *file, int verbose, rtx insn, int more)
10830 {
10831 last_scheduled_insn = insn;
10832
10833 if (s390_tune == PROCESSOR_2827_ZEC12
10834 && reload_completed
10835 && recog_memoized (insn) >= 0)
10836 {
10837 unsigned int mask = s390_get_sched_attrmask (insn);
10838
10839 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
10840 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
10841 s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
10842 else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
10843 || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
10844 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
10845 else
10846 {
10847 /* Only normal insns are left (mask == 0). */
10848 switch (s390_sched_state)
10849 {
10850 case 0:
10851 case 1:
10852 case 2:
10853 case S390_OOO_SCHED_STATE_NORMAL:
10854 if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
10855 s390_sched_state = 1;
10856 else
10857 s390_sched_state++;
10858
10859 break;
10860 case S390_OOO_SCHED_STATE_CRACKED:
10861 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
10862 break;
10863 }
10864 }
10865 if (verbose > 5)
10866 {
10867 fprintf (file, "insn %d: ", INSN_UID (insn));
10868 #define PRINT_OOO_ATTR(ATTR) \
10869 fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
10870 PRINT_OOO_ATTR (ooo_cracked);
10871 PRINT_OOO_ATTR (ooo_expanded);
10872 PRINT_OOO_ATTR (ooo_endgroup);
10873 PRINT_OOO_ATTR (ooo_groupalone);
10874 #undef PRINT_OOO_ATTR
10875 fprintf (file, "\n");
10876 fprintf (file, "sched state: %d\n", s390_sched_state);
10877 }
10878 }
10879
10880 if (GET_CODE (PATTERN (insn)) != USE
10881 && GET_CODE (PATTERN (insn)) != CLOBBER)
10882 return more - 1;
10883 else
10884 return more;
10885 }
10886
10887 static void
10888 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
10889 int verbose ATTRIBUTE_UNUSED,
10890 int max_ready ATTRIBUTE_UNUSED)
10891 {
10892 last_scheduled_insn = NULL_RTX;
10893 s390_sched_state = 0;
10894 }
10895
10896 /* This function checks the whole of insn X for memory references. The
10897 function always returns zero because the framework it is called
10898 from would stop recursively analyzing the insn upon a return value
10899 other than zero. The real result of this function is updating
10900 counter variable MEM_COUNT. */
10901 static int
10902 check_dpu (rtx *x, unsigned *mem_count)
10903 {
10904 if (*x != NULL_RTX && MEM_P (*x))
10905 (*mem_count)++;
10906 return 0;
10907 }
10908
10909 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
10910 a new unroll factor for struct loop *loop when tuning for cpus with
10911 a built-in stride prefetcher.
10912 The loop is analyzed for memory accesses by calling check_dpu for
10913 each rtx of the loop. Depending on the loop_depth and the number of
10914 memory accesses a new number <=nunroll is returned to improve the
10915 behaviour of the hardware prefetch unit. */
10916 static unsigned
10917 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
10918 {
10919 basic_block *bbs;
10920 rtx insn;
10921 unsigned i;
10922 unsigned mem_count = 0;
10923
10924 if (s390_tune != PROCESSOR_2097_Z10
10925 && s390_tune != PROCESSOR_2817_Z196
10926 && s390_tune != PROCESSOR_2827_ZEC12)
10927 return nunroll;
10928
10929 /* Count the number of memory references within the loop body. */
10930 bbs = get_loop_body (loop);
10931 for (i = 0; i < loop->num_nodes; i++)
10932 {
10933 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
10934 if (INSN_P (insn) && INSN_CODE (insn) != -1)
10935 for_each_rtx (&insn, (rtx_function) check_dpu, &mem_count);
10936 }
10937 free (bbs);
10938
10939 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
10940 if (mem_count == 0)
10941 return nunroll;
10942
10943 switch (loop_depth(loop))
10944 {
10945 case 1:
10946 return MIN (nunroll, 28 / mem_count);
10947 case 2:
10948 return MIN (nunroll, 22 / mem_count);
10949 default:
10950 return MIN (nunroll, 16 / mem_count);
10951 }
10952 }
10953
10954 /* Initialize GCC target structure. */
10955
10956 #undef TARGET_ASM_ALIGNED_HI_OP
10957 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
10958 #undef TARGET_ASM_ALIGNED_DI_OP
10959 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
10960 #undef TARGET_ASM_INTEGER
10961 #define TARGET_ASM_INTEGER s390_assemble_integer
10962
10963 #undef TARGET_ASM_OPEN_PAREN
10964 #define TARGET_ASM_OPEN_PAREN ""
10965
10966 #undef TARGET_ASM_CLOSE_PAREN
10967 #define TARGET_ASM_CLOSE_PAREN ""
10968
10969 #undef TARGET_OPTION_OVERRIDE
10970 #define TARGET_OPTION_OVERRIDE s390_option_override
10971
10972 #undef TARGET_ENCODE_SECTION_INFO
10973 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
10974
10975 #undef TARGET_SCALAR_MODE_SUPPORTED_P
10976 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
10977
10978 #ifdef HAVE_AS_TLS
10979 #undef TARGET_HAVE_TLS
10980 #define TARGET_HAVE_TLS true
10981 #endif
10982 #undef TARGET_CANNOT_FORCE_CONST_MEM
10983 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
10984
10985 #undef TARGET_DELEGITIMIZE_ADDRESS
10986 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
10987
10988 #undef TARGET_LEGITIMIZE_ADDRESS
10989 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
10990
10991 #undef TARGET_RETURN_IN_MEMORY
10992 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
10993
10994 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
10995 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
10996
10997 #undef TARGET_ASM_OUTPUT_MI_THUNK
10998 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
10999 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
11000 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
11001
11002 #undef TARGET_SCHED_ADJUST_PRIORITY
11003 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
11004 #undef TARGET_SCHED_ISSUE_RATE
11005 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
11006 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
11007 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
11008
11009 #undef TARGET_SCHED_VARIABLE_ISSUE
11010 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
11011 #undef TARGET_SCHED_REORDER
11012 #define TARGET_SCHED_REORDER s390_sched_reorder
11013 #undef TARGET_SCHED_INIT
11014 #define TARGET_SCHED_INIT s390_sched_init
11015
11016 #undef TARGET_CANNOT_COPY_INSN_P
11017 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
11018 #undef TARGET_RTX_COSTS
11019 #define TARGET_RTX_COSTS s390_rtx_costs
11020 #undef TARGET_ADDRESS_COST
11021 #define TARGET_ADDRESS_COST s390_address_cost
11022 #undef TARGET_REGISTER_MOVE_COST
11023 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
11024 #undef TARGET_MEMORY_MOVE_COST
11025 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
11026
11027 #undef TARGET_MACHINE_DEPENDENT_REORG
11028 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
11029
11030 #undef TARGET_VALID_POINTER_MODE
11031 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
11032
11033 #undef TARGET_BUILD_BUILTIN_VA_LIST
11034 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
11035 #undef TARGET_EXPAND_BUILTIN_VA_START
11036 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
11037 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
11038 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
11039
11040 #undef TARGET_PROMOTE_FUNCTION_MODE
11041 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
11042 #undef TARGET_PASS_BY_REFERENCE
11043 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
11044
11045 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
11046 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
11047 #undef TARGET_FUNCTION_ARG
11048 #define TARGET_FUNCTION_ARG s390_function_arg
11049 #undef TARGET_FUNCTION_ARG_ADVANCE
11050 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
11051 #undef TARGET_FUNCTION_VALUE
11052 #define TARGET_FUNCTION_VALUE s390_function_value
11053 #undef TARGET_LIBCALL_VALUE
11054 #define TARGET_LIBCALL_VALUE s390_libcall_value
11055
11056 #undef TARGET_FIXED_CONDITION_CODE_REGS
11057 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
11058
11059 #undef TARGET_CC_MODES_COMPATIBLE
11060 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
11061
11062 #undef TARGET_INVALID_WITHIN_DOLOOP
11063 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
11064
11065 #ifdef HAVE_AS_TLS
11066 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
11067 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
11068 #endif
11069
11070 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11071 #undef TARGET_MANGLE_TYPE
11072 #define TARGET_MANGLE_TYPE s390_mangle_type
11073 #endif
11074
11075 #undef TARGET_SCALAR_MODE_SUPPORTED_P
11076 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
11077
11078 #undef TARGET_PREFERRED_RELOAD_CLASS
11079 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
11080
11081 #undef TARGET_SECONDARY_RELOAD
11082 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
11083
11084 #undef TARGET_LIBGCC_CMP_RETURN_MODE
11085 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
11086
11087 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
11088 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
11089
11090 #undef TARGET_LEGITIMATE_ADDRESS_P
11091 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
11092
11093 #undef TARGET_LEGITIMATE_CONSTANT_P
11094 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
11095
11096 #undef TARGET_CAN_ELIMINATE
11097 #define TARGET_CAN_ELIMINATE s390_can_eliminate
11098
11099 #undef TARGET_CONDITIONAL_REGISTER_USAGE
11100 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
11101
11102 #undef TARGET_LOOP_UNROLL_ADJUST
11103 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
11104
11105 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11106 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
11107 #undef TARGET_TRAMPOLINE_INIT
11108 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
11109
11110 #undef TARGET_UNWIND_WORD_MODE
11111 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
11112
11113 #undef TARGET_CANONICALIZE_COMPARISON
11114 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
11115
11116 struct gcc_target targetm = TARGET_INITIALIZER;
11117
11118 #include "gt-s390.h"