s390: Implement extzv for z10
[gcc.git] / gcc / config / s390 / s390.c
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
3 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
5 Ulrich Weigand (uweigand@de.ibm.com) and
6 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "tm_p.h"
31 #include "regs.h"
32 #include "hard-reg-set.h"
33 #include "insn-config.h"
34 #include "conditions.h"
35 #include "output.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "reload.h"
43 #include "diagnostic-core.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "debug.h"
49 #include "langhooks.h"
50 #include "optabs.h"
51 #include "gimple.h"
52 #include "df.h"
53 #include "params.h"
54 #include "cfgloop.h"
55 #include "opts.h"
56
57 /* Define the specific costs for a given cpu. */
58
59 struct processor_costs
60 {
61 /* multiplication */
62 const int m; /* cost of an M instruction. */
63 const int mghi; /* cost of an MGHI instruction. */
64 const int mh; /* cost of an MH instruction. */
65 const int mhi; /* cost of an MHI instruction. */
66 const int ml; /* cost of an ML instruction. */
67 const int mr; /* cost of an MR instruction. */
68 const int ms; /* cost of an MS instruction. */
69 const int msg; /* cost of an MSG instruction. */
70 const int msgf; /* cost of an MSGF instruction. */
71 const int msgfr; /* cost of an MSGFR instruction. */
72 const int msgr; /* cost of an MSGR instruction. */
73 const int msr; /* cost of an MSR instruction. */
74 const int mult_df; /* cost of multiplication in DFmode. */
75 const int mxbr;
76 /* square root */
77 const int sqxbr; /* cost of square root in TFmode. */
78 const int sqdbr; /* cost of square root in DFmode. */
79 const int sqebr; /* cost of square root in SFmode. */
80 /* multiply and add */
81 const int madbr; /* cost of multiply and add in DFmode. */
82 const int maebr; /* cost of multiply and add in SFmode. */
83 /* division */
84 const int dxbr;
85 const int ddbr;
86 const int debr;
87 const int dlgr;
88 const int dlr;
89 const int dr;
90 const int dsgfr;
91 const int dsgr;
92 };
93
94 const struct processor_costs *s390_cost;
95
96 static const
97 struct processor_costs z900_cost =
98 {
99 COSTS_N_INSNS (5), /* M */
100 COSTS_N_INSNS (10), /* MGHI */
101 COSTS_N_INSNS (5), /* MH */
102 COSTS_N_INSNS (4), /* MHI */
103 COSTS_N_INSNS (5), /* ML */
104 COSTS_N_INSNS (5), /* MR */
105 COSTS_N_INSNS (4), /* MS */
106 COSTS_N_INSNS (15), /* MSG */
107 COSTS_N_INSNS (7), /* MSGF */
108 COSTS_N_INSNS (7), /* MSGFR */
109 COSTS_N_INSNS (10), /* MSGR */
110 COSTS_N_INSNS (4), /* MSR */
111 COSTS_N_INSNS (7), /* multiplication in DFmode */
112 COSTS_N_INSNS (13), /* MXBR */
113 COSTS_N_INSNS (136), /* SQXBR */
114 COSTS_N_INSNS (44), /* SQDBR */
115 COSTS_N_INSNS (35), /* SQEBR */
116 COSTS_N_INSNS (18), /* MADBR */
117 COSTS_N_INSNS (13), /* MAEBR */
118 COSTS_N_INSNS (134), /* DXBR */
119 COSTS_N_INSNS (30), /* DDBR */
120 COSTS_N_INSNS (27), /* DEBR */
121 COSTS_N_INSNS (220), /* DLGR */
122 COSTS_N_INSNS (34), /* DLR */
123 COSTS_N_INSNS (34), /* DR */
124 COSTS_N_INSNS (32), /* DSGFR */
125 COSTS_N_INSNS (32), /* DSGR */
126 };
127
128 static const
129 struct processor_costs z990_cost =
130 {
131 COSTS_N_INSNS (4), /* M */
132 COSTS_N_INSNS (2), /* MGHI */
133 COSTS_N_INSNS (2), /* MH */
134 COSTS_N_INSNS (2), /* MHI */
135 COSTS_N_INSNS (4), /* ML */
136 COSTS_N_INSNS (4), /* MR */
137 COSTS_N_INSNS (5), /* MS */
138 COSTS_N_INSNS (6), /* MSG */
139 COSTS_N_INSNS (4), /* MSGF */
140 COSTS_N_INSNS (4), /* MSGFR */
141 COSTS_N_INSNS (4), /* MSGR */
142 COSTS_N_INSNS (4), /* MSR */
143 COSTS_N_INSNS (1), /* multiplication in DFmode */
144 COSTS_N_INSNS (28), /* MXBR */
145 COSTS_N_INSNS (130), /* SQXBR */
146 COSTS_N_INSNS (66), /* SQDBR */
147 COSTS_N_INSNS (38), /* SQEBR */
148 COSTS_N_INSNS (1), /* MADBR */
149 COSTS_N_INSNS (1), /* MAEBR */
150 COSTS_N_INSNS (60), /* DXBR */
151 COSTS_N_INSNS (40), /* DDBR */
152 COSTS_N_INSNS (26), /* DEBR */
153 COSTS_N_INSNS (176), /* DLGR */
154 COSTS_N_INSNS (31), /* DLR */
155 COSTS_N_INSNS (31), /* DR */
156 COSTS_N_INSNS (31), /* DSGFR */
157 COSTS_N_INSNS (31), /* DSGR */
158 };
159
160 static const
161 struct processor_costs z9_109_cost =
162 {
163 COSTS_N_INSNS (4), /* M */
164 COSTS_N_INSNS (2), /* MGHI */
165 COSTS_N_INSNS (2), /* MH */
166 COSTS_N_INSNS (2), /* MHI */
167 COSTS_N_INSNS (4), /* ML */
168 COSTS_N_INSNS (4), /* MR */
169 COSTS_N_INSNS (5), /* MS */
170 COSTS_N_INSNS (6), /* MSG */
171 COSTS_N_INSNS (4), /* MSGF */
172 COSTS_N_INSNS (4), /* MSGFR */
173 COSTS_N_INSNS (4), /* MSGR */
174 COSTS_N_INSNS (4), /* MSR */
175 COSTS_N_INSNS (1), /* multiplication in DFmode */
176 COSTS_N_INSNS (28), /* MXBR */
177 COSTS_N_INSNS (130), /* SQXBR */
178 COSTS_N_INSNS (66), /* SQDBR */
179 COSTS_N_INSNS (38), /* SQEBR */
180 COSTS_N_INSNS (1), /* MADBR */
181 COSTS_N_INSNS (1), /* MAEBR */
182 COSTS_N_INSNS (60), /* DXBR */
183 COSTS_N_INSNS (40), /* DDBR */
184 COSTS_N_INSNS (26), /* DEBR */
185 COSTS_N_INSNS (30), /* DLGR */
186 COSTS_N_INSNS (23), /* DLR */
187 COSTS_N_INSNS (23), /* DR */
188 COSTS_N_INSNS (24), /* DSGFR */
189 COSTS_N_INSNS (24), /* DSGR */
190 };
191
192 static const
193 struct processor_costs z10_cost =
194 {
195 COSTS_N_INSNS (10), /* M */
196 COSTS_N_INSNS (10), /* MGHI */
197 COSTS_N_INSNS (10), /* MH */
198 COSTS_N_INSNS (10), /* MHI */
199 COSTS_N_INSNS (10), /* ML */
200 COSTS_N_INSNS (10), /* MR */
201 COSTS_N_INSNS (10), /* MS */
202 COSTS_N_INSNS (10), /* MSG */
203 COSTS_N_INSNS (10), /* MSGF */
204 COSTS_N_INSNS (10), /* MSGFR */
205 COSTS_N_INSNS (10), /* MSGR */
206 COSTS_N_INSNS (10), /* MSR */
207 COSTS_N_INSNS (1) , /* multiplication in DFmode */
208 COSTS_N_INSNS (50), /* MXBR */
209 COSTS_N_INSNS (120), /* SQXBR */
210 COSTS_N_INSNS (52), /* SQDBR */
211 COSTS_N_INSNS (38), /* SQEBR */
212 COSTS_N_INSNS (1), /* MADBR */
213 COSTS_N_INSNS (1), /* MAEBR */
214 COSTS_N_INSNS (111), /* DXBR */
215 COSTS_N_INSNS (39), /* DDBR */
216 COSTS_N_INSNS (32), /* DEBR */
217 COSTS_N_INSNS (160), /* DLGR */
218 COSTS_N_INSNS (71), /* DLR */
219 COSTS_N_INSNS (71), /* DR */
220 COSTS_N_INSNS (71), /* DSGFR */
221 COSTS_N_INSNS (71), /* DSGR */
222 };
223
224 static const
225 struct processor_costs z196_cost =
226 {
227 COSTS_N_INSNS (7), /* M */
228 COSTS_N_INSNS (5), /* MGHI */
229 COSTS_N_INSNS (5), /* MH */
230 COSTS_N_INSNS (5), /* MHI */
231 COSTS_N_INSNS (7), /* ML */
232 COSTS_N_INSNS (7), /* MR */
233 COSTS_N_INSNS (6), /* MS */
234 COSTS_N_INSNS (8), /* MSG */
235 COSTS_N_INSNS (6), /* MSGF */
236 COSTS_N_INSNS (6), /* MSGFR */
237 COSTS_N_INSNS (8), /* MSGR */
238 COSTS_N_INSNS (6), /* MSR */
239 COSTS_N_INSNS (1) , /* multiplication in DFmode */
240 COSTS_N_INSNS (40), /* MXBR B+40 */
241 COSTS_N_INSNS (100), /* SQXBR B+100 */
242 COSTS_N_INSNS (42), /* SQDBR B+42 */
243 COSTS_N_INSNS (28), /* SQEBR B+28 */
244 COSTS_N_INSNS (1), /* MADBR B */
245 COSTS_N_INSNS (1), /* MAEBR B */
246 COSTS_N_INSNS (101), /* DXBR B+101 */
247 COSTS_N_INSNS (29), /* DDBR */
248 COSTS_N_INSNS (22), /* DEBR */
249 COSTS_N_INSNS (160), /* DLGR cracked */
250 COSTS_N_INSNS (160), /* DLR cracked */
251 COSTS_N_INSNS (160), /* DR expanded */
252 COSTS_N_INSNS (160), /* DSGFR cracked */
253 COSTS_N_INSNS (160), /* DSGR cracked */
254 };
255
256 static const
257 struct processor_costs zEC12_cost =
258 {
259 COSTS_N_INSNS (7), /* M */
260 COSTS_N_INSNS (5), /* MGHI */
261 COSTS_N_INSNS (5), /* MH */
262 COSTS_N_INSNS (5), /* MHI */
263 COSTS_N_INSNS (7), /* ML */
264 COSTS_N_INSNS (7), /* MR */
265 COSTS_N_INSNS (6), /* MS */
266 COSTS_N_INSNS (8), /* MSG */
267 COSTS_N_INSNS (6), /* MSGF */
268 COSTS_N_INSNS (6), /* MSGFR */
269 COSTS_N_INSNS (8), /* MSGR */
270 COSTS_N_INSNS (6), /* MSR */
271 COSTS_N_INSNS (1) , /* multiplication in DFmode */
272 COSTS_N_INSNS (40), /* MXBR B+40 */
273 COSTS_N_INSNS (100), /* SQXBR B+100 */
274 COSTS_N_INSNS (42), /* SQDBR B+42 */
275 COSTS_N_INSNS (28), /* SQEBR B+28 */
276 COSTS_N_INSNS (1), /* MADBR B */
277 COSTS_N_INSNS (1), /* MAEBR B */
278 COSTS_N_INSNS (131), /* DXBR B+131 */
279 COSTS_N_INSNS (29), /* DDBR */
280 COSTS_N_INSNS (22), /* DEBR */
281 COSTS_N_INSNS (160), /* DLGR cracked */
282 COSTS_N_INSNS (160), /* DLR cracked */
283 COSTS_N_INSNS (160), /* DR expanded */
284 COSTS_N_INSNS (160), /* DSGFR cracked */
285 COSTS_N_INSNS (160), /* DSGR cracked */
286 };
287
288 extern int reload_completed;
289
290 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
291 static rtx last_scheduled_insn;
292
293 /* Structure used to hold the components of a S/390 memory
294 address. A legitimate address on S/390 is of the general
295 form
296 base + index + displacement
297 where any of the components is optional.
298
299 base and index are registers of the class ADDR_REGS,
300 displacement is an unsigned 12-bit immediate constant. */
301
302 struct s390_address
303 {
304 rtx base;
305 rtx indx;
306 rtx disp;
307 bool pointer;
308 bool literal_pool;
309 };
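/* Illustrative sketch (values are examples only): for an address of the
   form (plus (plus (reg %r2) (reg %r3)) (const_int 8)) the decomposition
   routine below fills this structure with one register as base, the other
   as indx, and disp = (const_int 8), i.e. the base + index + displacement
   form described above.  */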
310
311 /* The following structure is embedded in the machine
312 specific part of struct function. */
313
314 struct GTY (()) s390_frame_layout
315 {
316 /* Offset within stack frame. */
317 HOST_WIDE_INT gprs_offset;
318 HOST_WIDE_INT f0_offset;
319 HOST_WIDE_INT f4_offset;
320 HOST_WIDE_INT f8_offset;
321 HOST_WIDE_INT backchain_offset;
322
 323   /* Numbers of the first and last GPRs for which slots in the
 324      register save area are reserved. */
325 int first_save_gpr_slot;
326 int last_save_gpr_slot;
327
328 /* Number of first and last gpr to be saved, restored. */
329 int first_save_gpr;
330 int first_restore_gpr;
331 int last_save_gpr;
332 int last_restore_gpr;
333
334 /* Bits standing for floating point registers. Set, if the
335 respective register has to be saved. Starting with reg 16 (f0)
336 at the rightmost bit.
337 Bit 15 - 8 7 6 5 4 3 2 1 0
338 fpr 15 - 8 7 5 3 1 6 4 2 0
339 reg 31 - 24 23 22 21 20 19 18 17 16 */
340 unsigned int fpr_bitmap;
341
342 /* Number of floating point registers f8-f15 which must be saved. */
343 int high_fprs;
344
345 /* Set if return address needs to be saved.
346 This flag is set by s390_return_addr_rtx if it could not use
347 the initial value of r14 and therefore depends on r14 saved
348 to the stack. */
349 bool save_return_addr_p;
350
351 /* Size of stack frame. */
352 HOST_WIDE_INT frame_size;
353 };
354
355 /* Define the structure for the machine field in struct function. */
356
357 struct GTY(()) machine_function
358 {
359 struct s390_frame_layout frame_layout;
360
361 /* Literal pool base register. */
362 rtx base_reg;
363
364 /* True if we may need to perform branch splitting. */
365 bool split_branches_pending_p;
366
367 /* Some local-dynamic TLS symbol name. */
368 const char *some_ld_name;
369
370 bool has_landing_pad_p;
371 };
372
 373 /* A few accessor macros for struct cfun->machine->s390_frame_layout. */
374
375 #define cfun_frame_layout (cfun->machine->frame_layout)
376 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
377 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
378 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
379 #define cfun_set_fpr_bit(BITNUM) (cfun->machine->frame_layout.fpr_bitmap |= \
380 (1 << (BITNUM)))
381 #define cfun_fpr_bit_p(BITNUM) (!!(cfun->machine->frame_layout.fpr_bitmap & \
382 (1 << (BITNUM))))
383
384 /* Number of GPRs and FPRs used for argument passing. */
385 #define GP_ARG_NUM_REG 5
386 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
387
388 /* A couple of shortcuts. */
389 #define CONST_OK_FOR_J(x) \
390 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
391 #define CONST_OK_FOR_K(x) \
392 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
393 #define CONST_OK_FOR_Os(x) \
394 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
395 #define CONST_OK_FOR_Op(x) \
396 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
397 #define CONST_OK_FOR_On(x) \
398 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
399
400 #define REGNO_PAIR_OK(REGNO, MODE) \
401 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
402
403 /* That's the read ahead of the dynamic branch prediction unit in
404 bytes on a z10 (or higher) CPU. */
405 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
406
407 /* Return the alignment for LABEL. We default to the -falign-labels
408 value except for the literal pool base label. */
409 int
410 s390_label_align (rtx label)
411 {
412 rtx prev_insn = prev_active_insn (label);
413
414 if (prev_insn == NULL_RTX)
415 goto old;
416
417 prev_insn = single_set (prev_insn);
418
419 if (prev_insn == NULL_RTX)
420 goto old;
421
422 prev_insn = SET_SRC (prev_insn);
423
424 /* Don't align literal pool base labels. */
425 if (GET_CODE (prev_insn) == UNSPEC
426 && XINT (prev_insn, 1) == UNSPEC_MAIN_BASE)
427 return 0;
428
429 old:
430 return align_labels_log;
431 }
432
433 static enum machine_mode
434 s390_libgcc_cmp_return_mode (void)
435 {
436 return TARGET_64BIT ? DImode : SImode;
437 }
438
439 static enum machine_mode
440 s390_libgcc_shift_count_mode (void)
441 {
442 return TARGET_64BIT ? DImode : SImode;
443 }
444
445 static enum machine_mode
446 s390_unwind_word_mode (void)
447 {
448 return TARGET_64BIT ? DImode : SImode;
449 }
450
451 /* Return true if the back end supports mode MODE. */
452 static bool
453 s390_scalar_mode_supported_p (enum machine_mode mode)
454 {
 455   /* In contrast to the default implementation, reject TImode constants on
 456      31-bit TARGET_ZARCH for ABI compliance. */
457 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
458 return false;
459
460 if (DECIMAL_FLOAT_MODE_P (mode))
461 return default_decimal_float_supported_p ();
462
463 return default_scalar_mode_supported_p (mode);
464 }
465
466 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
467
468 void
469 s390_set_has_landing_pad_p (bool value)
470 {
471 cfun->machine->has_landing_pad_p = value;
472 }
473
474 /* If two condition code modes are compatible, return a condition code
475 mode which is compatible with both. Otherwise, return
476 VOIDmode. */
477
478 static enum machine_mode
479 s390_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
480 {
481 if (m1 == m2)
482 return m1;
483
484 switch (m1)
485 {
486 case CCZmode:
487 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
488 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
489 return m2;
490 return VOIDmode;
491
492 case CCSmode:
493 case CCUmode:
494 case CCTmode:
495 case CCSRmode:
496 case CCURmode:
497 case CCZ1mode:
498 if (m2 == CCZmode)
499 return m1;
500
501 return VOIDmode;
502
503 default:
504 return VOIDmode;
505 }
506 return VOIDmode;
507 }
508
509 /* Return true if SET either doesn't set the CC register, or else
510 the source and destination have matching CC modes and that
511 CC mode is at least as constrained as REQ_MODE. */
512
513 static bool
514 s390_match_ccmode_set (rtx set, enum machine_mode req_mode)
515 {
516 enum machine_mode set_mode;
517
518 gcc_assert (GET_CODE (set) == SET);
519
520 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
521 return 1;
522
523 set_mode = GET_MODE (SET_DEST (set));
524 switch (set_mode)
525 {
526 case CCSmode:
527 case CCSRmode:
528 case CCUmode:
529 case CCURmode:
530 case CCLmode:
531 case CCL1mode:
532 case CCL2mode:
533 case CCL3mode:
534 case CCT1mode:
535 case CCT2mode:
536 case CCT3mode:
537 if (req_mode != set_mode)
538 return 0;
539 break;
540
541 case CCZmode:
542 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
543 && req_mode != CCSRmode && req_mode != CCURmode)
544 return 0;
545 break;
546
547 case CCAPmode:
548 case CCANmode:
549 if (req_mode != CCAmode)
550 return 0;
551 break;
552
553 default:
554 gcc_unreachable ();
555 }
556
557 return (GET_MODE (SET_SRC (set)) == set_mode);
558 }
559
560 /* Return true if every SET in INSN that sets the CC register
561 has source and destination with matching CC modes and that
562 CC mode is at least as constrained as REQ_MODE.
563 If REQ_MODE is VOIDmode, always return false. */
564
565 bool
566 s390_match_ccmode (rtx insn, enum machine_mode req_mode)
567 {
568 int i;
569
570 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
571 if (req_mode == VOIDmode)
572 return false;
573
574 if (GET_CODE (PATTERN (insn)) == SET)
575 return s390_match_ccmode_set (PATTERN (insn), req_mode);
576
577 if (GET_CODE (PATTERN (insn)) == PARALLEL)
578 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
579 {
580 rtx set = XVECEXP (PATTERN (insn), 0, i);
581 if (GET_CODE (set) == SET)
582 if (!s390_match_ccmode_set (set, req_mode))
583 return false;
584 }
585
586 return true;
587 }
588
589 /* If a test-under-mask instruction can be used to implement
590 (compare (and ... OP1) OP2), return the CC mode required
591 to do that. Otherwise, return VOIDmode.
592 MIXED is true if the instruction can distinguish between
 593    CC1 and CC2 for mixed selected bits (TMxx); it is false
594 if the instruction cannot (TM). */
595
596 enum machine_mode
597 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
598 {
599 int bit0, bit1;
600
601 /* ??? Fixme: should work on CONST_DOUBLE as well. */
602 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
603 return VOIDmode;
604
605 /* Selected bits all zero: CC0.
606 e.g.: int a; if ((a & (16 + 128)) == 0) */
607 if (INTVAL (op2) == 0)
608 return CCTmode;
609
610 /* Selected bits all one: CC3.
611 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
612 if (INTVAL (op2) == INTVAL (op1))
613 return CCT3mode;
614
615 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
616 int a;
617 if ((a & (16 + 128)) == 16) -> CCT1
618 if ((a & (16 + 128)) == 128) -> CCT2 */
619 if (mixed)
620 {
621 bit1 = exact_log2 (INTVAL (op2));
622 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
623 if (bit0 != -1 && bit1 != -1)
624 return bit0 > bit1 ? CCT1mode : CCT2mode;
625 }
626
627 return VOIDmode;
628 }
629
630 /* Given a comparison code OP (EQ, NE, etc.) and the operands
631 OP0 and OP1 of a COMPARE, return the mode to be used for the
632 comparison. */
633
634 enum machine_mode
635 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
636 {
637 switch (code)
638 {
639 case EQ:
640 case NE:
641 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
642 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
643 return CCAPmode;
644 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
645 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
646 return CCAPmode;
647 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
648 || GET_CODE (op1) == NEG)
649 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
650 return CCLmode;
651
652 if (GET_CODE (op0) == AND)
653 {
654 /* Check whether we can potentially do it via TM. */
655 enum machine_mode ccmode;
656 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
657 if (ccmode != VOIDmode)
658 {
659 /* Relax CCTmode to CCZmode to allow fall-back to AND
660 if that turns out to be beneficial. */
661 return ccmode == CCTmode ? CCZmode : ccmode;
662 }
663 }
664
665 if (register_operand (op0, HImode)
666 && GET_CODE (op1) == CONST_INT
667 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
668 return CCT3mode;
669 if (register_operand (op0, QImode)
670 && GET_CODE (op1) == CONST_INT
671 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
672 return CCT3mode;
673
674 return CCZmode;
675
676 case LE:
677 case LT:
678 case GE:
679 case GT:
680 /* The only overflow condition of NEG and ABS happens when
 681          INT_MIN (i.e. -INT_MAX - 1) is used as the operand; the result
 682          stays negative, so we have an overflow from a positive value to a negative.
683 Using CCAP mode the resulting cc can be used for comparisons. */
684 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
685 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
686 return CCAPmode;
687
688 /* If constants are involved in an add instruction it is possible to use
 689          the resulting cc for comparisons with zero.  If the sign of the
 690          constant is known, the overflow behavior becomes predictable. e.g.:
691 int a, b; if ((b = a + c) > 0)
692 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
693 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
694 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
695 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
696 /* Avoid INT32_MIN on 32 bit. */
697 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
698 {
699 if (INTVAL (XEXP((op0), 1)) < 0)
700 return CCANmode;
701 else
702 return CCAPmode;
703 }
704 /* Fall through. */
705 case UNORDERED:
706 case ORDERED:
707 case UNEQ:
708 case UNLE:
709 case UNLT:
710 case UNGE:
711 case UNGT:
712 case LTGT:
713 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
714 && GET_CODE (op1) != CONST_INT)
715 return CCSRmode;
716 return CCSmode;
717
718 case LTU:
719 case GEU:
720 if (GET_CODE (op0) == PLUS
721 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
722 return CCL1mode;
723
724 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
725 && GET_CODE (op1) != CONST_INT)
726 return CCURmode;
727 return CCUmode;
728
729 case LEU:
730 case GTU:
731 if (GET_CODE (op0) == MINUS
732 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
733 return CCL2mode;
734
735 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
736 && GET_CODE (op1) != CONST_INT)
737 return CCURmode;
738 return CCUmode;
739
740 default:
741 gcc_unreachable ();
742 }
743 }
744
745 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
746 that we can implement more efficiently. */
747
748 static void
749 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
750 bool op0_preserve_value)
751 {
752 if (op0_preserve_value)
753 return;
754
755 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
756 if ((*code == EQ || *code == NE)
757 && *op1 == const0_rtx
758 && GET_CODE (*op0) == ZERO_EXTRACT
759 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
760 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
761 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
762 {
763 rtx inner = XEXP (*op0, 0);
764 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
765 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
766 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
767
768 if (len > 0 && len < modesize
769 && pos >= 0 && pos + len <= modesize
770 && modesize <= HOST_BITS_PER_WIDE_INT)
771 {
772 unsigned HOST_WIDE_INT block;
773 block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
774 block <<= modesize - pos - len;
775
776 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
777 gen_int_mode (block, GET_MODE (inner)));
778 }
779 }
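  /* Worked example (illustrative only): with an SImode INNER
     (modesize 32), LEN = 2 and POS = 4 the mask built above is
     ((1 << 2) - 1) << (32 - 4 - 2) = 0x0c000000.  */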
780
781 /* Narrow AND of memory against immediate to enable TM. */
782 if ((*code == EQ || *code == NE)
783 && *op1 == const0_rtx
784 && GET_CODE (*op0) == AND
785 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
786 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
787 {
788 rtx inner = XEXP (*op0, 0);
789 rtx mask = XEXP (*op0, 1);
790
791 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
792 if (GET_CODE (inner) == SUBREG
793 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
794 && (GET_MODE_SIZE (GET_MODE (inner))
795 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
796 && ((INTVAL (mask)
797 & GET_MODE_MASK (GET_MODE (inner))
798 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
799 == 0))
800 inner = SUBREG_REG (inner);
801
802 /* Do not change volatile MEMs. */
803 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
804 {
805 int part = s390_single_part (XEXP (*op0, 1),
806 GET_MODE (inner), QImode, 0);
807 if (part >= 0)
808 {
809 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
810 inner = adjust_address_nv (inner, QImode, part);
811 *op0 = gen_rtx_AND (QImode, inner, mask);
812 }
813 }
814 }
815
816 /* Narrow comparisons against 0xffff to HImode if possible. */
817 if ((*code == EQ || *code == NE)
818 && GET_CODE (*op1) == CONST_INT
819 && INTVAL (*op1) == 0xffff
820 && SCALAR_INT_MODE_P (GET_MODE (*op0))
821 && (nonzero_bits (*op0, GET_MODE (*op0))
822 & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
823 {
824 *op0 = gen_lowpart (HImode, *op0);
825 *op1 = constm1_rtx;
826 }
827
828 /* Remove redundant UNSPEC_CCU_TO_INT conversions if possible. */
829 if (GET_CODE (*op0) == UNSPEC
830 && XINT (*op0, 1) == UNSPEC_CCU_TO_INT
831 && XVECLEN (*op0, 0) == 1
832 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
833 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
834 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
835 && *op1 == const0_rtx)
836 {
837 enum rtx_code new_code = UNKNOWN;
838 switch (*code)
839 {
840 case EQ: new_code = EQ; break;
841 case NE: new_code = NE; break;
842 case LT: new_code = GTU; break;
843 case GT: new_code = LTU; break;
844 case LE: new_code = GEU; break;
845 case GE: new_code = LEU; break;
846 default: break;
847 }
848
849 if (new_code != UNKNOWN)
850 {
851 *op0 = XVECEXP (*op0, 0, 0);
852 *code = new_code;
853 }
854 }
855
856 /* Remove redundant UNSPEC_CCZ_TO_INT conversions if possible. */
857 if (GET_CODE (*op0) == UNSPEC
858 && XINT (*op0, 1) == UNSPEC_CCZ_TO_INT
859 && XVECLEN (*op0, 0) == 1
860 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCZmode
861 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
862 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
863 && *op1 == const0_rtx)
864 {
865 enum rtx_code new_code = UNKNOWN;
866 switch (*code)
867 {
868 case EQ: new_code = EQ; break;
869 case NE: new_code = NE; break;
870 default: break;
871 }
872
873 if (new_code != UNKNOWN)
874 {
875 *op0 = XVECEXP (*op0, 0, 0);
876 *code = new_code;
877 }
878 }
879
880 /* Simplify cascaded EQ, NE with const0_rtx. */
881 if ((*code == NE || *code == EQ)
882 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
883 && GET_MODE (*op0) == SImode
884 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
885 && REG_P (XEXP (*op0, 0))
886 && XEXP (*op0, 1) == const0_rtx
887 && *op1 == const0_rtx)
888 {
889 if ((*code == EQ && GET_CODE (*op0) == NE)
890 || (*code == NE && GET_CODE (*op0) == EQ))
891 *code = EQ;
892 else
893 *code = NE;
894 *op0 = XEXP (*op0, 0);
895 }
896
897 /* Prefer register over memory as first operand. */
898 if (MEM_P (*op0) && REG_P (*op1))
899 {
900 rtx tem = *op0; *op0 = *op1; *op1 = tem;
901 *code = (int)swap_condition ((enum rtx_code)*code);
902 }
903 }
904
905 /* Emit a compare instruction suitable to implement the comparison
906 OP0 CODE OP1. Return the correct condition RTL to be placed in
907 the IF_THEN_ELSE of the conditional branch testing the result. */
908
909 rtx
910 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
911 {
912 enum machine_mode mode = s390_select_ccmode (code, op0, op1);
913 rtx cc;
914
915 /* Do not output a redundant compare instruction if a compare_and_swap
916 pattern already computed the result and the machine modes are compatible. */
917 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
918 {
919 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
920 == GET_MODE (op0));
921 cc = op0;
922 }
923 else
924 {
925 cc = gen_rtx_REG (mode, CC_REGNUM);
926 emit_insn (gen_rtx_SET (VOIDmode, cc, gen_rtx_COMPARE (mode, op0, op1)));
927 }
928
929 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
930 }
931
932 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
933 matches CMP.
934 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
935 conditional branch testing the result. */
936
937 static rtx
938 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
939 rtx cmp, rtx new_rtx)
940 {
941 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
942 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
943 const0_rtx);
944 }
945
946 /* Emit a jump instruction to TARGET. If COND is NULL_RTX, emit an
947 unconditional jump, else a conditional jump under condition COND. */
948
949 void
950 s390_emit_jump (rtx target, rtx cond)
951 {
952 rtx insn;
953
954 target = gen_rtx_LABEL_REF (VOIDmode, target);
955 if (cond)
956 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
957
958 insn = gen_rtx_SET (VOIDmode, pc_rtx, target);
959 emit_jump_insn (insn);
960 }
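/* A typical use of the two helpers above (a sketch, not taken from this
   file): emit the comparison and branch on its result, e.g.

     s390_emit_jump (label, s390_emit_compare (GT, op0, op1));

   branches to LABEL when OP0 is greater than OP1.  */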
961
962 /* Return branch condition mask to implement a branch
963 specified by CODE. Return -1 for invalid comparisons. */
964
965 int
966 s390_branch_condition_mask (rtx code)
967 {
968 const int CC0 = 1 << 3;
969 const int CC1 = 1 << 2;
970 const int CC2 = 1 << 1;
971 const int CC3 = 1 << 0;
972
973 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
974 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
975 gcc_assert (XEXP (code, 1) == const0_rtx);
976
977 switch (GET_MODE (XEXP (code, 0)))
978 {
979 case CCZmode:
980 case CCZ1mode:
981 switch (GET_CODE (code))
982 {
983 case EQ: return CC0;
984 case NE: return CC1 | CC2 | CC3;
985 default: return -1;
986 }
987 break;
988
989 case CCT1mode:
990 switch (GET_CODE (code))
991 {
992 case EQ: return CC1;
993 case NE: return CC0 | CC2 | CC3;
994 default: return -1;
995 }
996 break;
997
998 case CCT2mode:
999 switch (GET_CODE (code))
1000 {
1001 case EQ: return CC2;
1002 case NE: return CC0 | CC1 | CC3;
1003 default: return -1;
1004 }
1005 break;
1006
1007 case CCT3mode:
1008 switch (GET_CODE (code))
1009 {
1010 case EQ: return CC3;
1011 case NE: return CC0 | CC1 | CC2;
1012 default: return -1;
1013 }
1014 break;
1015
1016 case CCLmode:
1017 switch (GET_CODE (code))
1018 {
1019 case EQ: return CC0 | CC2;
1020 case NE: return CC1 | CC3;
1021 default: return -1;
1022 }
1023 break;
1024
1025 case CCL1mode:
1026 switch (GET_CODE (code))
1027 {
1028 case LTU: return CC2 | CC3; /* carry */
1029 case GEU: return CC0 | CC1; /* no carry */
1030 default: return -1;
1031 }
1032 break;
1033
1034 case CCL2mode:
1035 switch (GET_CODE (code))
1036 {
1037 case GTU: return CC0 | CC1; /* borrow */
1038 case LEU: return CC2 | CC3; /* no borrow */
1039 default: return -1;
1040 }
1041 break;
1042
1043 case CCL3mode:
1044 switch (GET_CODE (code))
1045 {
1046 case EQ: return CC0 | CC2;
1047 case NE: return CC1 | CC3;
1048 case LTU: return CC1;
1049 case GTU: return CC3;
1050 case LEU: return CC1 | CC2;
1051 case GEU: return CC2 | CC3;
1052 default: return -1;
1053 }
1054
1055 case CCUmode:
1056 switch (GET_CODE (code))
1057 {
1058 case EQ: return CC0;
1059 case NE: return CC1 | CC2 | CC3;
1060 case LTU: return CC1;
1061 case GTU: return CC2;
1062 case LEU: return CC0 | CC1;
1063 case GEU: return CC0 | CC2;
1064 default: return -1;
1065 }
1066 break;
1067
1068 case CCURmode:
1069 switch (GET_CODE (code))
1070 {
1071 case EQ: return CC0;
1072 case NE: return CC2 | CC1 | CC3;
1073 case LTU: return CC2;
1074 case GTU: return CC1;
1075 case LEU: return CC0 | CC2;
1076 case GEU: return CC0 | CC1;
1077 default: return -1;
1078 }
1079 break;
1080
1081 case CCAPmode:
1082 switch (GET_CODE (code))
1083 {
1084 case EQ: return CC0;
1085 case NE: return CC1 | CC2 | CC3;
1086 case LT: return CC1 | CC3;
1087 case GT: return CC2;
1088 case LE: return CC0 | CC1 | CC3;
1089 case GE: return CC0 | CC2;
1090 default: return -1;
1091 }
1092 break;
1093
1094 case CCANmode:
1095 switch (GET_CODE (code))
1096 {
1097 case EQ: return CC0;
1098 case NE: return CC1 | CC2 | CC3;
1099 case LT: return CC1;
1100 case GT: return CC2 | CC3;
1101 case LE: return CC0 | CC1;
1102 case GE: return CC0 | CC2 | CC3;
1103 default: return -1;
1104 }
1105 break;
1106
1107 case CCSmode:
1108 switch (GET_CODE (code))
1109 {
1110 case EQ: return CC0;
1111 case NE: return CC1 | CC2 | CC3;
1112 case LT: return CC1;
1113 case GT: return CC2;
1114 case LE: return CC0 | CC1;
1115 case GE: return CC0 | CC2;
1116 case UNORDERED: return CC3;
1117 case ORDERED: return CC0 | CC1 | CC2;
1118 case UNEQ: return CC0 | CC3;
1119 case UNLT: return CC1 | CC3;
1120 case UNGT: return CC2 | CC3;
1121 case UNLE: return CC0 | CC1 | CC3;
1122 case UNGE: return CC0 | CC2 | CC3;
1123 case LTGT: return CC1 | CC2;
1124 default: return -1;
1125 }
1126 break;
1127
1128 case CCSRmode:
1129 switch (GET_CODE (code))
1130 {
1131 case EQ: return CC0;
1132 case NE: return CC2 | CC1 | CC3;
1133 case LT: return CC2;
1134 case GT: return CC1;
1135 case LE: return CC0 | CC2;
1136 case GE: return CC0 | CC1;
1137 case UNORDERED: return CC3;
1138 case ORDERED: return CC0 | CC2 | CC1;
1139 case UNEQ: return CC0 | CC3;
1140 case UNLT: return CC2 | CC3;
1141 case UNGT: return CC1 | CC3;
1142 case UNLE: return CC0 | CC2 | CC3;
1143 case UNGE: return CC0 | CC1 | CC3;
1144 case LTGT: return CC2 | CC1;
1145 default: return -1;
1146 }
1147 break;
1148
1149 default:
1150 return -1;
1151 }
1152 }
1153
1154
1155 /* Return branch condition mask to implement a compare and branch
1156 specified by CODE. Return -1 for invalid comparisons. */
1157
1158 int
1159 s390_compare_and_branch_condition_mask (rtx code)
1160 {
1161 const int CC0 = 1 << 3;
1162 const int CC1 = 1 << 2;
1163 const int CC2 = 1 << 1;
1164
1165 switch (GET_CODE (code))
1166 {
1167 case EQ:
1168 return CC0;
1169 case NE:
1170 return CC1 | CC2;
1171 case LT:
1172 case LTU:
1173 return CC1;
1174 case GT:
1175 case GTU:
1176 return CC2;
1177 case LE:
1178 case LEU:
1179 return CC0 | CC1;
1180 case GE:
1181 case GEU:
1182 return CC0 | CC2;
1183 default:
1184 gcc_unreachable ();
1185 }
1186 return -1;
1187 }
1188
1189 /* If INV is false, return assembler mnemonic string to implement
1190 a branch specified by CODE. If INV is true, return mnemonic
1191 for the corresponding inverted branch. */
1192
1193 static const char *
1194 s390_branch_condition_mnemonic (rtx code, int inv)
1195 {
1196 int mask;
1197
1198 static const char *const mnemonic[16] =
1199 {
1200 NULL, "o", "h", "nle",
1201 "l", "nhe", "lh", "ne",
1202 "e", "nlh", "he", "nl",
1203 "le", "nh", "no", NULL
1204 };
1205
1206 if (GET_CODE (XEXP (code, 0)) == REG
1207 && REGNO (XEXP (code, 0)) == CC_REGNUM
1208 && XEXP (code, 1) == const0_rtx)
1209 mask = s390_branch_condition_mask (code);
1210 else
1211 mask = s390_compare_and_branch_condition_mask (code);
1212
1213 gcc_assert (mask >= 0);
1214
1215 if (inv)
1216 mask ^= 15;
1217
1218 gcc_assert (mask >= 1 && mask <= 14);
1219
1220 return mnemonic[mask];
1221 }
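/* Worked example (illustrative only): a GE test in CCSmode yields mask
   CC0 | CC2 = 0b1010 = 10 in s390_branch_condition_mask, so the mnemonic
   chosen above is "he" (branch on high or equal); the inverted branch
   uses mask 10 ^ 15 = 5, i.e. "nhe".  */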
1222
1223 /* Return the part of op which has a value different from def.
1224 The size of the part is determined by mode.
1225 Use this function only if you already know that op really
1226 contains such a part. */
1227
1228 unsigned HOST_WIDE_INT
1229 s390_extract_part (rtx op, enum machine_mode mode, int def)
1230 {
1231 unsigned HOST_WIDE_INT value = 0;
1232 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
1233 int part_bits = GET_MODE_BITSIZE (mode);
1234 unsigned HOST_WIDE_INT part_mask
1235 = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
1236 int i;
1237
1238 for (i = 0; i < max_parts; i++)
1239 {
1240 if (i == 0)
1241 value = (unsigned HOST_WIDE_INT) INTVAL (op);
1242 else
1243 value >>= part_bits;
1244
1245 if ((value & part_mask) != (def & part_mask))
1246 return value & part_mask;
1247 }
1248
1249 gcc_unreachable ();
1250 }
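#if 0
/* Minimal usage sketch (not compiled in); the constant is illustrative
   only.  The only QImode part of 0x00ff0000 that differs from the
   default value 0 is 0xff.  */
static void
s390_extract_part_example (void)
{
  gcc_assert (s390_extract_part (GEN_INT (0x00ff0000), QImode, 0) == 0xff);
}
#endif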
1251
1252 /* If OP is an integer constant of mode MODE with exactly one
1253 part of mode PART_MODE unequal to DEF, return the number of that
1254 part. Otherwise, return -1. */
1255
1256 int
1257 s390_single_part (rtx op,
1258 enum machine_mode mode,
1259 enum machine_mode part_mode,
1260 int def)
1261 {
1262 unsigned HOST_WIDE_INT value = 0;
1263 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
1264 unsigned HOST_WIDE_INT part_mask
1265 = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
1266 int i, part = -1;
1267
1268 if (GET_CODE (op) != CONST_INT)
1269 return -1;
1270
1271 for (i = 0; i < n_parts; i++)
1272 {
1273 if (i == 0)
1274 value = (unsigned HOST_WIDE_INT) INTVAL (op);
1275 else
1276 value >>= GET_MODE_BITSIZE (part_mode);
1277
1278 if ((value & part_mask) != (def & part_mask))
1279 {
1280 if (part != -1)
1281 return -1;
1282 else
1283 part = i;
1284 }
1285 }
1286 return part == -1 ? -1 : n_parts - 1 - part;
1287 }
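#if 0
/* Minimal usage sketch (not compiled in); the constants are illustrative
   only.  0x00ff0000 has exactly one QImode part different from 0;
   counting parts from the most significant byte it is part 1.
   0x00ff00ff has two such parts and is therefore rejected.  */
static void
s390_single_part_example (void)
{
  gcc_assert (s390_single_part (GEN_INT (0x00ff0000), SImode, QImode, 0) == 1);
  gcc_assert (s390_single_part (GEN_INT (0x00ff00ff), SImode, QImode, 0) == -1);
}
#endif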
1288
1289 /* Return true if IN contains a contiguous bitfield in the lower SIZE
1290 bits and no other bits are set in IN. POS and LENGTH can be used
1291 to obtain the start position and the length of the bitfield.
1292
1293 POS gives the position of the first bit of the bitfield counting
1294 from the lowest order bit starting with zero. In order to use this
1295 value for S/390 instructions this has to be converted to "bits big
1296 endian" style. */
1297
1298 bool
1299 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
1300 int *pos, int *length)
1301 {
1302 int tmp_pos = 0;
1303 int tmp_length = 0;
1304 int i;
1305 unsigned HOST_WIDE_INT mask = 1ULL;
1306 bool contiguous = false;
1307
1308 for (i = 0; i < size; mask <<= 1, i++)
1309 {
1310 if (contiguous)
1311 {
1312 if (mask & in)
1313 tmp_length++;
1314 else
1315 break;
1316 }
1317 else
1318 {
1319 if (mask & in)
1320 {
1321 contiguous = true;
1322 tmp_length++;
1323 }
1324 else
1325 tmp_pos++;
1326 }
1327 }
1328
1329 if (!tmp_length)
1330 return false;
1331
1332 /* Calculate a mask for all bits beyond the contiguous bits. */
1333 mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
1334
1335 if (mask & in)
1336 return false;
1337
1338 if (tmp_length + tmp_pos - 1 > size)
1339 return false;
1340
1341 if (length)
1342 *length = tmp_length;
1343
1344 if (pos)
1345 *pos = tmp_pos;
1346
1347 return true;
1348 }
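#if 0
/* Minimal usage sketch (not compiled in); the masks are illustrative
   only.  0x0ff0 is a single run of 8 one-bits starting at bit 4,
   while 0x0f0f consists of two separate runs and is rejected.  */
static void
s390_contiguous_bitmask_example (void)
{
  int pos, len;

  gcc_assert (s390_contiguous_bitmask_p (0x0ff0, 16, &pos, &len));
  gcc_assert (pos == 4 && len == 8);
  gcc_assert (!s390_contiguous_bitmask_p (0x0f0f, 16, NULL, NULL));
}
#endif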
1349
1350 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
1351 equivalent to a shift followed by the AND. In particular, CONTIG
1352 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
1353 for ROTL indicate a rotate to the right. */
1354
1355 bool
1356 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
1357 {
1358 int pos, len;
1359 bool ok;
1360
1361 ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
1362 gcc_assert (ok);
1363
1364 return ((rotl >= 0 && rotl <= pos)
1365 || (rotl < 0 && -rotl <= bitsize - len - pos));
1366 }
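#if 0
/* Minimal usage sketch (not compiled in); values are illustrative only.
   With the mask 0xff00 (bits 8..15) a left rotate by up to 8 keeps the
   wrapped-around bits outside the mask, while a left rotate by 16 would
   let them land inside it.  */
static void
s390_extzv_shift_example (void)
{
  gcc_assert (s390_extzv_shift_ok (64, 8, 0xff00));
  gcc_assert (!s390_extzv_shift_ok (64, 16, 0xff00));
}
#endif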
1367
1368 /* Check whether we can (and want to) split a double-word
1369 move in mode MODE from SRC to DST into two single-word
1370 moves, moving the subword FIRST_SUBWORD first. */
1371
1372 bool
1373 s390_split_ok_p (rtx dst, rtx src, enum machine_mode mode, int first_subword)
1374 {
1375 /* Floating point registers cannot be split. */
1376 if (FP_REG_P (src) || FP_REG_P (dst))
1377 return false;
1378
1379 /* We don't need to split if operands are directly accessible. */
1380 if (s_operand (src, mode) || s_operand (dst, mode))
1381 return false;
1382
1383 /* Non-offsettable memory references cannot be split. */
1384 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
1385 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
1386 return false;
1387
1388 /* Moving the first subword must not clobber a register
1389 needed to move the second subword. */
1390 if (register_operand (dst, mode))
1391 {
1392 rtx subreg = operand_subword (dst, first_subword, 0, mode);
1393 if (reg_overlap_mentioned_p (subreg, src))
1394 return false;
1395 }
1396
1397 return true;
1398 }
1399
1400 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
1401 and [MEM2, MEM2 + SIZE] do overlap and false
1402 otherwise. */
1403
1404 bool
1405 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
1406 {
1407 rtx addr1, addr2, addr_delta;
1408 HOST_WIDE_INT delta;
1409
1410 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
1411 return true;
1412
1413 if (size == 0)
1414 return false;
1415
1416 addr1 = XEXP (mem1, 0);
1417 addr2 = XEXP (mem2, 0);
1418
1419 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
1420
1421 /* This overlapping check is used by peepholes merging memory block operations.
1422 Overlapping operations would otherwise be recognized by the S/390 hardware
1423 and would fall back to a slower implementation. Allowing overlapping
1424 operations would lead to slow code but not to wrong code. Therefore we are
1425 somewhat optimistic if we cannot prove that the memory blocks are
1426 overlapping.
1427 That's why we return false here although this may accept operations on
1428 overlapping memory areas. */
1429 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
1430 return false;
1431
1432 delta = INTVAL (addr_delta);
1433
1434 if (delta == 0
1435 || (delta > 0 && delta < size)
1436 || (delta < 0 && -delta < size))
1437 return true;
1438
1439 return false;
1440 }
1441
1442 /* Check whether the address of memory reference MEM2 equals exactly
1443 the address of memory reference MEM1 plus DELTA. Return true if
1444 we can prove this to be the case, false otherwise. */
1445
1446 bool
1447 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
1448 {
1449 rtx addr1, addr2, addr_delta;
1450
1451 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
1452 return false;
1453
1454 addr1 = XEXP (mem1, 0);
1455 addr2 = XEXP (mem2, 0);
1456
1457 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
1458 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
1459 return false;
1460
1461 return true;
1462 }
1463
1464 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
1465
1466 void
1467 s390_expand_logical_operator (enum rtx_code code, enum machine_mode mode,
1468 rtx *operands)
1469 {
1470 enum machine_mode wmode = mode;
1471 rtx dst = operands[0];
1472 rtx src1 = operands[1];
1473 rtx src2 = operands[2];
1474 rtx op, clob, tem;
1475
1476 /* If we cannot handle the operation directly, use a temp register. */
1477 if (!s390_logical_operator_ok_p (operands))
1478 dst = gen_reg_rtx (mode);
1479
1480 /* QImode and HImode patterns make sense only if we have a destination
1481 in memory. Otherwise perform the operation in SImode. */
1482 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
1483 wmode = SImode;
1484
1485 /* Widen operands if required. */
1486 if (mode != wmode)
1487 {
1488 if (GET_CODE (dst) == SUBREG
1489 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
1490 dst = tem;
1491 else if (REG_P (dst))
1492 dst = gen_rtx_SUBREG (wmode, dst, 0);
1493 else
1494 dst = gen_reg_rtx (wmode);
1495
1496 if (GET_CODE (src1) == SUBREG
1497 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
1498 src1 = tem;
1499 else if (GET_MODE (src1) != VOIDmode)
1500 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
1501
1502 if (GET_CODE (src2) == SUBREG
1503 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
1504 src2 = tem;
1505 else if (GET_MODE (src2) != VOIDmode)
1506 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
1507 }
1508
1509 /* Emit the instruction. */
1510 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
1511 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
1512 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
1513
1514 /* Fix up the destination if needed. */
1515 if (dst != operands[0])
1516 emit_move_insn (operands[0], gen_lowpart (mode, dst));
1517 }
1518
1519 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
1520
1521 bool
1522 s390_logical_operator_ok_p (rtx *operands)
1523 {
1524 /* If the destination operand is in memory, it needs to coincide
1525 with one of the source operands. After reload, it has to be
1526 the first source operand. */
1527 if (GET_CODE (operands[0]) == MEM)
1528 return rtx_equal_p (operands[0], operands[1])
1529 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
1530
1531 return true;
1532 }
1533
1534 /* Narrow logical operation CODE of memory operand MEMOP with immediate
1535 operand IMMOP to switch from SS to SI type instructions. */
1536
1537 void
1538 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
1539 {
1540 int def = code == AND ? -1 : 0;
1541 HOST_WIDE_INT mask;
1542 int part;
1543
1544 gcc_assert (GET_CODE (*memop) == MEM);
1545 gcc_assert (!MEM_VOLATILE_P (*memop));
1546
1547 mask = s390_extract_part (*immop, QImode, def);
1548 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
1549 gcc_assert (part >= 0);
1550
1551 *memop = adjust_address (*memop, QImode, part);
1552 *immop = gen_int_mode (mask, QImode);
1553 }
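/* Worked example (illustrative only): narrowing an AND of a HImode
   memory operand with the immediate 0x00ff selects QImode part 0, i.e.
   the most significant byte on this big-endian target, and the immediate
   byte 0x00 -- an SI-type AND (NI) that clears that byte.  */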
1554
1555
1556 /* How to allocate a 'struct machine_function'. */
1557
1558 static struct machine_function *
1559 s390_init_machine_status (void)
1560 {
1561 return ggc_alloc_cleared_machine_function ();
1562 }
1563
1564 static void
1565 s390_option_override (void)
1566 {
1567 /* Set up function hooks. */
1568 init_machine_status = s390_init_machine_status;
1569
1570 /* Architecture mode defaults according to ABI. */
1571 if (!(target_flags_explicit & MASK_ZARCH))
1572 {
1573 if (TARGET_64BIT)
1574 target_flags |= MASK_ZARCH;
1575 else
1576 target_flags &= ~MASK_ZARCH;
1577 }
1578
1579 /* Set the march default in case it hasn't been specified on
1580      the command line. */
1581 if (s390_arch == PROCESSOR_max)
1582 {
1583 s390_arch_string = TARGET_ZARCH? "z900" : "g5";
1584 s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
1585 s390_arch_flags = processor_flags_table[(int)s390_arch];
1586 }
1587
1588 /* Determine processor to tune for. */
1589 if (s390_tune == PROCESSOR_max)
1590 {
1591 s390_tune = s390_arch;
1592 s390_tune_flags = s390_arch_flags;
1593 }
1594
1595 /* Sanity checks. */
1596 if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
1597 error ("z/Architecture mode not supported on %s", s390_arch_string);
1598 if (TARGET_64BIT && !TARGET_ZARCH)
1599 error ("64-bit ABI not supported in ESA/390 mode");
1600
1601 /* Use hardware DFP if available and not explicitly disabled by
1602 user. E.g. with -m31 -march=z10 -mzarch */
1603 if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
1604 target_flags |= MASK_HARD_DFP;
1605
1606 if (TARGET_HARD_DFP && !TARGET_DFP)
1607 {
1608 if (target_flags_explicit & MASK_HARD_DFP)
1609 {
1610 if (!TARGET_CPU_DFP)
1611 error ("hardware decimal floating point instructions"
1612 " not available on %s", s390_arch_string);
1613 if (!TARGET_ZARCH)
1614 error ("hardware decimal floating point instructions"
1615 " not available in ESA/390 mode");
1616 }
1617 else
1618 target_flags &= ~MASK_HARD_DFP;
1619 }
1620
1621 if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
1622 {
1623 if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
1624 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
1625
1626 target_flags &= ~MASK_HARD_DFP;
1627 }
1628
1629 /* Set processor cost function. */
1630 switch (s390_tune)
1631 {
1632 case PROCESSOR_2084_Z990:
1633 s390_cost = &z990_cost;
1634 break;
1635 case PROCESSOR_2094_Z9_109:
1636 s390_cost = &z9_109_cost;
1637 break;
1638 case PROCESSOR_2097_Z10:
1639 s390_cost = &z10_cost;
1640 break;
1641 case PROCESSOR_2817_Z196:
1642 s390_cost = &z196_cost;
1643 break;
1644 case PROCESSOR_2827_ZEC12:
1645 s390_cost = &zEC12_cost;
1646 break;
1647 default:
1648 s390_cost = &z900_cost;
1649 }
1650
1651 if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
1652 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
1653 "in combination");
1654
1655 if (s390_stack_size)
1656 {
1657 if (s390_stack_guard >= s390_stack_size)
1658 error ("stack size must be greater than the stack guard value");
1659 else if (s390_stack_size > 1 << 16)
1660 error ("stack size must not be greater than 64k");
1661 }
1662 else if (s390_stack_guard)
1663 error ("-mstack-guard implies use of -mstack-size");
1664
1665 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1666 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1667 target_flags |= MASK_LONG_DOUBLE_128;
1668 #endif
1669
1670 if (s390_tune == PROCESSOR_2097_Z10
1671 || s390_tune == PROCESSOR_2817_Z196
1672 || s390_tune == PROCESSOR_2827_ZEC12)
1673 {
1674 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
1675 global_options.x_param_values,
1676 global_options_set.x_param_values);
1677 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
1678 global_options.x_param_values,
1679 global_options_set.x_param_values);
1680 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
1681 global_options.x_param_values,
1682 global_options_set.x_param_values);
1683 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
1684 global_options.x_param_values,
1685 global_options_set.x_param_values);
1686 }
1687
1688 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
1689 global_options.x_param_values,
1690 global_options_set.x_param_values);
1691   /* Values for loop prefetching. */
1692 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
1693 global_options.x_param_values,
1694 global_options_set.x_param_values);
1695 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
1696 global_options.x_param_values,
1697 global_options_set.x_param_values);
1698 /* s390 has more than 2 levels and the size is much larger. Since
1699      we are always running virtualized, assume that we only get a small
1700 part of the caches above l1. */
1701 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
1702 global_options.x_param_values,
1703 global_options_set.x_param_values);
1704 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
1705 global_options.x_param_values,
1706 global_options_set.x_param_values);
1707 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
1708 global_options.x_param_values,
1709 global_options_set.x_param_values);
1710
1711 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
1712 requires the arch flags to be evaluated already. Since prefetching
1713 is beneficial on s390, we enable it if available. */
1714 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
1715 flag_prefetch_loop_arrays = 1;
1716
1717 /* Use the alternative scheduling-pressure algorithm by default. */
1718 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
1719 global_options.x_param_values,
1720 global_options_set.x_param_values);
1721
1722 if (TARGET_TPF)
1723 {
1724 /* Don't emit DWARF3/4 unless specifically selected. The TPF
1725 debuggers do not yet support DWARF 3/4. */
1726 if (!global_options_set.x_dwarf_strict)
1727 dwarf_strict = 1;
1728 if (!global_options_set.x_dwarf_version)
1729 dwarf_version = 2;
1730 }
1731 }
1732
1733 /* Map for smallest class containing reg regno. */
1734
1735 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
1736 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1737 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1738 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1739 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1740 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1741 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1742 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1743 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1744 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS,
1745 ACCESS_REGS, ACCESS_REGS
1746 };
1747
1748 /* Return attribute type of insn. */
1749
1750 static enum attr_type
1751 s390_safe_attr_type (rtx insn)
1752 {
1753 if (recog_memoized (insn) >= 0)
1754 return get_attr_type (insn);
1755 else
1756 return TYPE_NONE;
1757 }
1758
1759 /* Return true if DISP is a valid short displacement. */
1760
1761 static bool
1762 s390_short_displacement (rtx disp)
1763 {
1764 /* No displacement is OK. */
1765 if (!disp)
1766 return true;
1767
1768 /* Without the long displacement facility we don't need to
1769      distinguish between long and short displacements. */
1770 if (!TARGET_LONG_DISPLACEMENT)
1771 return true;
1772
1773 /* Integer displacement in range. */
1774 if (GET_CODE (disp) == CONST_INT)
1775 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
1776
1777 /* GOT offset is not OK, the GOT can be large. */
1778 if (GET_CODE (disp) == CONST
1779 && GET_CODE (XEXP (disp, 0)) == UNSPEC
1780 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
1781 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
1782 return false;
1783
1784 /* All other symbolic constants are literal pool references,
1785 which are OK as the literal pool must be small. */
1786 if (GET_CODE (disp) == CONST)
1787 return true;
1788
1789 return false;
1790 }
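/* For instance (illustrative only): a CONST_INT displacement of 4000
   fits the short 12-bit unsigned range checked above, while 5000 needs
   the long-displacement instruction formats.  */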
1791
1792 /* Decompose a RTL expression ADDR for a memory address into
1793 its components, returned in OUT.
1794
1795 Returns false if ADDR is not a valid memory address, true
1796 otherwise. If OUT is NULL, don't return the components,
1797 but check for validity only.
1798
1799 Note: Only addresses in canonical form are recognized.
1800 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
1801 canonical form so that they will be recognized. */
1802
1803 static int
1804 s390_decompose_address (rtx addr, struct s390_address *out)
1805 {
1806 HOST_WIDE_INT offset = 0;
1807 rtx base = NULL_RTX;
1808 rtx indx = NULL_RTX;
1809 rtx disp = NULL_RTX;
1810 rtx orig_disp;
1811 bool pointer = false;
1812 bool base_ptr = false;
1813 bool indx_ptr = false;
1814 bool literal_pool = false;
1815
1816 /* We may need to substitute the literal pool base register into the address
1817 below. However, at this point we do not know which register is going to
1818 be used as base, so we substitute the arg pointer register. This is going
1819 to be treated as holding a pointer below -- it shouldn't be used for any
1820 other purpose. */
1821 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
1822
1823 /* Decompose address into base + index + displacement. */
1824
1825 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
1826 base = addr;
1827
1828 else if (GET_CODE (addr) == PLUS)
1829 {
1830 rtx op0 = XEXP (addr, 0);
1831 rtx op1 = XEXP (addr, 1);
1832 enum rtx_code code0 = GET_CODE (op0);
1833 enum rtx_code code1 = GET_CODE (op1);
1834
1835 if (code0 == REG || code0 == UNSPEC)
1836 {
1837 if (code1 == REG || code1 == UNSPEC)
1838 {
1839 indx = op0; /* index + base */
1840 base = op1;
1841 }
1842
1843 else
1844 {
1845 base = op0; /* base + displacement */
1846 disp = op1;
1847 }
1848 }
1849
1850 else if (code0 == PLUS)
1851 {
1852 indx = XEXP (op0, 0); /* index + base + disp */
1853 base = XEXP (op0, 1);
1854 disp = op1;
1855 }
1856
1857 else
1858 {
1859 return false;
1860 }
1861 }
1862
1863 else
1864 disp = addr; /* displacement */
1865
1866 /* Extract integer part of displacement. */
1867 orig_disp = disp;
1868 if (disp)
1869 {
1870 if (GET_CODE (disp) == CONST_INT)
1871 {
1872 offset = INTVAL (disp);
1873 disp = NULL_RTX;
1874 }
1875 else if (GET_CODE (disp) == CONST
1876 && GET_CODE (XEXP (disp, 0)) == PLUS
1877 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
1878 {
1879 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
1880 disp = XEXP (XEXP (disp, 0), 0);
1881 }
1882 }
1883
1884 /* Strip off CONST here to avoid special case tests later. */
1885 if (disp && GET_CODE (disp) == CONST)
1886 disp = XEXP (disp, 0);
1887
1888 /* We can convert literal pool addresses to
1889 displacements by basing them off the base register. */
1890 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
1891 {
1892 /* Either base or index must be free to hold the base register. */
1893 if (!base)
1894 base = fake_pool_base, literal_pool = true;
1895 else if (!indx)
1896 indx = fake_pool_base, literal_pool = true;
1897 else
1898 return false;
1899
1900 /* Mark up the displacement. */
1901 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
1902 UNSPEC_LTREL_OFFSET);
1903 }
1904
1905 /* Validate base register. */
1906 if (base)
1907 {
1908 if (GET_CODE (base) == UNSPEC)
1909 switch (XINT (base, 1))
1910 {
1911 case UNSPEC_LTREF:
1912 if (!disp)
1913 disp = gen_rtx_UNSPEC (Pmode,
1914 gen_rtvec (1, XVECEXP (base, 0, 0)),
1915 UNSPEC_LTREL_OFFSET);
1916 else
1917 return false;
1918
1919 base = XVECEXP (base, 0, 1);
1920 break;
1921
1922 case UNSPEC_LTREL_BASE:
1923 if (XVECLEN (base, 0) == 1)
1924 base = fake_pool_base, literal_pool = true;
1925 else
1926 base = XVECEXP (base, 0, 1);
1927 break;
1928
1929 default:
1930 return false;
1931 }
1932
1933 if (!REG_P (base)
1934 || (GET_MODE (base) != SImode
1935 && GET_MODE (base) != Pmode))
1936 return false;
1937
1938 if (REGNO (base) == STACK_POINTER_REGNUM
1939 || REGNO (base) == FRAME_POINTER_REGNUM
1940 || ((reload_completed || reload_in_progress)
1941 && frame_pointer_needed
1942 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
1943 || REGNO (base) == ARG_POINTER_REGNUM
1944 || (flag_pic
1945 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
1946 pointer = base_ptr = true;
1947
1948 if ((reload_completed || reload_in_progress)
1949 && base == cfun->machine->base_reg)
1950 pointer = base_ptr = literal_pool = true;
1951 }
1952
1953 /* Validate index register. */
1954 if (indx)
1955 {
1956 if (GET_CODE (indx) == UNSPEC)
1957 switch (XINT (indx, 1))
1958 {
1959 case UNSPEC_LTREF:
1960 if (!disp)
1961 disp = gen_rtx_UNSPEC (Pmode,
1962 gen_rtvec (1, XVECEXP (indx, 0, 0)),
1963 UNSPEC_LTREL_OFFSET);
1964 else
1965 return false;
1966
1967 indx = XVECEXP (indx, 0, 1);
1968 break;
1969
1970 case UNSPEC_LTREL_BASE:
1971 if (XVECLEN (indx, 0) == 1)
1972 indx = fake_pool_base, literal_pool = true;
1973 else
1974 indx = XVECEXP (indx, 0, 1);
1975 break;
1976
1977 default:
1978 return false;
1979 }
1980
1981 if (!REG_P (indx)
1982 || (GET_MODE (indx) != SImode
1983 && GET_MODE (indx) != Pmode))
1984 return false;
1985
1986 if (REGNO (indx) == STACK_POINTER_REGNUM
1987 || REGNO (indx) == FRAME_POINTER_REGNUM
1988 || ((reload_completed || reload_in_progress)
1989 && frame_pointer_needed
1990 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
1991 || REGNO (indx) == ARG_POINTER_REGNUM
1992 || (flag_pic
1993 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
1994 pointer = indx_ptr = true;
1995
1996 if ((reload_completed || reload_in_progress)
1997 && indx == cfun->machine->base_reg)
1998 pointer = indx_ptr = literal_pool = true;
1999 }
2000
2001 /* Prefer to use pointer as base, not index. */
2002 if (base && indx && !base_ptr
2003 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2004 {
2005 rtx tmp = base;
2006 base = indx;
2007 indx = tmp;
2008 }
2009
2010 /* Validate displacement. */
2011 if (!disp)
2012 {
2013 /* If virtual registers are involved, the displacement will change later
2014 anyway as the virtual registers get eliminated. This could make a
2015 valid displacement invalid, but it is more likely to make an invalid
2016 displacement valid, because we sometimes access the register save area
2017 via negative offsets to one of those registers.
2018 Thus we don't check the displacement for validity here. If after
2019 elimination the displacement turns out to be invalid after all,
2020 this is fixed up by reload in any case. */
2021 if (base != arg_pointer_rtx
2022 && indx != arg_pointer_rtx
2023 && base != return_address_pointer_rtx
2024 && indx != return_address_pointer_rtx
2025 && base != frame_pointer_rtx
2026 && indx != frame_pointer_rtx
2027 && base != virtual_stack_vars_rtx
2028 && indx != virtual_stack_vars_rtx)
2029 if (!DISP_IN_RANGE (offset))
2030 return false;
2031 }
2032 else
2033 {
2034 /* All the special cases are pointers. */
2035 pointer = true;
2036
2037 /* In the small-PIC case, the linker converts @GOT
2038 and @GOTNTPOFF offsets to possible displacements. */
2039 if (GET_CODE (disp) == UNSPEC
2040 && (XINT (disp, 1) == UNSPEC_GOT
2041 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2042 && flag_pic == 1)
2043 {
2044 ;
2045 }
2046
2047 /* Accept pool label offsets. */
2048 else if (GET_CODE (disp) == UNSPEC
2049 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2050 ;
2051
2052 /* Accept literal pool references. */
2053 else if (GET_CODE (disp) == UNSPEC
2054 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2055 {
2056 /* In case CSE pulled a non-literal-pool reference out of
2057 the pool we have to reject the address.  This is
2058 especially important when loading the GOT pointer on
2059 non-zarch CPUs.  In this case the literal pool contains an
2060 lt-relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2061 will most likely exceed the displacement.  */
2062 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2063 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2064 return false;
2065
2066 orig_disp = gen_rtx_CONST (Pmode, disp);
2067 if (offset)
2068 {
2069 /* If we have an offset, make sure it does not
2070 exceed the size of the constant pool entry. */
2071 rtx sym = XVECEXP (disp, 0, 0);
2072 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2073 return false;
2074
2075 orig_disp = plus_constant (Pmode, orig_disp, offset);
2076 }
2077 }
2078
2079 else
2080 return false;
2081 }
2082
2083 if (!base && !indx)
2084 pointer = true;
2085
2086 if (out)
2087 {
2088 out->base = base;
2089 out->indx = indx;
2090 out->disp = orig_disp;
2091 out->pointer = pointer;
2092 out->literal_pool = literal_pool;
2093 }
2094
2095 return true;
2096 }
2097
2098 /* Decompose a RTL expression OP for a shift count into its components,
2099 and return the base register in BASE and the offset in OFFSET.
2100
2101 Return true if OP is a valid shift count, false if not. */
2102
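/* For example, (const_int 3), (reg 1) and (plus (reg 1) (const_int 3))
   all decompose successfully; SUBREGs around the register part are
   stripped, and anything else is rejected.  */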
2103 bool
2104 s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
2105 {
2106 HOST_WIDE_INT off = 0;
2107
2108 /* We can have an integer constant, an address register,
2109 or a sum of the two. */
2110 if (GET_CODE (op) == CONST_INT)
2111 {
2112 off = INTVAL (op);
2113 op = NULL_RTX;
2114 }
2115 if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
2116 {
2117 off = INTVAL (XEXP (op, 1));
2118 op = XEXP (op, 0);
2119 }
2120 while (op && GET_CODE (op) == SUBREG)
2121 op = SUBREG_REG (op);
2122
2123 if (op && GET_CODE (op) != REG)
2124 return false;
2125
2126 if (offset)
2127 *offset = off;
2128 if (base)
2129 *base = op;
2130
2131 return true;
2132 }
2133
2134
2135 /* Return true if CODE is a valid address without index. */
2136
2137 bool
2138 s390_legitimate_address_without_index_p (rtx op)
2139 {
2140 struct s390_address addr;
2141
2142 if (!s390_decompose_address (XEXP (op, 0), &addr))
2143 return false;
2144 if (addr.indx)
2145 return false;
2146
2147 return true;
2148 }
2149
2150
2151 /* Return true if ADDR is of kind symbol_ref or symbol_ref + const_int
2152 and return these parts in SYMREF and ADDEND. You can pass NULL in
2153 SYMREF and/or ADDEND if you are not interested in these values.
2154 Literal pool references are *not* considered symbol references. */
2155
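/* E.g. (const (plus (symbol_ref "x") (const_int 8))) yields
   SYMREF = (symbol_ref "x") and ADDEND = 8, while a plain
   (symbol_ref "x") yields ADDEND = 0.  */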
2156 static bool
2157 s390_symref_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
2158 {
2159 HOST_WIDE_INT tmpaddend = 0;
2160
2161 if (GET_CODE (addr) == CONST)
2162 addr = XEXP (addr, 0);
2163
2164 if (GET_CODE (addr) == PLUS)
2165 {
2166 if (GET_CODE (XEXP (addr, 0)) == SYMBOL_REF
2167 && !CONSTANT_POOL_ADDRESS_P (XEXP (addr, 0))
2168 && CONST_INT_P (XEXP (addr, 1)))
2169 {
2170 tmpaddend = INTVAL (XEXP (addr, 1));
2171 addr = XEXP (addr, 0);
2172 }
2173 else
2174 return false;
2175 }
2176 else
2177 if (GET_CODE (addr) != SYMBOL_REF || CONSTANT_POOL_ADDRESS_P (addr))
2178 return false;
2179
2180 if (symref)
2181 *symref = addr;
2182 if (addend)
2183 *addend = tmpaddend;
2184
2185 return true;
2186 }
2187
2188 /* Return TRUE if ADDR is an operand valid for a load/store relative
2189 instruction.  Be aware that the alignment of the operand needs to
2190 be checked separately. */
2191 static bool
2192 s390_loadrelative_operand_p (rtx addr)
2193 {
2194 if (GET_CODE (addr) == CONST)
2195 addr = XEXP (addr, 0);
2196
2197 /* Enable load relative for symbol@GOTENT. */
2198 if (GET_CODE (addr) == UNSPEC
2199 && XINT (addr, 1) == UNSPEC_GOTENT)
2200 return true;
2201
2202 return s390_symref_operand_p (addr, NULL, NULL);
2203 }
2204
2205 /* Return true if the address in OP is valid for constraint letter C
2206 if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if literal
2207 pool MEMs should be accepted. Only the Q, R, S, T constraint
2208 letters are allowed for C. */
2209
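/* Roughly: 'Q' and 'R' require the short (12-bit unsigned) displacement
   form, 'S' and 'T' the long (20-bit signed) form available with
   TARGET_LONG_DISPLACEMENT; 'Q' and 'S' additionally forbid an index
   register.  */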
2210 static int
2211 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
2212 {
2213 struct s390_address addr;
2214 bool decomposed = false;
2215
2216 /* This check makes sure that no symbolic addresses (except literal
2217 pool references) are accepted by the R or T constraints.  */
2218 if (s390_loadrelative_operand_p (op))
2219 return 0;
2220
2221 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
2222 if (!lit_pool_ok)
2223 {
2224 if (!s390_decompose_address (op, &addr))
2225 return 0;
2226 if (addr.literal_pool)
2227 return 0;
2228 decomposed = true;
2229 }
2230
2231 switch (c)
2232 {
2233 case 'Q': /* no index short displacement */
2234 if (!decomposed && !s390_decompose_address (op, &addr))
2235 return 0;
2236 if (addr.indx)
2237 return 0;
2238 if (!s390_short_displacement (addr.disp))
2239 return 0;
2240 break;
2241
2242 case 'R': /* with index short displacement */
2243 if (TARGET_LONG_DISPLACEMENT)
2244 {
2245 if (!decomposed && !s390_decompose_address (op, &addr))
2246 return 0;
2247 if (!s390_short_displacement (addr.disp))
2248 return 0;
2249 }
2250 /* Any invalid address here will be fixed up by reload,
2251 so accept it for the most generic constraint. */
2252 break;
2253
2254 case 'S': /* no index long displacement */
2255 if (!TARGET_LONG_DISPLACEMENT)
2256 return 0;
2257 if (!decomposed && !s390_decompose_address (op, &addr))
2258 return 0;
2259 if (addr.indx)
2260 return 0;
2261 if (s390_short_displacement (addr.disp))
2262 return 0;
2263 break;
2264
2265 case 'T': /* with index long displacement */
2266 if (!TARGET_LONG_DISPLACEMENT)
2267 return 0;
2268 /* Any invalid address here will be fixed up by reload,
2269 so accept it for the most generic constraint. */
2270 if ((decomposed || s390_decompose_address (op, &addr))
2271 && s390_short_displacement (addr.disp))
2272 return 0;
2273 break;
2274 default:
2275 return 0;
2276 }
2277 return 1;
2278 }
2279
2280
2281 /* Evaluates constraint strings described by the regular expression
2282 ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
2283 the constraint given in STR, and 0 otherwise.  */
2284
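/* For example, "AQ" requires an offsettable short-displacement address
   without index, "BR" an indexed short-displacement address outside the
   literal pool, and "Y" a valid shift count operand.  */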
2285 int
2286 s390_mem_constraint (const char *str, rtx op)
2287 {
2288 char c = str[0];
2289
2290 switch (c)
2291 {
2292 case 'A':
2293 /* Check for offsettable variants of memory constraints. */
2294 if (!MEM_P (op) || MEM_VOLATILE_P (op))
2295 return 0;
2296 if ((reload_completed || reload_in_progress)
2297 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
2298 return 0;
2299 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
2300 case 'B':
2301 /* Check for non-literal-pool variants of memory constraints. */
2302 if (!MEM_P (op))
2303 return 0;
2304 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
2305 case 'Q':
2306 case 'R':
2307 case 'S':
2308 case 'T':
2309 if (GET_CODE (op) != MEM)
2310 return 0;
2311 return s390_check_qrst_address (c, XEXP (op, 0), true);
2312 case 'U':
2313 return (s390_check_qrst_address ('Q', op, true)
2314 || s390_check_qrst_address ('R', op, true));
2315 case 'W':
2316 return (s390_check_qrst_address ('S', op, true)
2317 || s390_check_qrst_address ('T', op, true));
2318 case 'Y':
2319 /* Simply check for the basic form of a shift count. Reload will
2320 take care of making sure we have a proper base register. */
2321 if (!s390_decompose_shift_count (op, NULL, NULL))
2322 return 0;
2323 break;
2324 case 'Z':
2325 return s390_check_qrst_address (str[1], op, true);
2326 default:
2327 return 0;
2328 }
2329 return 1;
2330 }
2331
2332
2333 /* Evaluates constraint strings starting with letter O. Input
2334 parameter C is the letter following the "O" in the constraint
2335 string. Returns 1 if VALUE meets the respective constraint and 0
2336 otherwise. */
2337
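/* The cases below correspond to constraints of the form "Os", "Op"
   and "On"; e.g. "Os" accepts any value that fits a signed 32-bit
   immediate.  */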
2338 int
2339 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
2340 {
2341 if (!TARGET_EXTIMM)
2342 return 0;
2343
2344 switch (c)
2345 {
2346 case 's':
2347 return trunc_int_for_mode (value, SImode) == value;
2348
2349 case 'p':
2350 return value == 0
2351 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
2352
2353 case 'n':
2354 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
2355
2356 default:
2357 gcc_unreachable ();
2358 }
2359 }
2360
2361
2362 /* Evaluates constraint strings starting with letter N. Parameter STR
2363 contains the letters following letter "N" in the constraint string.
2364 Returns true if VALUE matches the constraint. */
2365
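/* For example, with STR = "xHD0" any DImode value whose non-zero bits
   are confined to a single HImode part is accepted; a leading digit
   instead of 'x' pins down which part that must be, and a trailing 'F'
   requires the remaining parts to be all ones instead of zero.  */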
2366 int
2367 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
2368 {
2369 enum machine_mode mode, part_mode;
2370 int def;
2371 int part, part_goal;
2372
2373
2374 if (str[0] == 'x')
2375 part_goal = -1;
2376 else
2377 part_goal = str[0] - '0';
2378
2379 switch (str[1])
2380 {
2381 case 'Q':
2382 part_mode = QImode;
2383 break;
2384 case 'H':
2385 part_mode = HImode;
2386 break;
2387 case 'S':
2388 part_mode = SImode;
2389 break;
2390 default:
2391 return 0;
2392 }
2393
2394 switch (str[2])
2395 {
2396 case 'H':
2397 mode = HImode;
2398 break;
2399 case 'S':
2400 mode = SImode;
2401 break;
2402 case 'D':
2403 mode = DImode;
2404 break;
2405 default:
2406 return 0;
2407 }
2408
2409 switch (str[3])
2410 {
2411 case '0':
2412 def = 0;
2413 break;
2414 case 'F':
2415 def = -1;
2416 break;
2417 default:
2418 return 0;
2419 }
2420
2421 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
2422 return 0;
2423
2424 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
2425 if (part < 0)
2426 return 0;
2427 if (part_goal != -1 && part_goal != part)
2428 return 0;
2429
2430 return 1;
2431 }
2432
2433
2434 /* Returns true if the input parameter VALUE is a float zero. */
2435
2436 int
2437 s390_float_const_zero_p (rtx value)
2438 {
2439 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
2440 && value == CONST0_RTX (GET_MODE (value)));
2441 }
2442
2443 /* Implement TARGET_REGISTER_MOVE_COST. */
2444
2445 static int
2446 s390_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
2447 reg_class_t from, reg_class_t to)
2448 {
2449 /* On s390, copy between fprs and gprs is expensive. */
2450 if ((reg_classes_intersect_p (from, GENERAL_REGS)
2451 && reg_classes_intersect_p (to, FP_REGS))
2452 || (reg_classes_intersect_p (from, FP_REGS)
2453 && reg_classes_intersect_p (to, GENERAL_REGS)))
2454 return 10;
2455
2456 return 1;
2457 }
2458
2459 /* Implement TARGET_MEMORY_MOVE_COST. */
2460
2461 static int
2462 s390_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
2463 reg_class_t rclass ATTRIBUTE_UNUSED,
2464 bool in ATTRIBUTE_UNUSED)
2465 {
2466 return 1;
2467 }
2468
2469 /* Compute a (partial) cost for rtx X. Return true if the complete
2470 cost has been computed, and false if subexpressions should be
2471 scanned. In either case, *TOTAL contains the cost result.
2472 CODE contains GET_CODE (x), OUTER_CODE contains the code
2473 of the superexpression of x. */
2474
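/* For example, a SImode MULT by a constant satisfying the 'K'
   constraint is costed as MHI, a DImode division with a zero-extended
   divisor as a 64-by-32-bit divide, and shifts and logical operations
   as a single insn.  */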
2475 static bool
2476 s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
2477 int *total, bool speed ATTRIBUTE_UNUSED)
2478 {
2479 switch (code)
2480 {
2481 case CONST:
2482 case CONST_INT:
2483 case LABEL_REF:
2484 case SYMBOL_REF:
2485 case CONST_DOUBLE:
2486 case MEM:
2487 *total = 0;
2488 return true;
2489
2490 case ASHIFT:
2491 case ASHIFTRT:
2492 case LSHIFTRT:
2493 case ROTATE:
2494 case ROTATERT:
2495 case AND:
2496 case IOR:
2497 case XOR:
2498 case NEG:
2499 case NOT:
2500 *total = COSTS_N_INSNS (1);
2501 return false;
2502
2503 case PLUS:
2504 case MINUS:
2505 *total = COSTS_N_INSNS (1);
2506 return false;
2507
2508 case MULT:
2509 switch (GET_MODE (x))
2510 {
2511 case SImode:
2512 {
2513 rtx left = XEXP (x, 0);
2514 rtx right = XEXP (x, 1);
2515 if (GET_CODE (right) == CONST_INT
2516 && CONST_OK_FOR_K (INTVAL (right)))
2517 *total = s390_cost->mhi;
2518 else if (GET_CODE (left) == SIGN_EXTEND)
2519 *total = s390_cost->mh;
2520 else
2521 *total = s390_cost->ms; /* msr, ms, msy */
2522 break;
2523 }
2524 case DImode:
2525 {
2526 rtx left = XEXP (x, 0);
2527 rtx right = XEXP (x, 1);
2528 if (TARGET_ZARCH)
2529 {
2530 if (GET_CODE (right) == CONST_INT
2531 && CONST_OK_FOR_K (INTVAL (right)))
2532 *total = s390_cost->mghi;
2533 else if (GET_CODE (left) == SIGN_EXTEND)
2534 *total = s390_cost->msgf;
2535 else
2536 *total = s390_cost->msg; /* msgr, msg */
2537 }
2538 else /* TARGET_31BIT */
2539 {
2540 if (GET_CODE (left) == SIGN_EXTEND
2541 && GET_CODE (right) == SIGN_EXTEND)
2542 /* mulsidi case: mr, m */
2543 *total = s390_cost->m;
2544 else if (GET_CODE (left) == ZERO_EXTEND
2545 && GET_CODE (right) == ZERO_EXTEND
2546 && TARGET_CPU_ZARCH)
2547 /* umulsidi case: ml, mlr */
2548 *total = s390_cost->ml;
2549 else
2550 /* Complex calculation is required. */
2551 *total = COSTS_N_INSNS (40);
2552 }
2553 break;
2554 }
2555 case SFmode:
2556 case DFmode:
2557 *total = s390_cost->mult_df;
2558 break;
2559 case TFmode:
2560 *total = s390_cost->mxbr;
2561 break;
2562 default:
2563 return false;
2564 }
2565 return false;
2566
2567 case FMA:
2568 switch (GET_MODE (x))
2569 {
2570 case DFmode:
2571 *total = s390_cost->madbr;
2572 break;
2573 case SFmode:
2574 *total = s390_cost->maebr;
2575 break;
2576 default:
2577 return false;
2578 }
2579 /* A negation in the third argument is free: FMSUB.  */
2580 if (GET_CODE (XEXP (x, 2)) == NEG)
2581 {
2582 *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
2583 + rtx_cost (XEXP (x, 1), FMA, 1, speed)
2584 + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
2585 return true;
2586 }
2587 return false;
2588
2589 case UDIV:
2590 case UMOD:
2591 if (GET_MODE (x) == TImode) /* 128 bit division */
2592 *total = s390_cost->dlgr;
2593 else if (GET_MODE (x) == DImode)
2594 {
2595 rtx right = XEXP (x, 1);
2596 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2597 *total = s390_cost->dlr;
2598 else /* 64 by 64 bit division */
2599 *total = s390_cost->dlgr;
2600 }
2601 else if (GET_MODE (x) == SImode) /* 32 bit division */
2602 *total = s390_cost->dlr;
2603 return false;
2604
2605 case DIV:
2606 case MOD:
2607 if (GET_MODE (x) == DImode)
2608 {
2609 rtx right = XEXP (x, 1);
2610 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2611 if (TARGET_ZARCH)
2612 *total = s390_cost->dsgfr;
2613 else
2614 *total = s390_cost->dr;
2615 else /* 64 by 64 bit division */
2616 *total = s390_cost->dsgr;
2617 }
2618 else if (GET_MODE (x) == SImode) /* 32 bit division */
2619 *total = s390_cost->dlr;
2620 else if (GET_MODE (x) == SFmode)
2621 {
2622 *total = s390_cost->debr;
2623 }
2624 else if (GET_MODE (x) == DFmode)
2625 {
2626 *total = s390_cost->ddbr;
2627 }
2628 else if (GET_MODE (x) == TFmode)
2629 {
2630 *total = s390_cost->dxbr;
2631 }
2632 return false;
2633
2634 case SQRT:
2635 if (GET_MODE (x) == SFmode)
2636 *total = s390_cost->sqebr;
2637 else if (GET_MODE (x) == DFmode)
2638 *total = s390_cost->sqdbr;
2639 else /* TFmode */
2640 *total = s390_cost->sqxbr;
2641 return false;
2642
2643 case SIGN_EXTEND:
2644 case ZERO_EXTEND:
2645 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
2646 || outer_code == PLUS || outer_code == MINUS
2647 || outer_code == COMPARE)
2648 *total = 0;
2649 return false;
2650
2651 case COMPARE:
2652 *total = COSTS_N_INSNS (1);
2653 if (GET_CODE (XEXP (x, 0)) == AND
2654 && GET_CODE (XEXP (x, 1)) == CONST_INT
2655 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2656 {
2657 rtx op0 = XEXP (XEXP (x, 0), 0);
2658 rtx op1 = XEXP (XEXP (x, 0), 1);
2659 rtx op2 = XEXP (x, 1);
2660
2661 if (memory_operand (op0, GET_MODE (op0))
2662 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
2663 return true;
2664 if (register_operand (op0, GET_MODE (op0))
2665 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
2666 return true;
2667 }
2668 return false;
2669
2670 default:
2671 return false;
2672 }
2673 }
2674
2675 /* Return the cost of an address rtx ADDR. */
2676
2677 static int
2678 s390_address_cost (rtx addr, enum machine_mode mode ATTRIBUTE_UNUSED,
2679 addr_space_t as ATTRIBUTE_UNUSED,
2680 bool speed ATTRIBUTE_UNUSED)
2681 {
2682 struct s390_address ad;
2683 if (!s390_decompose_address (addr, &ad))
2684 return 1000;
2685
2686 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
2687 }
2688
2689 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
2690 otherwise return 0. */
2691
2692 int
2693 tls_symbolic_operand (rtx op)
2694 {
2695 if (GET_CODE (op) != SYMBOL_REF)
2696 return 0;
2697 return SYMBOL_REF_TLS_MODEL (op);
2698 }
2699 \f
2700 /* Split DImode access register reference REG (on 64-bit) into its constituent
2701 low and high parts, and store them into LO and HI. Note that gen_lowpart/
2702 gen_highpart cannot be used as they assume all registers are word-sized,
2703 while our access registers have only half that size. */
2704
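/* E.g. for a DImode reference to the first access register pair, *HI
   becomes the SImode %a0 and *LO the SImode %a1.  */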
2705 void
2706 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
2707 {
2708 gcc_assert (TARGET_64BIT);
2709 gcc_assert (ACCESS_REG_P (reg));
2710 gcc_assert (GET_MODE (reg) == DImode);
2711 gcc_assert (!(REGNO (reg) & 1));
2712
2713 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
2714 *hi = gen_rtx_REG (SImode, REGNO (reg));
2715 }
2716
2717 /* Return true if OP contains a symbol reference.  */
2718
2719 bool
2720 symbolic_reference_mentioned_p (rtx op)
2721 {
2722 const char *fmt;
2723 int i;
2724
2725 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2726 return 1;
2727
2728 fmt = GET_RTX_FORMAT (GET_CODE (op));
2729 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2730 {
2731 if (fmt[i] == 'E')
2732 {
2733 int j;
2734
2735 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2736 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2737 return 1;
2738 }
2739
2740 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2741 return 1;
2742 }
2743
2744 return 0;
2745 }
2746
2747 /* Return true if OP contains a reference to a thread-local symbol. */
2748
2749 bool
2750 tls_symbolic_reference_mentioned_p (rtx op)
2751 {
2752 const char *fmt;
2753 int i;
2754
2755 if (GET_CODE (op) == SYMBOL_REF)
2756 return tls_symbolic_operand (op);
2757
2758 fmt = GET_RTX_FORMAT (GET_CODE (op));
2759 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2760 {
2761 if (fmt[i] == 'E')
2762 {
2763 int j;
2764
2765 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2766 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2767 return true;
2768 }
2769
2770 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
2771 return true;
2772 }
2773
2774 return false;
2775 }
2776
2777
2778 /* Return true if OP is a legitimate general operand when
2779 generating PIC code. It is given that flag_pic is on
2780 and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2781
2782 int
2783 legitimate_pic_operand_p (rtx op)
2784 {
2785 /* Accept all non-symbolic constants. */
2786 if (!SYMBOLIC_CONST (op))
2787 return 1;
2788
2789 /* Reject everything else; must be handled
2790 via emit_symbolic_move. */
2791 return 0;
2792 }
2793
2794 /* Returns true if the constant value OP is a legitimate general operand.
2795 It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2796
2797 static bool
2798 s390_legitimate_constant_p (enum machine_mode mode, rtx op)
2799 {
2800 /* Accept all non-symbolic constants. */
2801 if (!SYMBOLIC_CONST (op))
2802 return 1;
2803
2804 /* Accept immediate LARL operands. */
2805 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
2806 return 1;
2807
2808 /* Thread-local symbols are never legal constants. This is
2809 so that emit_call knows that computing such addresses
2810 might require a function call. */
2811 if (TLS_SYMBOLIC_CONST (op))
2812 return 0;
2813
2814 /* In the PIC case, symbolic constants must *not* be
2815 forced into the literal pool. We accept them here,
2816 so that they will be handled by emit_symbolic_move. */
2817 if (flag_pic)
2818 return 1;
2819
2820 /* All remaining non-PIC symbolic constants are
2821 forced into the literal pool. */
2822 return 0;
2823 }
2824
2825 /* Determine if it's legal to put X into the constant pool. This
2826 is not possible if X contains the address of a symbol that is
2827 not constant (TLS) or not known at final link time (PIC). */
2828
2829 static bool
2830 s390_cannot_force_const_mem (enum machine_mode mode, rtx x)
2831 {
2832 switch (GET_CODE (x))
2833 {
2834 case CONST_INT:
2835 case CONST_DOUBLE:
2836 /* Accept all non-symbolic constants. */
2837 return false;
2838
2839 case LABEL_REF:
2840 /* Labels are OK iff we are non-PIC. */
2841 return flag_pic != 0;
2842
2843 case SYMBOL_REF:
2844 /* 'Naked' TLS symbol references are never OK,
2845 non-TLS symbols are OK iff we are non-PIC. */
2846 if (tls_symbolic_operand (x))
2847 return true;
2848 else
2849 return flag_pic != 0;
2850
2851 case CONST:
2852 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
2853 case PLUS:
2854 case MINUS:
2855 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
2856 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
2857
2858 case UNSPEC:
2859 switch (XINT (x, 1))
2860 {
2861 /* Only lt-relative or GOT-relative UNSPECs are OK. */
2862 case UNSPEC_LTREL_OFFSET:
2863 case UNSPEC_GOT:
2864 case UNSPEC_GOTOFF:
2865 case UNSPEC_PLTOFF:
2866 case UNSPEC_TLSGD:
2867 case UNSPEC_TLSLDM:
2868 case UNSPEC_NTPOFF:
2869 case UNSPEC_DTPOFF:
2870 case UNSPEC_GOTNTPOFF:
2871 case UNSPEC_INDNTPOFF:
2872 return false;
2873
2874 /* If the literal pool shares the code section, execute template
2875 placeholders may be put into the pool as well.  */
2876 case UNSPEC_INSN:
2877 return TARGET_CPU_ZARCH;
2878
2879 default:
2880 return true;
2881 }
2882 break;
2883
2884 default:
2885 gcc_unreachable ();
2886 }
2887 }
2888
2889 /* Returns true if the constant value OP is a legitimate general
2890 operand during and after reload. The difference to
2891 legitimate_constant_p is that this function will not accept
2892 a constant that would need to be forced to the literal pool
2893 before it can be used as operand.
2894 This function accepts all constants which can be loaded directly
2895 into a GPR. */
2896
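/* For example, 42 and -1 can be loaded with lhi, 0x7fff0000 with one
   of the lliXX or extended-immediate instructions, and on zarch a
   larl-addressable symbol is accepted as well; an arbitrary 64-bit
   constant is rejected and has to go through the literal pool.  */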
2897 bool
2898 legitimate_reload_constant_p (rtx op)
2899 {
2900 /* Accept la(y) operands. */
2901 if (GET_CODE (op) == CONST_INT
2902 && DISP_IN_RANGE (INTVAL (op)))
2903 return true;
2904
2905 /* Accept l(g)hi/l(g)fi operands. */
2906 if (GET_CODE (op) == CONST_INT
2907 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
2908 return true;
2909
2910 /* Accept lliXX operands. */
2911 if (TARGET_ZARCH
2912 && GET_CODE (op) == CONST_INT
2913 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2914 && s390_single_part (op, word_mode, HImode, 0) >= 0)
2915 return true;
2916
2917 if (TARGET_EXTIMM
2918 && GET_CODE (op) == CONST_INT
2919 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2920 && s390_single_part (op, word_mode, SImode, 0) >= 0)
2921 return true;
2922
2923 /* Accept larl operands. */
2924 if (TARGET_CPU_ZARCH
2925 && larl_operand (op, VOIDmode))
2926 return true;
2927
2928 /* Accept floating-point zero operands that fit into a single GPR. */
2929 if (GET_CODE (op) == CONST_DOUBLE
2930 && s390_float_const_zero_p (op)
2931 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
2932 return true;
2933
2934 /* Accept double-word operands that can be split. */
2935 if (GET_CODE (op) == CONST_INT
2936 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
2937 {
2938 enum machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
2939 rtx hi = operand_subword (op, 0, 0, dword_mode);
2940 rtx lo = operand_subword (op, 1, 0, dword_mode);
2941 return legitimate_reload_constant_p (hi)
2942 && legitimate_reload_constant_p (lo);
2943 }
2944
2945 /* Everything else cannot be handled without reload. */
2946 return false;
2947 }
2948
2949 /* Returns true if the constant value OP is a legitimate fp operand
2950 during and after reload.
2951 This function accepts all constants which can be loaded directly
2952 into an FPR. */
2953
2954 static bool
2955 legitimate_reload_fp_constant_p (rtx op)
2956 {
2957 /* Accept floating-point zero operands if the load zero instruction
2958 can be used. Prior to z196 the load fp zero instruction caused a
2959 performance penalty if the result is used as a BFP number.  */
2960 if (TARGET_Z196
2961 && GET_CODE (op) == CONST_DOUBLE
2962 && s390_float_const_zero_p (op))
2963 return true;
2964
2965 return false;
2966 }
2967
2968 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
2969 return the class of reg to actually use. */
2970
2971 static reg_class_t
2972 s390_preferred_reload_class (rtx op, reg_class_t rclass)
2973 {
2974 switch (GET_CODE (op))
2975 {
2976 /* Constants we cannot reload into general registers
2977 must be forced into the literal pool. */
2978 case CONST_DOUBLE:
2979 case CONST_INT:
2980 if (reg_class_subset_p (GENERAL_REGS, rclass)
2981 && legitimate_reload_constant_p (op))
2982 return GENERAL_REGS;
2983 else if (reg_class_subset_p (ADDR_REGS, rclass)
2984 && legitimate_reload_constant_p (op))
2985 return ADDR_REGS;
2986 else if (reg_class_subset_p (FP_REGS, rclass)
2987 && legitimate_reload_fp_constant_p (op))
2988 return FP_REGS;
2989 return NO_REGS;
2990
2991 /* If a symbolic constant or a PLUS is reloaded,
2992 it is most likely being used as an address, so
2993 prefer ADDR_REGS. If 'class' is not a superset
2994 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
2995 case LABEL_REF:
2996 case SYMBOL_REF:
2997 case CONST:
2998 if (!legitimate_reload_constant_p (op))
2999 return NO_REGS;
3000 /* fallthrough */
3001 case PLUS:
3002 /* load address will be used. */
3003 if (reg_class_subset_p (ADDR_REGS, rclass))
3004 return ADDR_REGS;
3005 else
3006 return NO_REGS;
3007
3008 default:
3009 break;
3010 }
3011
3012 return rclass;
3013 }
3014
3015 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
3016 multiple of ALIGNMENT and the SYMBOL_REF being naturally
3017 aligned. */
3018
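/* E.g. with ALIGNMENT = 4, "sym + 8" passes while "sym + 2" does not;
   both fail if sym is marked as not naturally aligned.  */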
3019 bool
3020 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
3021 {
3022 HOST_WIDE_INT addend;
3023 rtx symref;
3024
3025 /* Accept symbol@GOTENT with pointer size alignment. */
3026 if (GET_CODE (addr) == CONST
3027 && GET_CODE (XEXP (addr, 0)) == UNSPEC
3028 && XINT (XEXP (addr, 0), 1) == UNSPEC_GOTENT
3029 && alignment <= UNITS_PER_LONG)
3030 return true;
3031
3032 if (!s390_symref_operand_p (addr, &symref, &addend))
3033 return false;
3034
3035 return (!SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref)
3036 && !(addend & (alignment - 1)));
3037 }
3038
3039 /* ADDR is moved into REG using larl.  If ADDR isn't a valid larl
3040 operand, SCRATCH is used to load the even part of the address,
3041 to which one is then added.  */
3042
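/* For example, for ADDR = sym + 5 the even address sym + 4 is loaded
   into SCRATCH (or into REG itself if it is an address register) via
   larl, and REG is then set to SCRATCH + 1 with la.  */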
3043 void
3044 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
3045 {
3046 HOST_WIDE_INT addend;
3047 rtx symref;
3048
3049 if (!s390_symref_operand_p (addr, &symref, &addend))
3050 gcc_unreachable ();
3051
3052 if (!(addend & 1))
3053 /* Easy case. The addend is even so larl will do fine. */
3054 emit_move_insn (reg, addr);
3055 else
3056 {
3057 /* We can leave the scratch register untouched if the target
3058 register is a valid base register. */
3059 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
3060 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
3061 scratch = reg;
3062
3063 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
3064 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
3065
3066 if (addend != 1)
3067 emit_move_insn (scratch,
3068 gen_rtx_CONST (Pmode,
3069 gen_rtx_PLUS (Pmode, symref,
3070 GEN_INT (addend - 1))));
3071 else
3072 emit_move_insn (scratch, symref);
3073
3074 /* Increment the address using la in order to avoid clobbering cc. */
3075 emit_move_insn (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
3076 }
3077 }
3078
3079 /* Generate what is necessary to move between REG and MEM using
3080 SCRATCH. The direction is given by TOMEM. */
3081
3082 void
3083 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
3084 {
3085 /* Reload might have pulled a constant out of the literal pool.
3086 Force it back in. */
3087 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
3088 || GET_CODE (mem) == CONST)
3089 mem = force_const_mem (GET_MODE (reg), mem);
3090
3091 gcc_assert (MEM_P (mem));
3092
3093 /* For a load from memory we can leave the scratch register
3094 untouched if the target register is a valid base register. */
3095 if (!tomem
3096 && REGNO (reg) < FIRST_PSEUDO_REGISTER
3097 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
3098 && GET_MODE (reg) == GET_MODE (scratch))
3099 scratch = reg;
3100
3101 /* Load address into scratch register. Since we can't have a
3102 secondary reload for a secondary reload we have to cover the case
3103 where larl would need a secondary reload here as well. */
3104 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
3105
3106 /* Now we can use a standard load/store to do the move. */
3107 if (tomem)
3108 emit_move_insn (replace_equiv_address (mem, scratch), reg);
3109 else
3110 emit_move_insn (reg, replace_equiv_address (mem, scratch));
3111 }
3112
3113 /* Inform reload about cases where moving X with a mode MODE to a register in
3114 RCLASS requires an extra scratch or immediate register. Return the class
3115 needed for the immediate register. */
3116
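/* For example, a move between a CC register and anything else goes
   through a general register, and on z10 a QImode access to a
   SYMBOL_REF address gets a reload pattern that first computes the
   address in a scratch register.  */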
3117 static reg_class_t
3118 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
3119 enum machine_mode mode, secondary_reload_info *sri)
3120 {
3121 enum reg_class rclass = (enum reg_class) rclass_i;
3122
3123 /* Intermediate register needed. */
3124 if (reg_classes_intersect_p (CC_REGS, rclass))
3125 return GENERAL_REGS;
3126
3127 if (TARGET_Z10)
3128 {
3129 HOST_WIDE_INT offset;
3130 rtx symref;
3131
3132 /* On z10 several optimizer steps may generate larl operands with
3133 an odd addend. */
3134 if (in_p
3135 && s390_symref_operand_p (x, &symref, &offset)
3136 && mode == Pmode
3137 && !SYMBOL_REF_ALIGN1_P (symref)
3138 && (offset & 1) == 1)
3139 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
3140 : CODE_FOR_reloadsi_larl_odd_addend_z10);
3141
3142 /* On z10 we need a scratch register when moving QI, TI or floating
3143 point mode values from or to a memory location with a SYMBOL_REF
3144 or if the symref addend of an HI, SI or DI move is not aligned to the
3145 width of the access. */
3146 if (MEM_P (x)
3147 && s390_symref_operand_p (XEXP (x, 0), NULL, NULL)
3148 && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode)
3149 || (!TARGET_ZARCH && mode == DImode)
3150 || ((mode == HImode || mode == SImode || mode == DImode)
3151 && (!s390_check_symref_alignment (XEXP (x, 0),
3152 GET_MODE_SIZE (mode))))))
3153 {
3154 #define __SECONDARY_RELOAD_CASE(M,m) \
3155 case M##mode: \
3156 if (TARGET_64BIT) \
3157 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
3158 CODE_FOR_reload##m##di_tomem_z10; \
3159 else \
3160 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
3161 CODE_FOR_reload##m##si_tomem_z10; \
3162 break;
3163
3164 switch (GET_MODE (x))
3165 {
3166 __SECONDARY_RELOAD_CASE (QI, qi);
3167 __SECONDARY_RELOAD_CASE (HI, hi);
3168 __SECONDARY_RELOAD_CASE (SI, si);
3169 __SECONDARY_RELOAD_CASE (DI, di);
3170 __SECONDARY_RELOAD_CASE (TI, ti);
3171 __SECONDARY_RELOAD_CASE (SF, sf);
3172 __SECONDARY_RELOAD_CASE (DF, df);
3173 __SECONDARY_RELOAD_CASE (TF, tf);
3174 __SECONDARY_RELOAD_CASE (SD, sd);
3175 __SECONDARY_RELOAD_CASE (DD, dd);
3176 __SECONDARY_RELOAD_CASE (TD, td);
3177
3178 default:
3179 gcc_unreachable ();
3180 }
3181 #undef __SECONDARY_RELOAD_CASE
3182 }
3183 }
3184
3185 /* We need a scratch register when loading a PLUS expression which
3186 is not a legitimate operand of the LOAD ADDRESS instruction. */
3187 if (in_p && s390_plus_operand (x, mode))
3188 sri->icode = (TARGET_64BIT ?
3189 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
3190
3191 /* When performing a multiword move from or to memory we have to make sure the
3192 second chunk in memory is addressable without causing a displacement
3193 overflow. If that would be the case we calculate the address in
3194 a scratch register. */
3195 if (MEM_P (x)
3196 && GET_CODE (XEXP (x, 0)) == PLUS
3197 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3198 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
3199 + GET_MODE_SIZE (mode) - 1))
3200 {
3201 /* For GENERAL_REGS a displacement overflow is no problem if occurring
3202 in an s_operand address since we may fall back to lm/stm.  So we only
3203 have to care about overflows in the b+i+d case. */
3204 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
3205 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
3206 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
3207 /* For FP_REGS no lm/stm is available so this check is triggered
3208 for displacement overflows in b+i+d and b+d like addresses. */
3209 || (reg_classes_intersect_p (FP_REGS, rclass)
3210 && s390_class_max_nregs (FP_REGS, mode) > 1))
3211 {
3212 if (in_p)
3213 sri->icode = (TARGET_64BIT ?
3214 CODE_FOR_reloaddi_nonoffmem_in :
3215 CODE_FOR_reloadsi_nonoffmem_in);
3216 else
3217 sri->icode = (TARGET_64BIT ?
3218 CODE_FOR_reloaddi_nonoffmem_out :
3219 CODE_FOR_reloadsi_nonoffmem_out);
3220 }
3221 }
3222
3223 /* A scratch address register is needed when a symbolic constant is
3224 copied to r0 when compiling with -fPIC.  In other cases the target
3225 register might be used as temporary (see legitimize_pic_address). */
3226 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
3227 sri->icode = (TARGET_64BIT ?
3228 CODE_FOR_reloaddi_PIC_addr :
3229 CODE_FOR_reloadsi_PIC_addr);
3230
3231 /* Either scratch or no register needed. */
3232 return NO_REGS;
3233 }
3234
3235 /* Generate code to load SRC, which is PLUS that is not a
3236 legitimate operand for the LA instruction, into TARGET.
3237 SCRATCH may be used as scratch register. */
3238
3239 void
3240 s390_expand_plus_operand (rtx target, rtx src,
3241 rtx scratch)
3242 {
3243 rtx sum1, sum2;
3244 struct s390_address ad;
3245
3246 /* src must be a PLUS; get its two operands. */
3247 gcc_assert (GET_CODE (src) == PLUS);
3248 gcc_assert (GET_MODE (src) == Pmode);
3249
3250 /* Check if any of the two operands is already scheduled
3251 for replacement by reload. This can happen e.g. when
3252 float registers occur in an address. */
3253 sum1 = find_replacement (&XEXP (src, 0));
3254 sum2 = find_replacement (&XEXP (src, 1));
3255 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3256
3257 /* If the address is already strictly valid, there's nothing to do. */
3258 if (!s390_decompose_address (src, &ad)
3259 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3260 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
3261 {
3262 /* Otherwise, one of the operands cannot be an address register;
3263 we reload its value into the scratch register. */
3264 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
3265 {
3266 emit_move_insn (scratch, sum1);
3267 sum1 = scratch;
3268 }
3269 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
3270 {
3271 emit_move_insn (scratch, sum2);
3272 sum2 = scratch;
3273 }
3274
3275 /* According to the way these invalid addresses are generated
3276 in reload.c, it should never happen (at least on s390) that
3277 *neither* of the PLUS components, after find_replacements
3278 was applied, is an address register. */
3279 if (sum1 == scratch && sum2 == scratch)
3280 {
3281 debug_rtx (src);
3282 gcc_unreachable ();
3283 }
3284
3285 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3286 }
3287
3288 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
3289 is only ever performed on addresses, so we can mark the
3290 sum as legitimate for LA in any case. */
3291 s390_load_address (target, src);
3292 }
3293
3294
3295 /* Return true if ADDR is a valid memory address.
3296 STRICT specifies whether strict register checking applies. */
3297
3298 static bool
3299 s390_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3300 {
3301 struct s390_address ad;
3302
3303 if (TARGET_Z10
3304 && larl_operand (addr, VOIDmode)
3305 && (mode == VOIDmode
3306 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
3307 return true;
3308
3309 if (!s390_decompose_address (addr, &ad))
3310 return false;
3311
3312 if (strict)
3313 {
3314 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3315 return false;
3316
3317 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
3318 return false;
3319 }
3320 else
3321 {
3322 if (ad.base
3323 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
3324 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
3325 return false;
3326
3327 if (ad.indx
3328 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
3329 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
3330 return false;
3331 }
3332 return true;
3333 }
3334
3335 /* Return true if OP is a valid operand for the LA instruction.
3336 In 31-bit, we need to prove that the result is used as an
3337 address, as LA performs only a 31-bit addition. */
3338
3339 bool
3340 legitimate_la_operand_p (rtx op)
3341 {
3342 struct s390_address addr;
3343 if (!s390_decompose_address (op, &addr))
3344 return false;
3345
3346 return (TARGET_64BIT || addr.pointer);
3347 }
3348
3349 /* Return true if it is valid *and* preferable to use LA to
3350 compute the sum of OP1 and OP2. */
3351
3352 bool
3353 preferred_la_operand_p (rtx op1, rtx op2)
3354 {
3355 struct s390_address addr;
3356
3357 if (op2 != const0_rtx)
3358 op1 = gen_rtx_PLUS (Pmode, op1, op2);
3359
3360 if (!s390_decompose_address (op1, &addr))
3361 return false;
3362 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
3363 return false;
3364 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
3365 return false;
3366
3367 /* Avoid LA instructions with index register on z196; it is
3368 preferable to use regular add instructions when possible.
3369 Starting with zEC12 the la with index register is "uncracked"
3370 again. */
3371 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
3372 return false;
3373
3374 if (!TARGET_64BIT && !addr.pointer)
3375 return false;
3376
3377 if (addr.pointer)
3378 return true;
3379
3380 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
3381 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
3382 return true;
3383
3384 return false;
3385 }
3386
3387 /* Emit a forced load-address operation to load SRC into DST.
3388 This will use the LOAD ADDRESS instruction even in situations
3389 where legitimate_la_operand_p (SRC) returns false. */
3390
3391 void
3392 s390_load_address (rtx dst, rtx src)
3393 {
3394 if (TARGET_64BIT)
3395 emit_move_insn (dst, src);
3396 else
3397 emit_insn (gen_force_la_31 (dst, src));
3398 }
3399
3400 /* Return a legitimate reference for ORIG (an address) using the
3401 register REG. If REG is 0, a new pseudo is generated.
3402
3403 There are two types of references that must be handled:
3404
3405 1. Global data references must load the address from the GOT, via
3406 the PIC reg. An insn is emitted to do this load, and the reg is
3407 returned.
3408
3409 2. Static data references, constant pool addresses, and code labels
3410 compute the address as an offset from the GOT, whose base is in
3411 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
3412 differentiate them from global data objects. The returned
3413 address is the PIC reg + an unspec constant.
3414
3415 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
3416 reg also appears in the address. */
3417
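/* For example, with flag_pic == 1 a global symbol is loaded from its
   @GOT slot via the PIC register, whereas a local symbol is addressed
   either PC-relative via larl (zarch) or as PIC register + @GOTOFF.  */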
3418 rtx
3419 legitimize_pic_address (rtx orig, rtx reg)
3420 {
3421 rtx addr = orig;
3422 rtx new_rtx = orig;
3423 rtx base;
3424
3425 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
3426
3427 if (GET_CODE (addr) == LABEL_REF
3428 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr)))
3429 {
3430 /* This is a local symbol. */
3431 if (TARGET_CPU_ZARCH && larl_operand (addr, VOIDmode))
3432 {
3433 /* Access local symbols PC-relative via LARL.
3434 This is the same as in the non-PIC case, so it is
3435 handled automatically ... */
3436 }
3437 else
3438 {
3439 /* Access local symbols relative to the GOT. */
3440
3441 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3442
3443 if (reload_in_progress || reload_completed)
3444 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3445
3446 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
3447 addr = gen_rtx_CONST (Pmode, addr);
3448 addr = force_const_mem (Pmode, addr);
3449 emit_move_insn (temp, addr);
3450
3451 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3452 if (reg != 0)
3453 {
3454 s390_load_address (reg, new_rtx);
3455 new_rtx = reg;
3456 }
3457 }
3458 }
3459 else if (GET_CODE (addr) == SYMBOL_REF)
3460 {
3461 if (reg == 0)
3462 reg = gen_reg_rtx (Pmode);
3463
3464 if (flag_pic == 1)
3465 {
3466 /* Assume GOT offset < 4k. This is handled the same way
3467 in both 31- and 64-bit code (@GOT). */
3468
3469 if (reload_in_progress || reload_completed)
3470 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3471
3472 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3473 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3474 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3475 new_rtx = gen_const_mem (Pmode, new_rtx);
3476 emit_move_insn (reg, new_rtx);
3477 new_rtx = reg;
3478 }
3479 else if (TARGET_CPU_ZARCH)
3480 {
3481 /* If the GOT offset might be >= 4k, we determine the position
3482 of the GOT entry via a PC-relative LARL (@GOTENT). */
3483
3484 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3485
3486 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3487 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3488
3489 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3490 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3491
3492 if (!TARGET_Z10)
3493 {
3494 emit_move_insn (temp, new_rtx);
3495 new_rtx = gen_const_mem (Pmode, temp);
3496 }
3497 else
3498 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
3499 emit_move_insn (reg, new_rtx);
3500 new_rtx = reg;
3501 }
3502 else
3503 {
3504 /* If the GOT offset might be >= 4k, we have to load it
3505 from the literal pool (@GOT). */
3506
3507 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3508
3509 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3510 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3511
3512 if (reload_in_progress || reload_completed)
3513 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3514
3515 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3516 addr = gen_rtx_CONST (Pmode, addr);
3517 addr = force_const_mem (Pmode, addr);
3518 emit_move_insn (temp, addr);
3519
3520 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3521 new_rtx = gen_const_mem (Pmode, new_rtx);
3522 emit_move_insn (reg, new_rtx);
3523 new_rtx = reg;
3524 }
3525 }
3526 else
3527 {
3528 if (GET_CODE (addr) == CONST)
3529 {
3530 addr = XEXP (addr, 0);
3531 if (GET_CODE (addr) == UNSPEC)
3532 {
3533 gcc_assert (XVECLEN (addr, 0) == 1);
3534 switch (XINT (addr, 1))
3535 {
3536 /* If someone moved a GOT-relative UNSPEC
3537 out of the literal pool, force them back in. */
3538 case UNSPEC_GOTOFF:
3539 case UNSPEC_PLTOFF:
3540 new_rtx = force_const_mem (Pmode, orig);
3541 break;
3542
3543 /* @GOT is OK as is if small. */
3544 case UNSPEC_GOT:
3545 if (flag_pic == 2)
3546 new_rtx = force_const_mem (Pmode, orig);
3547 break;
3548
3549 /* @GOTENT is OK as is. */
3550 case UNSPEC_GOTENT:
3551 break;
3552
3553 /* @PLT is OK as is on 64-bit, must be converted to
3554 GOT-relative @PLTOFF on 31-bit. */
3555 case UNSPEC_PLT:
3556 if (!TARGET_CPU_ZARCH)
3557 {
3558 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3559
3560 if (reload_in_progress || reload_completed)
3561 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3562
3563 addr = XVECEXP (addr, 0, 0);
3564 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
3565 UNSPEC_PLTOFF);
3566 addr = gen_rtx_CONST (Pmode, addr);
3567 addr = force_const_mem (Pmode, addr);
3568 emit_move_insn (temp, addr);
3569
3570 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3571 if (reg != 0)
3572 {
3573 s390_load_address (reg, new_rtx);
3574 new_rtx = reg;
3575 }
3576 }
3577 break;
3578
3579 /* Everything else cannot happen. */
3580 default:
3581 gcc_unreachable ();
3582 }
3583 }
3584 else
3585 gcc_assert (GET_CODE (addr) == PLUS);
3586 }
3587 if (GET_CODE (addr) == PLUS)
3588 {
3589 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
3590
3591 gcc_assert (!TLS_SYMBOLIC_CONST (op0));
3592 gcc_assert (!TLS_SYMBOLIC_CONST (op1));
3593
3594 /* Check first to see if this is a constant offset
3595 from a local symbol reference. */
3596 if ((GET_CODE (op0) == LABEL_REF
3597 || (GET_CODE (op0) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (op0)))
3598 && GET_CODE (op1) == CONST_INT)
3599 {
3600 if (TARGET_CPU_ZARCH
3601 && larl_operand (op0, VOIDmode)
3602 && INTVAL (op1) < (HOST_WIDE_INT)1 << 31
3603 && INTVAL (op1) >= -((HOST_WIDE_INT)1 << 31))
3604 {
3605 if (INTVAL (op1) & 1)
3606 {
3607 /* LARL can't handle odd offsets, so emit a
3608 pair of LARL and LA. */
3609 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3610
3611 if (!DISP_IN_RANGE (INTVAL (op1)))
3612 {
3613 HOST_WIDE_INT even = INTVAL (op1) - 1;
3614 op0 = gen_rtx_PLUS (Pmode, op0, GEN_INT (even));
3615 op0 = gen_rtx_CONST (Pmode, op0);
3616 op1 = const1_rtx;
3617 }
3618
3619 emit_move_insn (temp, op0);
3620 new_rtx = gen_rtx_PLUS (Pmode, temp, op1);
3621
3622 if (reg != 0)
3623 {
3624 s390_load_address (reg, new_rtx);
3625 new_rtx = reg;
3626 }
3627 }
3628 else
3629 {
3630 /* If the offset is even, we can just use LARL.
3631 This will happen automatically. */
3632 }
3633 }
3634 else
3635 {
3636 /* Access local symbols relative to the GOT. */
3637
3638 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3639
3640 if (reload_in_progress || reload_completed)
3641 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3642
3643 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
3644 UNSPEC_GOTOFF);
3645 addr = gen_rtx_PLUS (Pmode, addr, op1);
3646 addr = gen_rtx_CONST (Pmode, addr);
3647 addr = force_const_mem (Pmode, addr);
3648 emit_move_insn (temp, addr);
3649
3650 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3651 if (reg != 0)
3652 {
3653 s390_load_address (reg, new_rtx);
3654 new_rtx = reg;
3655 }
3656 }
3657 }
3658
3659 /* Now, check whether it is a GOT relative symbol plus offset
3660 that was pulled out of the literal pool. Force it back in. */
3661
3662 else if (GET_CODE (op0) == UNSPEC
3663 && GET_CODE (op1) == CONST_INT
3664 && XINT (op0, 1) == UNSPEC_GOTOFF)
3665 {
3666 gcc_assert (XVECLEN (op0, 0) == 1);
3667
3668 new_rtx = force_const_mem (Pmode, orig);
3669 }
3670
3671 /* Otherwise, compute the sum. */
3672 else
3673 {
3674 base = legitimize_pic_address (XEXP (addr, 0), reg);
3675 new_rtx = legitimize_pic_address (XEXP (addr, 1),
3676 base == reg ? NULL_RTX : reg);
3677 if (GET_CODE (new_rtx) == CONST_INT)
3678 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
3679 else
3680 {
3681 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
3682 {
3683 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
3684 new_rtx = XEXP (new_rtx, 1);
3685 }
3686 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
3687 }
3688
3689 if (GET_CODE (new_rtx) == CONST)
3690 new_rtx = XEXP (new_rtx, 0);
3691 new_rtx = force_operand (new_rtx, 0);
3692 }
3693 }
3694 }
3695 return new_rtx;
3696 }
3697
3698 /* Load the thread pointer into a register. */
3699
3700 rtx
3701 s390_get_thread_pointer (void)
3702 {
3703 rtx tp = gen_reg_rtx (Pmode);
3704
3705 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
3706 mark_reg_pointer (tp, BITS_PER_WORD);
3707
3708 return tp;
3709 }
3710
3711 /* Emit a TLS call insn.  The call target is the SYMBOL_REF stored
3712 in s390_tls_symbol, which always refers to __tls_get_offset.
3713 The returned offset is written to RESULT_REG and a USE rtx is
3714 generated for TLS_CALL.  */
3715
3716 static GTY(()) rtx s390_tls_symbol;
3717
3718 static void
3719 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
3720 {
3721 rtx insn;
3722
3723 if (!flag_pic)
3724 emit_insn (s390_load_got ());
3725
3726 if (!s390_tls_symbol)
3727 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
3728
3729 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
3730 gen_rtx_REG (Pmode, RETURN_REGNUM));
3731
3732 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
3733 RTL_CONST_CALL_P (insn) = 1;
3734 }
3735
3736 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3737 this (thread-local) address. REG may be used as temporary. */
3738
3739 static rtx
3740 legitimize_tls_address (rtx addr, rtx reg)
3741 {
3742 rtx new_rtx, tls_call, temp, base, r2, insn;
3743
3744 if (GET_CODE (addr) == SYMBOL_REF)
3745 switch (tls_symbolic_operand (addr))
3746 {
3747 case TLS_MODEL_GLOBAL_DYNAMIC:
3748 start_sequence ();
3749 r2 = gen_rtx_REG (Pmode, 2);
3750 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
3751 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3752 new_rtx = force_const_mem (Pmode, new_rtx);
3753 emit_move_insn (r2, new_rtx);
3754 s390_emit_tls_call_insn (r2, tls_call);
3755 insn = get_insns ();
3756 end_sequence ();
3757
3758 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3759 temp = gen_reg_rtx (Pmode);
3760 emit_libcall_block (insn, temp, r2, new_rtx);
3761
3762 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3763 if (reg != 0)
3764 {
3765 s390_load_address (reg, new_rtx);
3766 new_rtx = reg;
3767 }
3768 break;
3769
3770 case TLS_MODEL_LOCAL_DYNAMIC:
3771 start_sequence ();
3772 r2 = gen_rtx_REG (Pmode, 2);
3773 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
3774 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3775 new_rtx = force_const_mem (Pmode, new_rtx);
3776 emit_move_insn (r2, new_rtx);
3777 s390_emit_tls_call_insn (r2, tls_call);
3778 insn = get_insns ();
3779 end_sequence ();
3780
3781 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
3782 temp = gen_reg_rtx (Pmode);
3783 emit_libcall_block (insn, temp, r2, new_rtx);
3784
3785 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3786 base = gen_reg_rtx (Pmode);
3787 s390_load_address (base, new_rtx);
3788
3789 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
3790 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3791 new_rtx = force_const_mem (Pmode, new_rtx);
3792 temp = gen_reg_rtx (Pmode);
3793 emit_move_insn (temp, new_rtx);
3794
3795 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
3796 if (reg != 0)
3797 {
3798 s390_load_address (reg, new_rtx);
3799 new_rtx = reg;
3800 }
3801 break;
3802
3803 case TLS_MODEL_INITIAL_EXEC:
3804 if (flag_pic == 1)
3805 {
3806 /* Assume GOT offset < 4k. This is handled the same way
3807 in both 31- and 64-bit code. */
3808
3809 if (reload_in_progress || reload_completed)
3810 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3811
3812 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3813 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3814 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3815 new_rtx = gen_const_mem (Pmode, new_rtx);
3816 temp = gen_reg_rtx (Pmode);
3817 emit_move_insn (temp, new_rtx);
3818 }
3819 else if (TARGET_CPU_ZARCH)
3820 {
3821 /* If the GOT offset might be >= 4k, we determine the position
3822 of the GOT entry via a PC-relative LARL. */
3823
3824 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3825 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3826 temp = gen_reg_rtx (Pmode);
3827 emit_move_insn (temp, new_rtx);
3828
3829 new_rtx = gen_const_mem (Pmode, temp);
3830 temp = gen_reg_rtx (Pmode);
3831 emit_move_insn (temp, new_rtx);
3832 }
3833 else if (flag_pic)
3834 {
3835 /* If the GOT offset might be >= 4k, we have to load it
3836 from the literal pool. */
3837
3838 if (reload_in_progress || reload_completed)
3839 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3840
3841 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3842 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3843 new_rtx = force_const_mem (Pmode, new_rtx);
3844 temp = gen_reg_rtx (Pmode);
3845 emit_move_insn (temp, new_rtx);
3846
3847 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3848 new_rtx = gen_const_mem (Pmode, new_rtx);
3849
3850 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3851 temp = gen_reg_rtx (Pmode);
3852 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3853 }
3854 else
3855 {
3856 /* In position-dependent code, load the absolute address of
3857 the GOT entry from the literal pool. */
3858
3859 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3860 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3861 new_rtx = force_const_mem (Pmode, new_rtx);
3862 temp = gen_reg_rtx (Pmode);
3863 emit_move_insn (temp, new_rtx);
3864
3865 new_rtx = temp;
3866 new_rtx = gen_const_mem (Pmode, new_rtx);
3867 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3868 temp = gen_reg_rtx (Pmode);
3869 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3870 }
3871
3872 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3873 if (reg != 0)
3874 {
3875 s390_load_address (reg, new_rtx);
3876 new_rtx = reg;
3877 }
3878 break;
3879
3880 case TLS_MODEL_LOCAL_EXEC:
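/* The offset of the variable from the thread pointer (NTPOFF) is
   known at link time; load it from the literal pool and add the
   thread pointer. */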
3881 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3882 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3883 new_rtx = force_const_mem (Pmode, new_rtx);
3884 temp = gen_reg_rtx (Pmode);
3885 emit_move_insn (temp, new_rtx);
3886
3887 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3888 if (reg != 0)
3889 {
3890 s390_load_address (reg, new_rtx);
3891 new_rtx = reg;
3892 }
3893 break;
3894
3895 default:
3896 gcc_unreachable ();
3897 }
3898
3899 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
3900 {
3901 switch (XINT (XEXP (addr, 0), 1))
3902 {
3903 case UNSPEC_INDNTPOFF:
3904 gcc_assert (TARGET_CPU_ZARCH);
3905 new_rtx = addr;
3906 break;
3907
3908 default:
3909 gcc_unreachable ();
3910 }
3911 }
3912
3913 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
3914 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3915 {
3916 new_rtx = XEXP (XEXP (addr, 0), 0);
3917 if (GET_CODE (new_rtx) != SYMBOL_REF)
3918 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3919
3920 new_rtx = legitimize_tls_address (new_rtx, reg);
3921 new_rtx = plus_constant (Pmode, new_rtx,
3922 INTVAL (XEXP (XEXP (addr, 0), 1)));
3923 new_rtx = force_operand (new_rtx, 0);
3924 }
3925
3926 else
3927 gcc_unreachable (); /* for now ... */
3928
3929 return new_rtx;
3930 }
3931
3932 /* Emit insns making the address in operands[1] valid for a standard
3933 move to operands[0]. operands[1] is replaced by an address which
3934 should be used instead of the former RTX to emit the move
3935 pattern. */
3936
3937 void
3938 emit_symbolic_move (rtx *operands)
3939 {
3940 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
3941
3942 if (GET_CODE (operands[0]) == MEM)
3943 operands[1] = force_reg (Pmode, operands[1]);
3944 else if (TLS_SYMBOLIC_CONST (operands[1]))
3945 operands[1] = legitimize_tls_address (operands[1], temp);
3946 else if (flag_pic)
3947 operands[1] = legitimize_pic_address (operands[1], temp);
3948 }
3949
3950 /* Try machine-dependent ways of modifying an illegitimate address X
3951 to be legitimate. If we find one, return the new, valid address.
3952
3953 OLDX is the address as it was before break_out_memory_refs was called.
3954 In some cases it is useful to look at this to decide what needs to be done.
3955
3956 MODE is the mode of the operand pointed to by X.
3957
3958 When -fpic is used, special handling is needed for symbolic references.
3959 See comments by legitimize_pic_address for details. */
3960
3961 static rtx
3962 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3963 enum machine_mode mode ATTRIBUTE_UNUSED)
3964 {
3965 rtx constant_term = const0_rtx;
3966
3967 if (TLS_SYMBOLIC_CONST (x))
3968 {
3969 x = legitimize_tls_address (x, 0);
3970
3971 if (s390_legitimate_address_p (mode, x, FALSE))
3972 return x;
3973 }
3974 else if (GET_CODE (x) == PLUS
3975 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
3976 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
3977 {
3978 return x;
3979 }
3980 else if (flag_pic)
3981 {
3982 if (SYMBOLIC_CONST (x)
3983 || (GET_CODE (x) == PLUS
3984 && (SYMBOLIC_CONST (XEXP (x, 0))
3985 || SYMBOLIC_CONST (XEXP (x, 1)))))
3986 x = legitimize_pic_address (x, 0);
3987
3988 if (s390_legitimate_address_p (mode, x, FALSE))
3989 return x;
3990 }
3991
3992 x = eliminate_constant_term (x, &constant_term);
3993
3994 /* Optimize loading of large displacements by splitting them
3995 into the multiple of 4K and the rest; this allows the
3996 former to be CSE'd if possible.
3997
3998 Don't do this if the displacement is added to a register
3999 pointing into the stack frame, as the offsets will
4000 change later anyway. */
4001
4002 if (GET_CODE (constant_term) == CONST_INT
4003 && !TARGET_LONG_DISPLACEMENT
4004 && !DISP_IN_RANGE (INTVAL (constant_term))
4005 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
4006 {
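/* Example: a displacement of 0x12345 is split into UPPER = 0x12000,
   which is loaded into a register (and can be CSE'd across uses), and
   LOWER = 0x345, which still fits the 12-bit displacement field. */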
4007 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
4008 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
4009
4010 rtx temp = gen_reg_rtx (Pmode);
4011 rtx val = force_operand (GEN_INT (upper), temp);
4012 if (val != temp)
4013 emit_move_insn (temp, val);
4014
4015 x = gen_rtx_PLUS (Pmode, x, temp);
4016 constant_term = GEN_INT (lower);
4017 }
4018
4019 if (GET_CODE (x) == PLUS)
4020 {
4021 if (GET_CODE (XEXP (x, 0)) == REG)
4022 {
4023 rtx temp = gen_reg_rtx (Pmode);
4024 rtx val = force_operand (XEXP (x, 1), temp);
4025 if (val != temp)
4026 emit_move_insn (temp, val);
4027
4028 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
4029 }
4030
4031 else if (GET_CODE (XEXP (x, 1)) == REG)
4032 {
4033 rtx temp = gen_reg_rtx (Pmode);
4034 rtx val = force_operand (XEXP (x, 0), temp);
4035 if (val != temp)
4036 emit_move_insn (temp, val);
4037
4038 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
4039 }
4040 }
4041
4042 if (constant_term != const0_rtx)
4043 x = gen_rtx_PLUS (Pmode, x, constant_term);
4044
4045 return x;
4046 }
4047
4048 /* Try a machine-dependent way of reloading an illegitimate address AD
4049 operand. If we find one, push the reload and return the new address.
4050
4051 MODE is the mode of the enclosing MEM. OPNUM is the operand number
4052 and TYPE is the reload type of the current reload. */
4053
4054 rtx
4055 legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
4056 int opnum, int type)
4057 {
4058 if (!optimize || TARGET_LONG_DISPLACEMENT)
4059 return NULL_RTX;
4060
4061 if (GET_CODE (ad) == PLUS)
4062 {
4063 rtx tem = simplify_binary_operation (PLUS, Pmode,
4064 XEXP (ad, 0), XEXP (ad, 1));
4065 if (tem)
4066 ad = tem;
4067 }
4068
4069 if (GET_CODE (ad) == PLUS
4070 && GET_CODE (XEXP (ad, 0)) == REG
4071 && GET_CODE (XEXP (ad, 1)) == CONST_INT
4072 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
4073 {
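/* Same splitting as in s390_legitimize_address: the low 12 bits stay
   as the displacement, the 4K-aligned remainder is reloaded into the
   base register. */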
4074 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
4075 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
4076 rtx cst, tem, new_rtx;
4077
4078 cst = GEN_INT (upper);
4079 if (!legitimate_reload_constant_p (cst))
4080 cst = force_const_mem (Pmode, cst);
4081
4082 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
4083 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
4084
4085 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
4086 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
4087 opnum, (enum reload_type) type);
4088 return new_rtx;
4089 }
4090
4091 return NULL_RTX;
4092 }
4093
4094 /* Emit code to move LEN bytes from SRC to DST. */
4095
4096 bool
4097 s390_expand_movmem (rtx dst, rtx src, rtx len)
4098 {
4099 /* When tuning for z10 or higher we rely on the Glibc functions to
4100 do the right thing. Inline code is generated only for constant
4101 lengths below 64k. */
4102 if (s390_tune >= PROCESSOR_2097_Z10
4103 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4104 return false;
4105
4106 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4107 {
4108 if (INTVAL (len) > 0)
4109 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
4110 }
4111
4112 else if (TARGET_MVCLE)
4113 {
4114 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
4115 }
4116
4117 else
4118 {
4119 rtx dst_addr, src_addr, count, blocks, temp;
4120 rtx loop_start_label = gen_label_rtx ();
4121 rtx loop_end_label = gen_label_rtx ();
4122 rtx end_label = gen_label_rtx ();
4123 enum machine_mode mode;
4124
4125 mode = GET_MODE (len);
4126 if (mode == VOIDmode)
4127 mode = Pmode;
4128
4129 dst_addr = gen_reg_rtx (Pmode);
4130 src_addr = gen_reg_rtx (Pmode);
4131 count = gen_reg_rtx (mode);
4132 blocks = gen_reg_rtx (mode);
4133
4134 convert_move (count, len, 1);
4135 emit_cmp_and_jump_insns (count, const0_rtx,
4136 EQ, NULL_RTX, mode, 1, end_label);
4137
4138 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4139 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
4140 dst = change_address (dst, VOIDmode, dst_addr);
4141 src = change_address (src, VOIDmode, src_addr);
4142
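/* The MVC-based loop below moves 256 bytes per iteration.  COUNT is
   decremented by one because MVC encodes its length as LEN - 1; the
   upper bits of COUNT - 1 then give the number of full 256-byte
   blocks, the low 8 bits the length of the remainder. */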
4143 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4144 OPTAB_DIRECT);
4145 if (temp != count)
4146 emit_move_insn (count, temp);
4147
4148 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4149 OPTAB_DIRECT);
4150 if (temp != blocks)
4151 emit_move_insn (blocks, temp);
4152
4153 emit_cmp_and_jump_insns (blocks, const0_rtx,
4154 EQ, NULL_RTX, mode, 1, loop_end_label);
4155
4156 emit_label (loop_start_label);
4157
4158 if (TARGET_Z10
4159 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
4160 {
4161 rtx prefetch;
4162
4163 /* Issue a read prefetch for the +3 cache line. */
4164 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
4165 const0_rtx, const0_rtx);
4166 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4167 emit_insn (prefetch);
4168
4169 /* Issue a write prefetch for the +3 cache line. */
4170 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
4171 const1_rtx, const0_rtx);
4172 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4173 emit_insn (prefetch);
4174 }
4175
4176 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
4177 s390_load_address (dst_addr,
4178 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4179 s390_load_address (src_addr,
4180 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
4181
4182 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4183 OPTAB_DIRECT);
4184 if (temp != blocks)
4185 emit_move_insn (blocks, temp);
4186
4187 emit_cmp_and_jump_insns (blocks, const0_rtx,
4188 EQ, NULL_RTX, mode, 1, loop_end_label);
4189
4190 emit_jump (loop_start_label);
4191 emit_label (loop_end_label);
4192
4193 emit_insn (gen_movmem_short (dst, src,
4194 convert_to_mode (Pmode, count, 1)));
4195 emit_label (end_label);
4196 }
4197 return true;
4198 }
4199
4200 /* Emit code to set LEN bytes at DST to VAL.
4201 Make use of clrmem if VAL is zero. */
4202
4203 void
4204 s390_expand_setmem (rtx dst, rtx len, rtx val)
4205 {
4206 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
4207 return;
4208
4209 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
4210
4211 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
4212 {
4213 if (val == const0_rtx && INTVAL (len) <= 256)
4214 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
4215 else
4216 {
4217 /* Initialize memory by storing the first byte. */
4218 emit_move_insn (adjust_address (dst, QImode, 0), val);
4219
4220 if (INTVAL (len) > 1)
4221 {
4222 /* Initiate 1 byte overlap move.
4223 The first byte of DST is propagated through DSTP1.
4224 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
4225 DST is set to size 1 so the rest of the memory location
4226 does not count as source operand. */
4227 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
4228 set_mem_size (dst, 1);
4229
4230 emit_insn (gen_movmem_short (dstp1, dst,
4231 GEN_INT (INTVAL (len) - 2)));
4232 }
4233 }
4234 }
4235
4236 else if (TARGET_MVCLE)
4237 {
4238 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
4239 emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
4240 }
4241
4242 else
4243 {
4244 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
4245 rtx loop_start_label = gen_label_rtx ();
4246 rtx loop_end_label = gen_label_rtx ();
4247 rtx end_label = gen_label_rtx ();
4248 enum machine_mode mode;
4249
4250 mode = GET_MODE (len);
4251 if (mode == VOIDmode)
4252 mode = Pmode;
4253
4254 dst_addr = gen_reg_rtx (Pmode);
4255 count = gen_reg_rtx (mode);
4256 blocks = gen_reg_rtx (mode);
4257
4258 convert_move (count, len, 1);
4259 emit_cmp_and_jump_insns (count, const0_rtx,
4260 EQ, NULL_RTX, mode, 1, end_label);
4261
4262 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4263 dst = change_address (dst, VOIDmode, dst_addr);
4264
4265 if (val == const0_rtx)
4266 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4267 OPTAB_DIRECT);
4268 else
4269 {
4270 dstp1 = adjust_address (dst, VOIDmode, 1);
4271 set_mem_size (dst, 1);
4272
4273 /* Initialize memory by storing the first byte. */
4274 emit_move_insn (adjust_address (dst, QImode, 0), val);
4275
4276 /* If count is 1 we are done. */
4277 emit_cmp_and_jump_insns (count, const1_rtx,
4278 EQ, NULL_RTX, mode, 1, end_label);
4279
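/* Subtract 2: one byte has already been stored, and the propagating
   MVC below encodes its length as LEN - 1. */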
4280 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
4281 OPTAB_DIRECT);
4282 }
4283 if (temp != count)
4284 emit_move_insn (count, temp);
4285
4286 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4287 OPTAB_DIRECT);
4288 if (temp != blocks)
4289 emit_move_insn (blocks, temp);
4290
4291 emit_cmp_and_jump_insns (blocks, const0_rtx,
4292 EQ, NULL_RTX, mode, 1, loop_end_label);
4293
4294 emit_label (loop_start_label);
4295
4296 if (TARGET_Z10
4297 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
4298 {
4299 /* Issue a write prefetch for the +4 cache line. */
4300 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
4301 GEN_INT (1024)),
4302 const1_rtx, const0_rtx);
4303 emit_insn (prefetch);
4304 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4305 }
4306
4307 if (val == const0_rtx)
4308 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
4309 else
4310 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
4311 s390_load_address (dst_addr,
4312 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4313
4314 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4315 OPTAB_DIRECT);
4316 if (temp != blocks)
4317 emit_move_insn (blocks, temp);
4318
4319 emit_cmp_and_jump_insns (blocks, const0_rtx,
4320 EQ, NULL_RTX, mode, 1, loop_end_label);
4321
4322 emit_jump (loop_start_label);
4323 emit_label (loop_end_label);
4324
4325 if (val == const0_rtx)
4326 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
4327 else
4328 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
4329 emit_label (end_label);
4330 }
4331 }
4332
4333 /* Emit code to compare LEN bytes at OP0 with those at OP1,
4334 and return the result in TARGET. */
4335
4336 bool
4337 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
4338 {
4339 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
4340 rtx tmp;
4341
4342 /* When tuning for z10 or higher we rely on the Glibc functions to
4343 do the right thing. Inline code is generated only for constant
4344 lengths below 64k. */
4345 if (s390_tune >= PROCESSOR_2097_Z10
4346 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4347 return false;
4348
4349 /* As the result of CMPINT is inverted compared to what we need,
4350 we have to swap the operands. */
4351 tmp = op0; op0 = op1; op1 = tmp;
4352
4353 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4354 {
4355 if (INTVAL (len) > 0)
4356 {
4357 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
4358 emit_insn (gen_cmpint (target, ccreg));
4359 }
4360 else
4361 emit_move_insn (target, const0_rtx);
4362 }
4363 else if (TARGET_MVCLE)
4364 {
4365 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
4366 emit_insn (gen_cmpint (target, ccreg));
4367 }
4368 else
4369 {
4370 rtx addr0, addr1, count, blocks, temp;
4371 rtx loop_start_label = gen_label_rtx ();
4372 rtx loop_end_label = gen_label_rtx ();
4373 rtx end_label = gen_label_rtx ();
4374 enum machine_mode mode;
4375
4376 mode = GET_MODE (len);
4377 if (mode == VOIDmode)
4378 mode = Pmode;
4379
4380 addr0 = gen_reg_rtx (Pmode);
4381 addr1 = gen_reg_rtx (Pmode);
4382 count = gen_reg_rtx (mode);
4383 blocks = gen_reg_rtx (mode);
4384
4385 convert_move (count, len, 1);
4386 emit_cmp_and_jump_insns (count, const0_rtx,
4387 EQ, NULL_RTX, mode, 1, end_label);
4388
4389 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
4390 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
4391 op0 = change_address (op0, VOIDmode, addr0);
4392 op1 = change_address (op1, VOIDmode, addr1);
4393
4394 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4395 OPTAB_DIRECT);
4396 if (temp != count)
4397 emit_move_insn (count, temp);
4398
4399 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4400 OPTAB_DIRECT);
4401 if (temp != blocks)
4402 emit_move_insn (blocks, temp);
4403
4404 emit_cmp_and_jump_insns (blocks, const0_rtx,
4405 EQ, NULL_RTX, mode, 1, loop_end_label);
4406
4407 emit_label (loop_start_label);
4408
4409 if (TARGET_Z10
4410 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
4411 {
4412 rtx prefetch;
4413
4414 /* Issue a read prefetch for the +2 cache line of operand 1. */
4415 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
4416 const0_rtx, const0_rtx);
4417 emit_insn (prefetch);
4418 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4419
4420 /* Issue a read prefetch for the +2 cache line of operand 2. */
4421 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
4422 const0_rtx, const0_rtx);
4423 emit_insn (prefetch);
4424 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4425 }
4426
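/* Compare a full 256-byte block.  If the CC indicates a difference,
   leave the loop early; CMPINT at END_LABEL turns the CC into the
   final result. */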
4427 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
4428 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
4429 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
4430 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
4431 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
4432 emit_jump_insn (temp);
4433
4434 s390_load_address (addr0,
4435 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
4436 s390_load_address (addr1,
4437 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
4438
4439 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4440 OPTAB_DIRECT);
4441 if (temp != blocks)
4442 emit_move_insn (blocks, temp);
4443
4444 emit_cmp_and_jump_insns (blocks, const0_rtx,
4445 EQ, NULL_RTX, mode, 1, loop_end_label);
4446
4447 emit_jump (loop_start_label);
4448 emit_label (loop_end_label);
4449
4450 emit_insn (gen_cmpmem_short (op0, op1,
4451 convert_to_mode (Pmode, count, 1)));
4452 emit_label (end_label);
4453
4454 emit_insn (gen_cmpint (target, ccreg));
4455 }
4456 return true;
4457 }
4458
4459
4460 /* Expand conditional increment or decrement using alc/slb instructions.
4461 Should generate code setting DST to either SRC or SRC + INCREMENT,
4462 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
4463 Returns true if successful, false otherwise.
4464
4465 That makes it possible to implement some if-constructs without jumps, e.g.:
4466 (borrow = CC0 | CC1 and carry = CC2 | CC3)
4467 unsigned int a, b, c;
4468 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
4469 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
4470 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
4471 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
4472
4473 Checks for EQ and NE with a nonzero value need an additional xor, e.g.:
4474 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
4475 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
4476 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
4477 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
4478
4479 bool
4480 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
4481 rtx dst, rtx src, rtx increment)
4482 {
4483 enum machine_mode cmp_mode;
4484 enum machine_mode cc_mode;
4485 rtx op_res;
4486 rtx insn;
4487 rtvec p;
4488 int ret;
4489
4490 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
4491 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
4492 cmp_mode = SImode;
4493 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
4494 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
4495 cmp_mode = DImode;
4496 else
4497 return false;
4498
4499 /* Try ADD LOGICAL WITH CARRY. */
4500 if (increment == const1_rtx)
4501 {
4502 /* Determine CC mode to use. */
4503 if (cmp_code == EQ || cmp_code == NE)
4504 {
4505 if (cmp_op1 != const0_rtx)
4506 {
4507 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4508 NULL_RTX, 0, OPTAB_WIDEN);
4509 cmp_op1 = const0_rtx;
4510 }
4511
4512 cmp_code = cmp_code == EQ ? LEU : GTU;
4513 }
4514
4515 if (cmp_code == LTU || cmp_code == LEU)
4516 {
4517 rtx tem = cmp_op0;
4518 cmp_op0 = cmp_op1;
4519 cmp_op1 = tem;
4520 cmp_code = swap_condition (cmp_code);
4521 }
4522
4523 switch (cmp_code)
4524 {
4525 case GTU:
4526 cc_mode = CCUmode;
4527 break;
4528
4529 case GEU:
4530 cc_mode = CCL3mode;
4531 break;
4532
4533 default:
4534 return false;
4535 }
4536
4537 /* Emit comparison instruction pattern. */
4538 if (!register_operand (cmp_op0, cmp_mode))
4539 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4540
4541 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4542 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4543 /* We use insn_invalid_p here to add clobbers if required. */
4544 ret = insn_invalid_p (emit_insn (insn), false);
4545 gcc_assert (!ret);
4546
4547 /* Emit ALC instruction pattern. */
4548 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4549 gen_rtx_REG (cc_mode, CC_REGNUM),
4550 const0_rtx);
4551
4552 if (src != const0_rtx)
4553 {
4554 if (!register_operand (src, GET_MODE (dst)))
4555 src = force_reg (GET_MODE (dst), src);
4556
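/* Build (plus (plus (cc-compare) src) 0) so the RTX has the shape
   expected by the add-with-carry patterns in s390.md. */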
4557 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
4558 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
4559 }
4560
4561 p = rtvec_alloc (2);
4562 RTVEC_ELT (p, 0) =
4563 gen_rtx_SET (VOIDmode, dst, op_res);
4564 RTVEC_ELT (p, 1) =
4565 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4566 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4567
4568 return true;
4569 }
4570
4571 /* Try SUBTRACT LOGICAL WITH BORROW. */
4572 if (increment == constm1_rtx)
4573 {
4574 /* Determine CC mode to use. */
4575 if (cmp_code == EQ || cmp_code == NE)
4576 {
4577 if (cmp_op1 != const0_rtx)
4578 {
4579 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4580 NULL_RTX, 0, OPTAB_WIDEN);
4581 cmp_op1 = const0_rtx;
4582 }
4583
4584 cmp_code = cmp_code == EQ ? LEU : GTU;
4585 }
4586
4587 if (cmp_code == GTU || cmp_code == GEU)
4588 {
4589 rtx tem = cmp_op0;
4590 cmp_op0 = cmp_op1;
4591 cmp_op1 = tem;
4592 cmp_code = swap_condition (cmp_code);
4593 }
4594
4595 switch (cmp_code)
4596 {
4597 case LEU:
4598 cc_mode = CCUmode;
4599 break;
4600
4601 case LTU:
4602 cc_mode = CCL3mode;
4603 break;
4604
4605 default:
4606 return false;
4607 }
4608
4609 /* Emit comparison instruction pattern. */
4610 if (!register_operand (cmp_op0, cmp_mode))
4611 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4612
4613 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4614 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4615 /* We use insn_invalid_p here to add clobbers if required. */
4616 ret = insn_invalid_p (emit_insn (insn), false);
4617 gcc_assert (!ret);
4618
4619 /* Emit SLB instruction pattern. */
4620 if (!register_operand (src, GET_MODE (dst)))
4621 src = force_reg (GET_MODE (dst), src);
4622
4623 op_res = gen_rtx_MINUS (GET_MODE (dst),
4624 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
4625 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4626 gen_rtx_REG (cc_mode, CC_REGNUM),
4627 const0_rtx));
4628 p = rtvec_alloc (2);
4629 RTVEC_ELT (p, 0) =
4630 gen_rtx_SET (VOIDmode, dst, op_res);
4631 RTVEC_ELT (p, 1) =
4632 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4633 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4634
4635 return true;
4636 }
4637
4638 return false;
4639 }
4640
4641 /* Expand code for the insv template. Return true if successful. */
4642
4643 bool
4644 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
4645 {
4646 int bitsize = INTVAL (op1);
4647 int bitpos = INTVAL (op2);
4648 enum machine_mode mode = GET_MODE (dest);
4649 enum machine_mode smode;
4650 int smode_bsize, mode_bsize;
4651 rtx op, clobber;
4652
4653 /* Generate INSERT IMMEDIATE (IILL et al). */
4654 /* (set (ze (reg)) (const_int)). */
4655 if (TARGET_ZARCH
4656 && register_operand (dest, word_mode)
4657 && (bitpos % 16) == 0
4658 && (bitsize % 16) == 0
4659 && const_int_operand (src, VOIDmode))
4660 {
4661 HOST_WIDE_INT val = INTVAL (src);
4662 int regpos = bitpos + bitsize;
4663
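/* Emit one INSERT IMMEDIATE per 16- or 32-bit chunk, consuming VAL
   from its least significant bits upwards. */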
4664 while (regpos > bitpos)
4665 {
4666 enum machine_mode putmode;
4667 int putsize;
4668
4669 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
4670 putmode = SImode;
4671 else
4672 putmode = HImode;
4673
4674 putsize = GET_MODE_BITSIZE (putmode);
4675 regpos -= putsize;
4676 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4677 GEN_INT (putsize),
4678 GEN_INT (regpos)),
4679 gen_int_mode (val, putmode));
4680 val >>= putsize;
4681 }
4682 gcc_assert (regpos == bitpos);
4683 return true;
4684 }
4685
4686 smode = smallest_mode_for_size (bitsize, MODE_INT);
4687 smode_bsize = GET_MODE_BITSIZE (smode);
4688 mode_bsize = GET_MODE_BITSIZE (mode);
4689
4690 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
4691 if (bitpos == 0
4692 && (bitsize % BITS_PER_UNIT) == 0
4693 && MEM_P (dest)
4694 && (register_operand (src, word_mode)
4695 || const_int_operand (src, VOIDmode)))
4696 {
4697 /* Emit standard pattern if possible. */
4698 if (smode_bsize == bitsize)
4699 {
4700 emit_move_insn (adjust_address (dest, smode, 0),
4701 gen_lowpart (smode, src));
4702 return true;
4703 }
4704
4705 /* (set (ze (mem)) (const_int)). */
4706 else if (const_int_operand (src, VOIDmode))
4707 {
4708 int size = bitsize / BITS_PER_UNIT;
4709 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
4710 BLKmode,
4711 UNITS_PER_WORD - size);
4712
4713 dest = adjust_address (dest, BLKmode, 0);
4714 set_mem_size (dest, size);
4715 s390_expand_movmem (dest, src_mem, GEN_INT (size));
4716 return true;
4717 }
4718
4719 /* (set (ze (mem)) (reg)). */
4720 else if (register_operand (src, word_mode))
4721 {
4722 if (bitsize <= 32)
4723 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
4724 const0_rtx), src);
4725 else
4726 {
4727 /* Emit st,stcmh sequence. */
4728 int stcmh_width = bitsize - 32;
4729 int size = stcmh_width / BITS_PER_UNIT;
4730
4731 emit_move_insn (adjust_address (dest, SImode, size),
4732 gen_lowpart (SImode, src));
4733 set_mem_size (dest, size);
4734 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4735 GEN_INT (stcmh_width),
4736 const0_rtx),
4737 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
4738 }
4739 return true;
4740 }
4741 }
4742
4743 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
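/* The field must lie entirely within one 32-bit half of the register,
   since ICM and ICMH each address only one half. */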
4744 if ((bitpos % BITS_PER_UNIT) == 0
4745 && (bitsize % BITS_PER_UNIT) == 0
4746 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
4747 && MEM_P (src)
4748 && (mode == DImode || mode == SImode)
4749 && register_operand (dest, mode))
4750 {
4751 /* Emit a strict_low_part pattern if possible. */
4752 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
4753 {
4754 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
4755 op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src));
4756 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4757 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
4758 return true;
4759 }
4760
4761 /* ??? There are more powerful versions of ICM that are not
4762 completely represented in the md file. */
4763 }
4764
4765 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
4766 if (TARGET_Z10 && (mode == DImode || mode == SImode))
4767 {
4768 enum machine_mode mode_s = GET_MODE (src);
4769
4770 if (mode_s == VOIDmode)
4771 {
4772 /* Assume const_int etc. already in the proper mode. */
4773 src = force_reg (mode, src);
4774 }
4775 else if (mode_s != mode)
4776 {
4777 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
4778 src = force_reg (mode_s, src);
4779 src = gen_lowpart (mode, src);
4780 }
4781
4782 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
4783 op = gen_rtx_SET (VOIDmode, op, src);
4784
4785 if (!TARGET_ZEC12)
4786 {
4787 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4788 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
4789 }
4790 emit_insn (op);
4791
4792 return true;
4793 }
4794
4795 return false;
4796 }
4797
4798 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
4799 register that holds VAL of mode MODE shifted by COUNT bits. */
4800
4801 static inline rtx
4802 s390_expand_mask_and_shift (rtx val, enum machine_mode mode, rtx count)
4803 {
4804 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
4805 NULL_RTX, 1, OPTAB_DIRECT);
4806 return expand_simple_binop (SImode, ASHIFT, val, count,
4807 NULL_RTX, 1, OPTAB_DIRECT);
4808 }
4809
4810 /* Structure to hold the initial parameters for a compare_and_swap operation
4811 in HImode and QImode. */
4812
4813 struct alignment_context
4814 {
4815 rtx memsi; /* SI aligned memory location. */
4816 rtx shift; /* Bit offset with regard to lsb. */
4817 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
4818 rtx modemaski; /* ~modemask */
4819 bool aligned; /* True if memory is aligned, false otherwise. */
4820 };
4821
4822 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
4823 structure AC for transparent simplification if the memory alignment is known
4824 to be at least 32 bits. MEM is the memory location for the actual operation
4825 and MODE its mode. */
4826
4827 static void
4828 init_alignment_context (struct alignment_context *ac, rtx mem,
4829 enum machine_mode mode)
4830 {
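/* The initial shift is the byte distance of the HImode/QImode value
   from the least significant byte of the containing SImode word; on
   this big-endian target an aligned value occupies the most
   significant bytes. */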
4831 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
4832 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
4833
4834 if (ac->aligned)
4835 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
4836 else
4837 {
4838 /* Alignment is unknown. */
4839 rtx byteoffset, addr, align;
4840
4841 /* Force the address into a register. */
4842 addr = force_reg (Pmode, XEXP (mem, 0));
4843
4844 /* Align it to SImode. */
4845 align = expand_simple_binop (Pmode, AND, addr,
4846 GEN_INT (-GET_MODE_SIZE (SImode)),
4847 NULL_RTX, 1, OPTAB_DIRECT);
4848 /* Generate MEM. */
4849 ac->memsi = gen_rtx_MEM (SImode, align);
4850 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
4851 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
4852 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
4853
4854 /* Calculate shiftcount. */
4855 byteoffset = expand_simple_binop (Pmode, AND, addr,
4856 GEN_INT (GET_MODE_SIZE (SImode) - 1),
4857 NULL_RTX, 1, OPTAB_DIRECT);
4858 /* As we already have some offset, evaluate the remaining distance. */
4859 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
4860 NULL_RTX, 1, OPTAB_DIRECT);
4861 }
4862
4863 /* Shift is the byte count, but we need the bitcount. */
4864 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
4865 NULL_RTX, 1, OPTAB_DIRECT);
4866
4867 /* Calculate masks. */
4868 ac->modemask = expand_simple_binop (SImode, ASHIFT,
4869 GEN_INT (GET_MODE_MASK (mode)),
4870 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
4871 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
4872 NULL_RTX, 1);
4873 }
4874
4875 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
4876 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
4877 perform the merge in SEQ2. */
4878
4879 static rtx
4880 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
4881 enum machine_mode mode, rtx val, rtx ins)
4882 {
4883 rtx tmp;
4884
4885 if (ac->aligned)
4886 {
4887 start_sequence ();
4888 tmp = copy_to_mode_reg (SImode, val);
4889 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
4890 const0_rtx, ins))
4891 {
4892 *seq1 = NULL;
4893 *seq2 = get_insns ();
4894 end_sequence ();
4895 return tmp;
4896 }
4897 end_sequence ();
4898 }
4899
4900 /* Failed to use insv. Generate a two part shift and mask. */
4901 start_sequence ();
4902 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
4903 *seq1 = get_insns ();
4904 end_sequence ();
4905
4906 start_sequence ();
4907 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
4908 *seq2 = get_insns ();
4909 end_sequence ();
4910
4911 return tmp;
4912 }
4913
4914 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
4915 the memory location, CMP the old value to compare MEM with and NEW_RTX the
4916 value to set if CMP == MEM. */
4917
4918 void
4919 s390_expand_cs_hqi (enum machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
4920 rtx cmp, rtx new_rtx, bool is_weak)
4921 {
4922 struct alignment_context ac;
4923 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
4924 rtx res = gen_reg_rtx (SImode);
4925 rtx csloop = NULL, csend = NULL;
4926
4927 gcc_assert (MEM_P (mem));
4928
4929 init_alignment_context (&ac, mem, mode);
4930
4931 /* Load full word. Subsequent loads are performed by CS. */
4932 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
4933 NULL_RTX, 1, OPTAB_DIRECT);
4934
4935 /* Prepare insertions of cmp and new_rtx into the loaded value. When
4936 possible, we try to use insv to make this happen efficiently. If
4937 that fails we'll generate code both inside and outside the loop. */
4938 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
4939 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
4940
4941 if (seq0)
4942 emit_insn (seq0);
4943 if (seq1)
4944 emit_insn (seq1);
4945
4946 /* Start CS loop. */
4947 if (!is_weak)
4948 {
4949 /* Begin assuming success. */
4950 emit_move_insn (btarget, const1_rtx);
4951
4952 csloop = gen_label_rtx ();
4953 csend = gen_label_rtx ();
4954 emit_label (csloop);
4955 }
4956
4957 /* val = "<mem>00..0<mem>"
4958 * cmp = "00..0<cmp>00..0"
4959 * new = "00..0<new>00..0"
4960 */
4961
4962 emit_insn (seq2);
4963 emit_insn (seq3);
4964
4965 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
4966 if (is_weak)
4967 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
4968 else
4969 {
4970 rtx tmp;
4971
4972 /* Jump to end if we're done (likely?). */
4973 s390_emit_jump (csend, cc);
4974
4975 /* Check for changes outside mode, and loop internally if so.
4976 Arrange the moves so that the compare is adjacent to the
4977 branch so that we can generate CRJ. */
4978 tmp = copy_to_reg (val);
4979 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
4980 1, OPTAB_DIRECT);
4981 cc = s390_emit_compare (NE, val, tmp);
4982 s390_emit_jump (csloop, cc);
4983
4984 /* Failed. */
4985 emit_move_insn (btarget, const0_rtx);
4986 emit_label (csend);
4987 }
4988
4989 /* Return the correct part of the bitfield. */
4990 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
4991 NULL_RTX, 1, OPTAB_DIRECT), 1);
4992 }
4993
4994 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
4995 and VAL the value to play with. If AFTER is true then store the value
4996 MEM holds after the operation, if AFTER is false then store the value MEM
4997 holds before the operation. If TARGET is zero then discard that value, else
4998 store it to TARGET. */
4999
5000 void
5001 s390_expand_atomic (enum machine_mode mode, enum rtx_code code,
5002 rtx target, rtx mem, rtx val, bool after)
5003 {
5004 struct alignment_context ac;
5005 rtx cmp;
5006 rtx new_rtx = gen_reg_rtx (SImode);
5007 rtx orig = gen_reg_rtx (SImode);
5008 rtx csloop = gen_label_rtx ();
5009
5010 gcc_assert (!target || register_operand (target, VOIDmode));
5011 gcc_assert (MEM_P (mem));
5012
5013 init_alignment_context (&ac, mem, mode);
5014
5015 /* Shift val to the correct bit positions.
5016 Preserve "icm", but prevent "ex icm". */
5017 if (!(ac.aligned && code == SET && MEM_P (val)))
5018 val = s390_expand_mask_and_shift (val, mode, ac.shift);
5019
5020 /* Further preparation insns. */
5021 if (code == PLUS || code == MINUS)
5022 emit_move_insn (orig, val);
5023 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
5024 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
5025 NULL_RTX, 1, OPTAB_DIRECT);
5026
5027 /* Load full word. Subsequent loads are performed by CS. */
5028 cmp = force_reg (SImode, ac.memsi);
5029
5030 /* Start CS loop. */
5031 emit_label (csloop);
5032 emit_move_insn (new_rtx, cmp);
5033
5034 /* Patch new with val at correct position. */
5035 switch (code)
5036 {
5037 case PLUS:
5038 case MINUS:
5039 val = expand_simple_binop (SImode, code, new_rtx, orig,
5040 NULL_RTX, 1, OPTAB_DIRECT);
5041 val = expand_simple_binop (SImode, AND, val, ac.modemask,
5042 NULL_RTX, 1, OPTAB_DIRECT);
5043 /* FALLTHRU */
5044 case SET:
5045 if (ac.aligned && MEM_P (val))
5046 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
5047 0, 0, SImode, val);
5048 else
5049 {
5050 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
5051 NULL_RTX, 1, OPTAB_DIRECT);
5052 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
5053 NULL_RTX, 1, OPTAB_DIRECT);
5054 }
5055 break;
5056 case AND:
5057 case IOR:
5058 case XOR:
5059 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
5060 NULL_RTX, 1, OPTAB_DIRECT);
5061 break;
5062 case MULT: /* NAND */
5063 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
5064 NULL_RTX, 1, OPTAB_DIRECT);
5065 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
5066 NULL_RTX, 1, OPTAB_DIRECT);
5067 break;
5068 default:
5069 gcc_unreachable ();
5070 }
5071
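/* Retry while COMPARE AND SWAP fails; on failure CMP has been
   reloaded with the current memory contents by the CS instruction. */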
5072 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
5073 ac.memsi, cmp, new_rtx));
5074
5075 /* Return the correct part of the bitfield. */
5076 if (target)
5077 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
5078 after ? new_rtx : cmp, ac.shift,
5079 NULL_RTX, 1, OPTAB_DIRECT), 1);
5080 }
5081
5082 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5083 We need to emit DTP-relative relocations. */
5084
5085 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
5086
5087 static void
5088 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
5089 {
5090 switch (size)
5091 {
5092 case 4:
5093 fputs ("\t.long\t", file);
5094 break;
5095 case 8:
5096 fputs ("\t.quad\t", file);
5097 break;
5098 default:
5099 gcc_unreachable ();
5100 }
5101 output_addr_const (file, x);
5102 fputs ("@DTPOFF", file);
5103 }
5104
5105 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
5106 /* Implement TARGET_MANGLE_TYPE. */
5107
5108 static const char *
5109 s390_mangle_type (const_tree type)
5110 {
5111 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
5112 && TARGET_LONG_DOUBLE_128)
5113 return "g";
5114
5115 /* For all other types, use normal C++ mangling. */
5116 return NULL;
5117 }
5118 #endif
5119
5120 /* In the name of slightly smaller debug output, and to cater to
5121 general assembler lossage, recognize various UNSPEC sequences
5122 and turn them back into a direct symbol reference. */
5123
5124 static rtx
5125 s390_delegitimize_address (rtx orig_x)
5126 {
5127 rtx x, y;
5128
5129 orig_x = delegitimize_mem_from_attrs (orig_x);
5130 x = orig_x;
5131
5132 /* Extract the symbol ref from:
5133 (plus:SI (reg:SI 12 %r12)
5134 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
5135 UNSPEC_GOTOFF/PLTOFF)))
5136 and
5137 (plus:SI (reg:SI 12 %r12)
5138 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
5139 UNSPEC_GOTOFF/PLTOFF)
5140 (const_int 4 [0x4])))) */
5141 if (GET_CODE (x) == PLUS
5142 && REG_P (XEXP (x, 0))
5143 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
5144 && GET_CODE (XEXP (x, 1)) == CONST)
5145 {
5146 HOST_WIDE_INT offset = 0;
5147
5148 /* The const operand. */
5149 y = XEXP (XEXP (x, 1), 0);
5150
5151 if (GET_CODE (y) == PLUS
5152 && GET_CODE (XEXP (y, 1)) == CONST_INT)
5153 {
5154 offset = INTVAL (XEXP (y, 1));
5155 y = XEXP (y, 0);
5156 }
5157
5158 if (GET_CODE (y) == UNSPEC
5159 && (XINT (y, 1) == UNSPEC_GOTOFF
5160 || XINT (y, 1) == UNSPEC_PLTOFF))
5161 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
5162 }
5163
5164 if (GET_CODE (x) != MEM)
5165 return orig_x;
5166
5167 x = XEXP (x, 0);
5168 if (GET_CODE (x) == PLUS
5169 && GET_CODE (XEXP (x, 1)) == CONST
5170 && GET_CODE (XEXP (x, 0)) == REG
5171 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5172 {
5173 y = XEXP (XEXP (x, 1), 0);
5174 if (GET_CODE (y) == UNSPEC
5175 && XINT (y, 1) == UNSPEC_GOT)
5176 y = XVECEXP (y, 0, 0);
5177 else
5178 return orig_x;
5179 }
5180 else if (GET_CODE (x) == CONST)
5181 {
5182 /* Extract the symbol ref from:
5183 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
5184 UNSPEC_PLT/GOTENT))) */
5185
5186 y = XEXP (x, 0);
5187 if (GET_CODE (y) == UNSPEC
5188 && (XINT (y, 1) == UNSPEC_GOTENT
5189 || XINT (y, 1) == UNSPEC_PLT))
5190 y = XVECEXP (y, 0, 0);
5191 else
5192 return orig_x;
5193 }
5194 else
5195 return orig_x;
5196
5197 if (GET_MODE (orig_x) != Pmode)
5198 {
5199 if (GET_MODE (orig_x) == BLKmode)
5200 return orig_x;
5201 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
5202 if (y == NULL_RTX)
5203 return orig_x;
5204 }
5205 return y;
5206 }
5207
5208 /* Output operand OP to stdio stream FILE.
5209 OP is an address (register + offset) which is not used to address data;
5210 instead the rightmost bits are interpreted as the value. */
5211
5212 static void
5213 print_shift_count_operand (FILE *file, rtx op)
5214 {
5215 HOST_WIDE_INT offset;
5216 rtx base;
5217
5218 /* Extract base register and offset. */
5219 if (!s390_decompose_shift_count (op, &base, &offset))
5220 gcc_unreachable ();
5221
5222 /* Sanity check. */
5223 if (base)
5224 {
5225 gcc_assert (GET_CODE (base) == REG);
5226 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
5227 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
5228 }
5229
5230 /* Offsets are restricted to twelve bits. */
5231 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
5232 if (base)
5233 fprintf (file, "(%s)", reg_names[REGNO (base)]);
5234 }
5235
5236 /* See 'get_some_local_dynamic_name'. */
5237
5238 static int
5239 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
5240 {
5241 rtx x = *px;
5242
5243 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
5244 {
5245 x = get_pool_constant (x);
5246 return for_each_rtx (&x, get_some_local_dynamic_name_1, 0);
5247 }
5248
5249 if (GET_CODE (x) == SYMBOL_REF
5250 && tls_symbolic_operand (x) == TLS_MODEL_LOCAL_DYNAMIC)
5251 {
5252 cfun->machine->some_ld_name = XSTR (x, 0);
5253 return 1;
5254 }
5255
5256 return 0;
5257 }
5258
5259 /* Locate some local-dynamic symbol still in use by this function
5260 so that we can print its name in local-dynamic base patterns. */
5261
5262 static const char *
5263 get_some_local_dynamic_name (void)
5264 {
5265 rtx insn;
5266
5267 if (cfun->machine->some_ld_name)
5268 return cfun->machine->some_ld_name;
5269
5270 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
5271 if (INSN_P (insn)
5272 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
5273 return cfun->machine->some_ld_name;
5274
5275 gcc_unreachable ();
5276 }
5277
5278 /* Output machine-dependent UNSPECs occurring in address constant X
5279 in assembler syntax to stdio stream FILE. Returns true if the
5280 constant X could be recognized, false otherwise. */
5281
5282 static bool
5283 s390_output_addr_const_extra (FILE *file, rtx x)
5284 {
5285 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
5286 switch (XINT (x, 1))
5287 {
5288 case UNSPEC_GOTENT:
5289 output_addr_const (file, XVECEXP (x, 0, 0));
5290 fprintf (file, "@GOTENT");
5291 return true;
5292 case UNSPEC_GOT:
5293 output_addr_const (file, XVECEXP (x, 0, 0));
5294 fprintf (file, "@GOT");
5295 return true;
5296 case UNSPEC_GOTOFF:
5297 output_addr_const (file, XVECEXP (x, 0, 0));
5298 fprintf (file, "@GOTOFF");
5299 return true;
5300 case UNSPEC_PLT:
5301 output_addr_const (file, XVECEXP (x, 0, 0));
5302 fprintf (file, "@PLT");
5303 return true;
5304 case UNSPEC_PLTOFF:
5305 output_addr_const (file, XVECEXP (x, 0, 0));
5306 fprintf (file, "@PLTOFF");
5307 return true;
5308 case UNSPEC_TLSGD:
5309 output_addr_const (file, XVECEXP (x, 0, 0));
5310 fprintf (file, "@TLSGD");
5311 return true;
5312 case UNSPEC_TLSLDM:
5313 assemble_name (file, get_some_local_dynamic_name ());
5314 fprintf (file, "@TLSLDM");
5315 return true;
5316 case UNSPEC_DTPOFF:
5317 output_addr_const (file, XVECEXP (x, 0, 0));
5318 fprintf (file, "@DTPOFF");
5319 return true;
5320 case UNSPEC_NTPOFF:
5321 output_addr_const (file, XVECEXP (x, 0, 0));
5322 fprintf (file, "@NTPOFF");
5323 return true;
5324 case UNSPEC_GOTNTPOFF:
5325 output_addr_const (file, XVECEXP (x, 0, 0));
5326 fprintf (file, "@GOTNTPOFF");
5327 return true;
5328 case UNSPEC_INDNTPOFF:
5329 output_addr_const (file, XVECEXP (x, 0, 0));
5330 fprintf (file, "@INDNTPOFF");
5331 return true;
5332 }
5333
5334 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
5335 switch (XINT (x, 1))
5336 {
5337 case UNSPEC_POOL_OFFSET:
5338 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
5339 output_addr_const (file, x);
5340 return true;
5341 }
5342 return false;
5343 }
5344
5345 /* Output address operand ADDR in assembler syntax to
5346 stdio stream FILE. */
5347
5348 void
5349 print_operand_address (FILE *file, rtx addr)
5350 {
5351 struct s390_address ad;
5352
5353 if (s390_loadrelative_operand_p (addr))
5354 {
5355 if (!TARGET_Z10)
5356 {
5357 output_operand_lossage ("symbolic memory references are "
5358 "only supported on z10 or later");
5359 return;
5360 }
5361 output_addr_const (file, addr);
5362 return;
5363 }
5364
5365 if (!s390_decompose_address (addr, &ad)
5366 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5367 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
5368 output_operand_lossage ("cannot decompose address");
5369
5370 if (ad.disp)
5371 output_addr_const (file, ad.disp);
5372 else
5373 fprintf (file, "0");
5374
5375 if (ad.base && ad.indx)
5376 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
5377 reg_names[REGNO (ad.base)]);
5378 else if (ad.base)
5379 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5380 }
5381
5382 /* Output operand X in assembler syntax to stdio stream FILE.
5383 CODE specified the format flag. The following format flags
5384 are recognized:
5385
5386 'C': print opcode suffix for branch condition.
5387 'D': print opcode suffix for inverse branch condition.
5388 'E': print opcode suffix for branch on index instruction.
5389 'G': print the size of the operand in bytes.
5390 'J': print tls_load/tls_gdcall/tls_ldcall suffix
5391 'M': print the second word of a TImode operand.
5392 'N': print the second word of a DImode operand.
5393 'O': print only the displacement of a memory reference.
5394 'R': print only the base register of a memory reference.
5395 'S': print S-type memory reference (base+displacement).
5396 'Y': print shift count operand.
5397
5398 'b': print integer X as if it's an unsigned byte.
5399 'c': print integer X as if it's a signed byte.
5400 'e': "end" of DImode contiguous bitmask X.
5401 'f': "end" of SImode contiguous bitmask X.
5402 'h': print integer X as if it's a signed halfword.
5403 'i': print the first nonzero HImode part of X.
5404 'j': print the first HImode part unequal to -1 of X.
5405 'k': print the first nonzero SImode part of X.
5406 'm': print the first SImode part unequal to -1 of X.
5407 'o': print integer X as if it's an unsigned 32-bit word.
5408 's': "start" of DImode contiguous bitmask X.
5409 't': "start" of SImode contiguous bitmask X.
5410 'x': print integer X as if it's an unsigned halfword.
5411 */
5412
5413 void
5414 print_operand (FILE *file, rtx x, int code)
5415 {
5416 HOST_WIDE_INT ival;
5417
5418 switch (code)
5419 {
5420 case 'C':
5421 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
5422 return;
5423
5424 case 'D':
5425 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
5426 return;
5427
5428 case 'E':
5429 if (GET_CODE (x) == LE)
5430 fprintf (file, "l");
5431 else if (GET_CODE (x) == GT)
5432 fprintf (file, "h");
5433 else
5434 output_operand_lossage ("invalid comparison operator "
5435 "for 'E' output modifier");
5436 return;
5437
5438 case 'J':
5439 if (GET_CODE (x) == SYMBOL_REF)
5440 {
5441 fprintf (file, "%s", ":tls_load:");
5442 output_addr_const (file, x);
5443 }
5444 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
5445 {
5446 fprintf (file, "%s", ":tls_gdcall:");
5447 output_addr_const (file, XVECEXP (x, 0, 0));
5448 }
5449 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
5450 {
5451 fprintf (file, "%s", ":tls_ldcall:");
5452 assemble_name (file, get_some_local_dynamic_name ());
5453 }
5454 else
5455 output_operand_lossage ("invalid reference for 'J' output modifier");
5456 return;
5457
5458 case 'G':
5459 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
5460 return;
5461
5462 case 'O':
5463 {
5464 struct s390_address ad;
5465 int ret;
5466
5467 if (!MEM_P (x))
5468 {
5469 output_operand_lossage ("memory reference expected for "
5470 "'O' output modifier");
5471 return;
5472 }
5473
5474 ret = s390_decompose_address (XEXP (x, 0), &ad);
5475
5476 if (!ret
5477 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5478 || ad.indx)
5479 {
5480 output_operand_lossage ("invalid address for 'O' output modifier");
5481 return;
5482 }
5483
5484 if (ad.disp)
5485 output_addr_const (file, ad.disp);
5486 else
5487 fprintf (file, "0");
5488 }
5489 return;
5490
5491 case 'R':
5492 {
5493 struct s390_address ad;
5494 int ret;
5495
5496 if (!MEM_P (x))
5497 {
5498 output_operand_lossage ("memory reference expected for "
5499 "'R' output modifier");
5500 return;
5501 }
5502
5503 ret = s390_decompose_address (XEXP (x, 0), &ad);
5504
5505 if (!ret
5506 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5507 || ad.indx)
5508 {
5509 output_operand_lossage ("invalid address for 'R' output modifier");
5510 return;
5511 }
5512
5513 if (ad.base)
5514 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
5515 else
5516 fprintf (file, "0");
5517 }
5518 return;
5519
5520 case 'S':
5521 {
5522 struct s390_address ad;
5523 int ret;
5524
5525 if (!MEM_P (x))
5526 {
5527 output_operand_lossage ("memory reference expected for "
5528 "'S' output modifier");
5529 return;
5530 }
5531 ret = s390_decompose_address (XEXP (x, 0), &ad);
5532
5533 if (!ret
5534 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5535 || ad.indx)
5536 {
5537 output_operand_lossage ("invalid address for 'S' output modifier");
5538 return;
5539 }
5540
5541 if (ad.disp)
5542 output_addr_const (file, ad.disp);
5543 else
5544 fprintf (file, "0");
5545
5546 if (ad.base)
5547 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5548 }
5549 return;
5550
5551 case 'N':
5552 if (GET_CODE (x) == REG)
5553 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5554 else if (GET_CODE (x) == MEM)
5555 x = change_address (x, VOIDmode,
5556 plus_constant (Pmode, XEXP (x, 0), 4));
5557 else
5558 output_operand_lossage ("register or memory expression expected "
5559 "for 'N' output modifier");
5560 break;
5561
5562 case 'M':
5563 if (GET_CODE (x) == REG)
5564 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5565 else if (GET_CODE (x) == MEM)
5566 x = change_address (x, VOIDmode,
5567 plus_constant (Pmode, XEXP (x, 0), 8));
5568 else
5569 output_operand_lossage ("register or memory expression expected "
5570 "for 'M' output modifier");
5571 break;
5572
5573 case 'Y':
5574 print_shift_count_operand (file, x);
5575 return;
5576 }
5577
5578 switch (GET_CODE (x))
5579 {
5580 case REG:
5581 fprintf (file, "%s", reg_names[REGNO (x)]);
5582 break;
5583
5584 case MEM:
5585 output_address (XEXP (x, 0));
5586 break;
5587
5588 case CONST:
5589 case CODE_LABEL:
5590 case LABEL_REF:
5591 case SYMBOL_REF:
5592 output_addr_const (file, x);
5593 break;
5594
5595 case CONST_INT:
5596 ival = INTVAL (x);
5597 switch (code)
5598 {
5599 case 0:
5600 break;
5601 case 'b':
5602 ival &= 0xff;
5603 break;
5604 case 'c':
5605 ival = ((ival & 0xff) ^ 0x80) - 0x80;
5606 break;
5607 case 'x':
5608 ival &= 0xffff;
5609 break;
5610 case 'h':
5611 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
5612 break;
5613 case 'i':
5614 ival = s390_extract_part (x, HImode, 0);
5615 break;
5616 case 'j':
5617 ival = s390_extract_part (x, HImode, -1);
5618 break;
5619 case 'k':
5620 ival = s390_extract_part (x, SImode, 0);
5621 break;
5622 case 'm':
5623 ival = s390_extract_part (x, SImode, -1);
5624 break;
5625 case 'o':
5626 ival &= 0xffffffff;
5627 break;
5628 case 'e': case 'f':
5629 case 's': case 't':
5630 {
5631 int pos, len;
5632 bool ok;
5633
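/* s390_contiguous_bitmask_p returns the position of the lowest set
   bit counted from the LSB; convert it to the MSB-based bit numbering
   used by the instruction encodings: 's'/'t' print the start
   (leftmost) bit of the mask, 'e'/'f' the end (rightmost) bit. */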
5634 len = (code == 's' || code == 'e' ? 64 : 32);
5635 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
5636 gcc_assert (ok);
5637 if (code == 's' || code == 't')
5638 ival = 64 - pos - len;
5639 else
5640 ival = 64 - 1 - pos;
5641 }
5642 break;
5643 default:
5644 output_operand_lossage ("invalid constant for output modifier '%c'", code);
5645 }
5646 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
5647 break;
5648
5649 case CONST_DOUBLE:
5650 gcc_assert (GET_MODE (x) == VOIDmode);
5651 if (code == 'b')
5652 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
5653 else if (code == 'x')
5654 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
5655 else if (code == 'h')
5656 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5657 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
5658 else
5659 {
5660 if (code == 0)
5661 output_operand_lossage ("invalid constant - try using "
5662 "an output modifier");
5663 else
5664 output_operand_lossage ("invalid constant for output modifier '%c'",
5665 code);
5666 }
5667 break;
5668
5669 default:
5670 if (code == 0)
5671 output_operand_lossage ("invalid expression - try using "
5672 "an output modifier");
5673 else
5674 output_operand_lossage ("invalid expression for output "
5675 "modifier '%c'", code);
5676 break;
5677 }
5678 }
5679
5680 /* Target hook for assembling integer objects. We need to define it
5681 here to work around a bug in some versions of GAS, which couldn't
5682 handle values smaller than INT_MIN when printed in decimal. */
5683
5684 static bool
5685 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
5686 {
5687 if (size == 8 && aligned_p
5688 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
5689 {
5690 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
5691 INTVAL (x));
5692 return true;
5693 }
5694 return default_assemble_integer (x, size, aligned_p);
5695 }
5696
5697 /* Returns true if register REGNO is used for forming
5698 a memory address in expression X. */
5699
5700 static bool
5701 reg_used_in_mem_p (int regno, rtx x)
5702 {
5703 enum rtx_code code = GET_CODE (x);
5704 int i, j;
5705 const char *fmt;
5706
5707 if (code == MEM)
5708 {
5709 if (refers_to_regno_p (regno, regno+1,
5710 XEXP (x, 0), 0))
5711 return true;
5712 }
5713 else if (code == SET
5714 && GET_CODE (SET_DEST (x)) == PC)
5715 {
5716 if (refers_to_regno_p (regno, regno+1,
5717 SET_SRC (x), 0))
5718 return true;
5719 }
5720
5721 fmt = GET_RTX_FORMAT (code);
5722 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5723 {
5724 if (fmt[i] == 'e'
5725 && reg_used_in_mem_p (regno, XEXP (x, i)))
5726 return true;
5727
5728 else if (fmt[i] == 'E')
5729 for (j = 0; j < XVECLEN (x, i); j++)
5730 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
5731 return true;
5732 }
5733 return false;
5734 }
5735
5736 /* Returns true if expression DEP_RTX sets an address register
5737 used by instruction INSN to address memory. */
5738
5739 static bool
5740 addr_generation_dependency_p (rtx dep_rtx, rtx insn)
5741 {
5742 rtx target, pat;
5743
5744 if (GET_CODE (dep_rtx) == INSN)
5745 dep_rtx = PATTERN (dep_rtx);
5746
5747 if (GET_CODE (dep_rtx) == SET)
5748 {
5749 target = SET_DEST (dep_rtx);
5750 if (GET_CODE (target) == STRICT_LOW_PART)
5751 target = XEXP (target, 0);
5752 while (GET_CODE (target) == SUBREG)
5753 target = SUBREG_REG (target);
5754
5755 if (GET_CODE (target) == REG)
5756 {
5757 int regno = REGNO (target);
5758
5759 if (s390_safe_attr_type (insn) == TYPE_LA)
5760 {
5761 pat = PATTERN (insn);
5762 if (GET_CODE (pat) == PARALLEL)
5763 {
5764 gcc_assert (XVECLEN (pat, 0) == 2);
5765 pat = XVECEXP (pat, 0, 0);
5766 }
5767 gcc_assert (GET_CODE (pat) == SET);
5768 return refers_to_regno_p (regno, regno+1, SET_SRC (pat), 0);
5769 }
5770 else if (get_attr_atype (insn) == ATYPE_AGEN)
5771 return reg_used_in_mem_p (regno, PATTERN (insn));
5772 }
5773 }
5774 return false;
5775 }
5776
5777 /* Return 1 if dep_insn sets a register used by insn in the agen unit. */
5778
5779 int
5780 s390_agen_dep_p (rtx dep_insn, rtx insn)
5781 {
5782 rtx dep_rtx = PATTERN (dep_insn);
5783 int i;
5784
5785 if (GET_CODE (dep_rtx) == SET
5786 && addr_generation_dependency_p (dep_rtx, insn))
5787 return 1;
5788 else if (GET_CODE (dep_rtx) == PARALLEL)
5789 {
5790 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
5791 {
5792 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
5793 return 1;
5794 }
5795 }
5796 return 0;
5797 }
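/* A minimal sketch of the dependency this detects (hypothetical insns
   and registers):

       lr	%r1,%r2		(dep_insn: sets r1)
       l	%r3,0(%r1)	(insn: uses r1 to form its address)

   Here s390_agen_dep_p returns 1, because the load cannot form its
   address until the preceding set of r1 has completed.  */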
5798
5799
5800 /* A C statement (sans semicolon) to update the integer scheduling priority
5801 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
5802 reduce the priority to execute INSN later. Do not define this macro if
5803 you do not need to adjust the scheduling priorities of insns.
5804
5805 A STD instruction should be scheduled earlier,
5806 in order to use the bypass. */
5807 static int
5808 s390_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
5809 {
5810 if (! INSN_P (insn))
5811 return priority;
5812
5813 if (s390_tune != PROCESSOR_2084_Z990
5814 && s390_tune != PROCESSOR_2094_Z9_109
5815 && s390_tune != PROCESSOR_2097_Z10
5816 && s390_tune != PROCESSOR_2817_Z196
5817 && s390_tune != PROCESSOR_2827_ZEC12)
5818 return priority;
5819
5820 switch (s390_safe_attr_type (insn))
5821 {
5822 case TYPE_FSTOREDF:
5823 case TYPE_FSTORESF:
5824 priority = priority << 3;
5825 break;
5826 case TYPE_STORE:
5827 case TYPE_STM:
5828 priority = priority << 1;
5829 break;
5830 default:
5831 break;
5832 }
5833 return priority;
5834 }
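/* For example (hypothetical priorities): a floating-point store that
   enters with priority 4 leaves with 4 << 3 == 32, and a plain store
   or STM with 4 << 1 == 8, so both are preferred over unrelated insns
   of the same original priority.  */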
5835
5836
5837 /* The number of instructions that can be issued per cycle. */
5838
5839 static int
5840 s390_issue_rate (void)
5841 {
5842 switch (s390_tune)
5843 {
5844 case PROCESSOR_2084_Z990:
5845 case PROCESSOR_2094_Z9_109:
5846 case PROCESSOR_2817_Z196:
5847 return 3;
5848 case PROCESSOR_2097_Z10:
5849 case PROCESSOR_2827_ZEC12:
5850 return 2;
5851 default:
5852 return 1;
5853 }
5854 }
5855
5856 static int
5857 s390_first_cycle_multipass_dfa_lookahead (void)
5858 {
5859 return 4;
5860 }
5861
5862 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
5863 Fix up MEMs as required. */
5864
5865 static void
5866 annotate_constant_pool_refs (rtx *x)
5867 {
5868 int i, j;
5869 const char *fmt;
5870
5871 gcc_assert (GET_CODE (*x) != SYMBOL_REF
5872 || !CONSTANT_POOL_ADDRESS_P (*x));
5873
5874 /* Literal pool references can only occur inside a MEM ... */
5875 if (GET_CODE (*x) == MEM)
5876 {
5877 rtx memref = XEXP (*x, 0);
5878
5879 if (GET_CODE (memref) == SYMBOL_REF
5880 && CONSTANT_POOL_ADDRESS_P (memref))
5881 {
5882 rtx base = cfun->machine->base_reg;
5883 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
5884 UNSPEC_LTREF);
5885
5886 *x = replace_equiv_address (*x, addr);
5887 return;
5888 }
5889
5890 if (GET_CODE (memref) == CONST
5891 && GET_CODE (XEXP (memref, 0)) == PLUS
5892 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
5893 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
5894 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
5895 {
5896 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
5897 rtx sym = XEXP (XEXP (memref, 0), 0);
5898 rtx base = cfun->machine->base_reg;
5899 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5900 UNSPEC_LTREF);
5901
5902 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
5903 return;
5904 }
5905 }
5906
5907 /* ... or a load-address type pattern. */
5908 if (GET_CODE (*x) == SET)
5909 {
5910 rtx addrref = SET_SRC (*x);
5911
5912 if (GET_CODE (addrref) == SYMBOL_REF
5913 && CONSTANT_POOL_ADDRESS_P (addrref))
5914 {
5915 rtx base = cfun->machine->base_reg;
5916 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
5917 UNSPEC_LTREF);
5918
5919 SET_SRC (*x) = addr;
5920 return;
5921 }
5922
5923 if (GET_CODE (addrref) == CONST
5924 && GET_CODE (XEXP (addrref, 0)) == PLUS
5925 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
5926 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
5927 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
5928 {
5929 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
5930 rtx sym = XEXP (XEXP (addrref, 0), 0);
5931 rtx base = cfun->machine->base_reg;
5932 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5933 UNSPEC_LTREF);
5934
5935 SET_SRC (*x) = plus_constant (Pmode, addr, off);
5936 return;
5937 }
5938 }
5939
5940 /* Annotate LTREL_BASE as well. */
5941 if (GET_CODE (*x) == UNSPEC
5942 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
5943 {
5944 rtx base = cfun->machine->base_reg;
5945 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
5946 UNSPEC_LTREL_BASE);
5947 return;
5948 }
5949
5950 fmt = GET_RTX_FORMAT (GET_CODE (*x));
5951 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
5952 {
5953 if (fmt[i] == 'e')
5954 {
5955 annotate_constant_pool_refs (&XEXP (*x, i));
5956 }
5957 else if (fmt[i] == 'E')
5958 {
5959 for (j = 0; j < XVECLEN (*x, i); j++)
5960 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
5961 }
5962 }
5963 }
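/* A rough sketch of the annotation performed above (RTL shown
   schematically, not verbatim):

       (mem (symbol_ref <pool entry>))

   becomes

       (mem (unspec [(symbol_ref <pool entry>) (reg <base>)] UNSPEC_LTREF))

   making the dependency of the access on the literal pool base
   register explicit for the following passes.  */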
5964
5965 /* Split all branches that exceed the maximum distance.
5966 Returns true if this created a new literal pool entry. */
5967
5968 static int
5969 s390_split_branches (void)
5970 {
5971 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
5972 int new_literal = 0, ret;
5973 rtx insn, pat, tmp, target;
5974 rtx *label;
5975
5976 /* We need correct insn addresses. */
5977
5978 shorten_branches (get_insns ());
5979
5980 /* Find all branches that exceed 64KB, and split them. */
5981
5982 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5983 {
5984 if (GET_CODE (insn) != JUMP_INSN)
5985 continue;
5986
5987 pat = PATTERN (insn);
5988 if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2)
5989 pat = XVECEXP (pat, 0, 0);
5990 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
5991 continue;
5992
5993 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
5994 {
5995 label = &SET_SRC (pat);
5996 }
5997 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
5998 {
5999 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
6000 label = &XEXP (SET_SRC (pat), 1);
6001 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
6002 label = &XEXP (SET_SRC (pat), 2);
6003 else
6004 continue;
6005 }
6006 else
6007 continue;
6008
6009 if (get_attr_length (insn) <= 4)
6010 continue;
6011
6012 /* We are going to use the return register as a scratch register;
6013 make sure it will be saved/restored by the prologue/epilogue. */
6014 cfun_frame_layout.save_return_addr_p = 1;
6015
6016 if (!flag_pic)
6017 {
6018 new_literal = 1;
6019 tmp = force_const_mem (Pmode, *label);
6020 tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, tmp), insn);
6021 INSN_ADDRESSES_NEW (tmp, -1);
6022 annotate_constant_pool_refs (&PATTERN (tmp));
6023
6024 target = temp_reg;
6025 }
6026 else
6027 {
6028 new_literal = 1;
6029 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
6030 UNSPEC_LTREL_OFFSET);
6031 target = gen_rtx_CONST (Pmode, target);
6032 target = force_const_mem (Pmode, target);
6033 tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, target), insn);
6034 INSN_ADDRESSES_NEW (tmp, -1);
6035 annotate_constant_pool_refs (&PATTERN (tmp));
6036
6037 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
6038 cfun->machine->base_reg),
6039 UNSPEC_LTREL_BASE);
6040 target = gen_rtx_PLUS (Pmode, temp_reg, target);
6041 }
6042
6043 ret = validate_change (insn, label, target, 0);
6044 gcc_assert (ret);
6045 }
6046
6047 return new_literal;
6048 }
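/* Background for the 64KB limit used above: the relative branch
   instructions encode a signed 16-bit halfword offset, i.e. roughly
   +-64KB around the branch.  As an illustration (hypothetical
   registers and labels), a too-distant branch

       j	.Lfar

   is rewritten to load the target address from the literal pool into
   the return register and branch through it:

       l	%r14,...(<base>)
       br	%r14

   where <base> is the literal pool base register.  */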
6049
6050
6051 /* Find an annotated literal pool symbol referenced in RTX X,
6052 and store it at REF. Will abort if X contains references to
6053 more than one such pool symbol; multiple references to the same
6054 symbol are allowed, however.
6055
6056 The rtx pointed to by REF must be initialized to NULL_RTX
6057 by the caller before calling this routine. */
6058
6059 static void
6060 find_constant_pool_ref (rtx x, rtx *ref)
6061 {
6062 int i, j;
6063 const char *fmt;
6064
6065 /* Ignore LTREL_BASE references. */
6066 if (GET_CODE (x) == UNSPEC
6067 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6068 return;
6069 /* Likewise POOL_ENTRY insns. */
6070 if (GET_CODE (x) == UNSPEC_VOLATILE
6071 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
6072 return;
6073
6074 gcc_assert (GET_CODE (x) != SYMBOL_REF
6075 || !CONSTANT_POOL_ADDRESS_P (x));
6076
6077 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
6078 {
6079 rtx sym = XVECEXP (x, 0, 0);
6080 gcc_assert (GET_CODE (sym) == SYMBOL_REF
6081 && CONSTANT_POOL_ADDRESS_P (sym));
6082
6083 if (*ref == NULL_RTX)
6084 *ref = sym;
6085 else
6086 gcc_assert (*ref == sym);
6087
6088 return;
6089 }
6090
6091 fmt = GET_RTX_FORMAT (GET_CODE (x));
6092 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6093 {
6094 if (fmt[i] == 'e')
6095 {
6096 find_constant_pool_ref (XEXP (x, i), ref);
6097 }
6098 else if (fmt[i] == 'E')
6099 {
6100 for (j = 0; j < XVECLEN (x, i); j++)
6101 find_constant_pool_ref (XVECEXP (x, i, j), ref);
6102 }
6103 }
6104 }
6105
6106 /* Replace every reference to the annotated literal pool
6107 symbol REF in X by its base plus OFFSET. */
6108
6109 static void
6110 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
6111 {
6112 int i, j;
6113 const char *fmt;
6114
6115 gcc_assert (*x != ref);
6116
6117 if (GET_CODE (*x) == UNSPEC
6118 && XINT (*x, 1) == UNSPEC_LTREF
6119 && XVECEXP (*x, 0, 0) == ref)
6120 {
6121 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
6122 return;
6123 }
6124
6125 if (GET_CODE (*x) == PLUS
6126 && GET_CODE (XEXP (*x, 1)) == CONST_INT
6127 && GET_CODE (XEXP (*x, 0)) == UNSPEC
6128 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
6129 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
6130 {
6131 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
6132 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
6133 return;
6134 }
6135
6136 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6137 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6138 {
6139 if (fmt[i] == 'e')
6140 {
6141 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
6142 }
6143 else if (fmt[i] == 'E')
6144 {
6145 for (j = 0; j < XVECLEN (*x, i); j++)
6146 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
6147 }
6148 }
6149 }
6150
6151 /* Check whether X contains an UNSPEC_LTREL_BASE.
6152 Return its constant pool symbol if found, NULL_RTX otherwise. */
6153
6154 static rtx
6155 find_ltrel_base (rtx x)
6156 {
6157 int i, j;
6158 const char *fmt;
6159
6160 if (GET_CODE (x) == UNSPEC
6161 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6162 return XVECEXP (x, 0, 0);
6163
6164 fmt = GET_RTX_FORMAT (GET_CODE (x));
6165 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6166 {
6167 if (fmt[i] == 'e')
6168 {
6169 rtx fnd = find_ltrel_base (XEXP (x, i));
6170 if (fnd)
6171 return fnd;
6172 }
6173 else if (fmt[i] == 'E')
6174 {
6175 for (j = 0; j < XVECLEN (x, i); j++)
6176 {
6177 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
6178 if (fnd)
6179 return fnd;
6180 }
6181 }
6182 }
6183
6184 return NULL_RTX;
6185 }
6186
6187 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
6188
6189 static void
6190 replace_ltrel_base (rtx *x)
6191 {
6192 int i, j;
6193 const char *fmt;
6194
6195 if (GET_CODE (*x) == UNSPEC
6196 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
6197 {
6198 *x = XVECEXP (*x, 0, 1);
6199 return;
6200 }
6201
6202 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6203 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6204 {
6205 if (fmt[i] == 'e')
6206 {
6207 replace_ltrel_base (&XEXP (*x, i));
6208 }
6209 else if (fmt[i] == 'E')
6210 {
6211 for (j = 0; j < XVECLEN (*x, i); j++)
6212 replace_ltrel_base (&XVECEXP (*x, i, j));
6213 }
6214 }
6215 }
6216
6217
6218 /* We keep a list of constants which we have to add to internal
6219 constant tables in the middle of large functions. */
6220
6221 #define NR_C_MODES 11
6222 enum machine_mode constant_modes[NR_C_MODES] =
6223 {
6224 TFmode, TImode, TDmode,
6225 DFmode, DImode, DDmode,
6226 SFmode, SImode, SDmode,
6227 HImode,
6228 QImode
6229 };
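/* The modes above are listed in order of decreasing size, so dumping
   a pool front to back emits the most strictly aligned (16-byte)
   entries first, then the 8-byte, 4-byte, 2-byte and finally the
   1-byte ones.  */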
6230
6231 struct constant
6232 {
6233 struct constant *next;
6234 rtx value;
6235 rtx label;
6236 };
6237
6238 struct constant_pool
6239 {
6240 struct constant_pool *next;
6241 rtx first_insn;
6242 rtx pool_insn;
6243 bitmap insns;
6244 rtx emit_pool_after;
6245
6246 struct constant *constants[NR_C_MODES];
6247 struct constant *execute;
6248 rtx label;
6249 int size;
6250 };
6251
6252 /* Allocate new constant_pool structure. */
6253
6254 static struct constant_pool *
6255 s390_alloc_pool (void)
6256 {
6257 struct constant_pool *pool;
6258 int i;
6259
6260 pool = (struct constant_pool *) xmalloc (sizeof *pool);
6261 pool->next = NULL;
6262 for (i = 0; i < NR_C_MODES; i++)
6263 pool->constants[i] = NULL;
6264
6265 pool->execute = NULL;
6266 pool->label = gen_label_rtx ();
6267 pool->first_insn = NULL_RTX;
6268 pool->pool_insn = NULL_RTX;
6269 pool->insns = BITMAP_ALLOC (NULL);
6270 pool->size = 0;
6271 pool->emit_pool_after = NULL_RTX;
6272
6273 return pool;
6274 }
6275
6276 /* Create new constant pool covering instructions starting at INSN
6277 and chain it to the end of POOL_LIST. */
6278
6279 static struct constant_pool *
6280 s390_start_pool (struct constant_pool **pool_list, rtx insn)
6281 {
6282 struct constant_pool *pool, **prev;
6283
6284 pool = s390_alloc_pool ();
6285 pool->first_insn = insn;
6286
6287 for (prev = pool_list; *prev; prev = &(*prev)->next)
6288 ;
6289 *prev = pool;
6290
6291 return pool;
6292 }
6293
6294 /* End range of instructions covered by POOL at INSN and emit
6295 placeholder insn representing the pool. */
6296
6297 static void
6298 s390_end_pool (struct constant_pool *pool, rtx insn)
6299 {
6300 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
6301
6302 if (!insn)
6303 insn = get_last_insn ();
6304
6305 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
6306 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6307 }
6308
6309 /* Add INSN to the list of insns covered by POOL. */
6310
6311 static void
6312 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
6313 {
6314 bitmap_set_bit (pool->insns, INSN_UID (insn));
6315 }
6316
6317 /* Return pool out of POOL_LIST that covers INSN. */
6318
6319 static struct constant_pool *
6320 s390_find_pool (struct constant_pool *pool_list, rtx insn)
6321 {
6322 struct constant_pool *pool;
6323
6324 for (pool = pool_list; pool; pool = pool->next)
6325 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
6326 break;
6327
6328 return pool;
6329 }
6330
6331 /* Add constant VAL of mode MODE to the constant pool POOL. */
6332
6333 static void
6334 s390_add_constant (struct constant_pool *pool, rtx val, enum machine_mode mode)
6335 {
6336 struct constant *c;
6337 int i;
6338
6339 for (i = 0; i < NR_C_MODES; i++)
6340 if (constant_modes[i] == mode)
6341 break;
6342 gcc_assert (i != NR_C_MODES);
6343
6344 for (c = pool->constants[i]; c != NULL; c = c->next)
6345 if (rtx_equal_p (val, c->value))
6346 break;
6347
6348 if (c == NULL)
6349 {
6350 c = (struct constant *) xmalloc (sizeof *c);
6351 c->value = val;
6352 c->label = gen_label_rtx ();
6353 c->next = pool->constants[i];
6354 pool->constants[i] = c;
6355 pool->size += GET_MODE_SIZE (mode);
6356 }
6357 }
6358
6359 /* Return an rtx that represents the offset of X from the start of
6360 pool POOL. */
6361
6362 static rtx
6363 s390_pool_offset (struct constant_pool *pool, rtx x)
6364 {
6365 rtx label;
6366
6367 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
6368 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
6369 UNSPEC_POOL_OFFSET);
6370 return gen_rtx_CONST (GET_MODE (x), x);
6371 }
6372
6373 /* Find constant VAL of mode MODE in the constant pool POOL.
6374 Return an RTX describing the distance from the start of
6375 the pool to the location of the new constant. */
6376
6377 static rtx
6378 s390_find_constant (struct constant_pool *pool, rtx val,
6379 enum machine_mode mode)
6380 {
6381 struct constant *c;
6382 int i;
6383
6384 for (i = 0; i < NR_C_MODES; i++)
6385 if (constant_modes[i] == mode)
6386 break;
6387 gcc_assert (i != NR_C_MODES);
6388
6389 for (c = pool->constants[i]; c != NULL; c = c->next)
6390 if (rtx_equal_p (val, c->value))
6391 break;
6392
6393 gcc_assert (c);
6394
6395 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6396 }
6397
6398 /* Check whether INSN is an execute. Return the label_ref to its
6399 execute target template if so, NULL_RTX otherwise. */
6400
6401 static rtx
6402 s390_execute_label (rtx insn)
6403 {
6404 if (GET_CODE (insn) == INSN
6405 && GET_CODE (PATTERN (insn)) == PARALLEL
6406 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
6407 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
6408 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
6409
6410 return NULL_RTX;
6411 }
6412
6413 /* Add execute target for INSN to the constant pool POOL. */
6414
6415 static void
6416 s390_add_execute (struct constant_pool *pool, rtx insn)
6417 {
6418 struct constant *c;
6419
6420 for (c = pool->execute; c != NULL; c = c->next)
6421 if (INSN_UID (insn) == INSN_UID (c->value))
6422 break;
6423
6424 if (c == NULL)
6425 {
6426 c = (struct constant *) xmalloc (sizeof *c);
6427 c->value = insn;
6428 c->label = gen_label_rtx ();
6429 c->next = pool->execute;
6430 pool->execute = c;
6431 pool->size += 6;
6432 }
6433 }
6434
6435 /* Find execute target for INSN in the constant pool POOL.
6436 Return an RTX describing the distance from the start of
6437 the pool to the location of the execute target. */
6438
6439 static rtx
6440 s390_find_execute (struct constant_pool *pool, rtx insn)
6441 {
6442 struct constant *c;
6443
6444 for (c = pool->execute; c != NULL; c = c->next)
6445 if (INSN_UID (insn) == INSN_UID (c->value))
6446 break;
6447
6448 gcc_assert (c);
6449
6450 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6451 }
6452
6453 /* For an execute INSN, extract the execute target template. */
6454
6455 static rtx
6456 s390_execute_target (rtx insn)
6457 {
6458 rtx pattern = PATTERN (insn);
6459 gcc_assert (s390_execute_label (insn));
6460
6461 if (XVECLEN (pattern, 0) == 2)
6462 {
6463 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
6464 }
6465 else
6466 {
6467 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
6468 int i;
6469
6470 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
6471 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
6472
6473 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
6474 }
6475
6476 return pattern;
6477 }
6478
6479 /* Indicate that INSN cannot be duplicated. This is the case for
6480 execute insns that carry a unique label. */
6481
6482 static bool
6483 s390_cannot_copy_insn_p (rtx insn)
6484 {
6485 rtx label = s390_execute_label (insn);
6486 return label && label != const0_rtx;
6487 }
6488
6489 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
6490 do not emit the pool base label. */
6491
6492 static void
6493 s390_dump_pool (struct constant_pool *pool, bool remote_label)
6494 {
6495 struct constant *c;
6496 rtx insn = pool->pool_insn;
6497 int i;
6498
6499 /* Switch to rodata section. */
6500 if (TARGET_CPU_ZARCH)
6501 {
6502 insn = emit_insn_after (gen_pool_section_start (), insn);
6503 INSN_ADDRESSES_NEW (insn, -1);
6504 }
6505
6506 /* Ensure minimum pool alignment. */
6507 if (TARGET_CPU_ZARCH)
6508 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
6509 else
6510 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
6511 INSN_ADDRESSES_NEW (insn, -1);
6512
6513 /* Emit pool base label. */
6514 if (!remote_label)
6515 {
6516 insn = emit_label_after (pool->label, insn);
6517 INSN_ADDRESSES_NEW (insn, -1);
6518 }
6519
6520 /* Dump constants in descending alignment requirement order,
6521 ensuring proper alignment for every constant. */
6522 for (i = 0; i < NR_C_MODES; i++)
6523 for (c = pool->constants[i]; c; c = c->next)
6524 {
6525 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
6526 rtx value = copy_rtx (c->value);
6527 if (GET_CODE (value) == CONST
6528 && GET_CODE (XEXP (value, 0)) == UNSPEC
6529 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
6530 && XVECLEN (XEXP (value, 0), 0) == 1)
6531 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
6532
6533 insn = emit_label_after (c->label, insn);
6534 INSN_ADDRESSES_NEW (insn, -1);
6535
6536 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
6537 gen_rtvec (1, value),
6538 UNSPECV_POOL_ENTRY);
6539 insn = emit_insn_after (value, insn);
6540 INSN_ADDRESSES_NEW (insn, -1);
6541 }
6542
6543 /* Ensure minimum alignment for instructions. */
6544 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
6545 INSN_ADDRESSES_NEW (insn, -1);
6546
6547 /* Output in-pool execute template insns. */
6548 for (c = pool->execute; c; c = c->next)
6549 {
6550 insn = emit_label_after (c->label, insn);
6551 INSN_ADDRESSES_NEW (insn, -1);
6552
6553 insn = emit_insn_after (s390_execute_target (c->value), insn);
6554 INSN_ADDRESSES_NEW (insn, -1);
6555 }
6556
6557 /* Switch back to previous section. */
6558 if (TARGET_CPU_ZARCH)
6559 {
6560 insn = emit_insn_after (gen_pool_section_end (), insn);
6561 INSN_ADDRESSES_NEW (insn, -1);
6562 }
6563
6564 insn = emit_barrier_after (insn);
6565 INSN_ADDRESSES_NEW (insn, -1);
6566
6567 /* Remove placeholder insn. */
6568 remove_insn (pool->pool_insn);
6569 }
6570
6571 /* Free all memory used by POOL. */
6572
6573 static void
6574 s390_free_pool (struct constant_pool *pool)
6575 {
6576 struct constant *c, *next;
6577 int i;
6578
6579 for (i = 0; i < NR_C_MODES; i++)
6580 for (c = pool->constants[i]; c; c = next)
6581 {
6582 next = c->next;
6583 free (c);
6584 }
6585
6586 for (c = pool->execute; c; c = next)
6587 {
6588 next = c->next;
6589 free (c);
6590 }
6591
6592 BITMAP_FREE (pool->insns);
6593 free (pool);
6594 }
6595
6596
6597 /* Collect main literal pool. Return NULL on overflow. */
6598
6599 static struct constant_pool *
6600 s390_mainpool_start (void)
6601 {
6602 struct constant_pool *pool;
6603 rtx insn;
6604
6605 pool = s390_alloc_pool ();
6606
6607 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6608 {
6609 if (GET_CODE (insn) == INSN
6610 && GET_CODE (PATTERN (insn)) == SET
6611 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
6612 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
6613 {
6614 gcc_assert (!pool->pool_insn);
6615 pool->pool_insn = insn;
6616 }
6617
6618 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6619 {
6620 s390_add_execute (pool, insn);
6621 }
6622 else if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
6623 {
6624 rtx pool_ref = NULL_RTX;
6625 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6626 if (pool_ref)
6627 {
6628 rtx constant = get_pool_constant (pool_ref);
6629 enum machine_mode mode = get_pool_mode (pool_ref);
6630 s390_add_constant (pool, constant, mode);
6631 }
6632 }
6633
6634 /* If hot/cold partitioning is enabled we have to make sure that
6635 the literal pool is emitted in the same section where the
6636 initialization of the literal pool base pointer takes place.
6637 emit_pool_after is only used in the non-overflow case on non-Z
6638 CPUs, where we can emit the literal pool at the end of the
6639 function body within the text section. */
6640 if (NOTE_P (insn)
6641 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
6642 && !pool->emit_pool_after)
6643 pool->emit_pool_after = PREV_INSN (insn);
6644 }
6645
6646 gcc_assert (pool->pool_insn || pool->size == 0);
6647
6648 if (pool->size >= 4096)
6649 {
6650 /* We're going to chunkify the pool, so remove the main
6651 pool placeholder insn. */
6652 remove_insn (pool->pool_insn);
6653
6654 s390_free_pool (pool);
6655 pool = NULL;
6656 }
6657
6658 /* If the function ends with the section where the literal pool
6659 should be emitted, set the marker to its end. */
6660 if (pool && !pool->emit_pool_after)
6661 pool->emit_pool_after = get_last_insn ();
6662
6663 return pool;
6664 }
6665
6666 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6667 Modify the current function to output the pool constants as well as
6668 the pool register setup instruction. */
6669
6670 static void
6671 s390_mainpool_finish (struct constant_pool *pool)
6672 {
6673 rtx base_reg = cfun->machine->base_reg;
6674 rtx insn;
6675
6676 /* If the pool is empty, we're done. */
6677 if (pool->size == 0)
6678 {
6679 /* We don't actually need a base register after all. */
6680 cfun->machine->base_reg = NULL_RTX;
6681
6682 if (pool->pool_insn)
6683 remove_insn (pool->pool_insn);
6684 s390_free_pool (pool);
6685 return;
6686 }
6687
6688 /* We need correct insn addresses. */
6689 shorten_branches (get_insns ());
6690
6691 /* On zSeries, we use a LARL to load the pool register. The pool is
6692 located in the .rodata section, so we emit it after the function. */
6693 if (TARGET_CPU_ZARCH)
6694 {
6695 insn = gen_main_base_64 (base_reg, pool->label);
6696 insn = emit_insn_after (insn, pool->pool_insn);
6697 INSN_ADDRESSES_NEW (insn, -1);
6698 remove_insn (pool->pool_insn);
6699
6700 insn = get_last_insn ();
6701 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6702 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6703
6704 s390_dump_pool (pool, 0);
6705 }
6706
6707 /* On S/390, if the total size of the function's code plus literal pool
6708 does not exceed 4096 bytes, we use BASR to set up a function base
6709 pointer, and emit the literal pool at the end of the function. */
6710 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
6711 + pool->size + 8 /* alignment slop */ < 4096)
6712 {
6713 insn = gen_main_base_31_small (base_reg, pool->label);
6714 insn = emit_insn_after (insn, pool->pool_insn);
6715 INSN_ADDRESSES_NEW (insn, -1);
6716 remove_insn (pool->pool_insn);
6717
6718 insn = emit_label_after (pool->label, insn);
6719 INSN_ADDRESSES_NEW (insn, -1);
6720
6721 /* emit_pool_after will be set by s390_mainpool_start to the
6722 last insn of the section where the literal pool should be
6723 emitted. */
6724 insn = pool->emit_pool_after;
6725
6726 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6727 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6728
6729 s390_dump_pool (pool, 1);
6730 }
6731
6732 /* Otherwise, we emit an inline literal pool and use BASR to branch
6733 over it, setting up the pool register at the same time. */
6734 else
6735 {
6736 rtx pool_end = gen_label_rtx ();
6737
6738 insn = gen_main_base_31_large (base_reg, pool->label, pool_end);
6739 insn = emit_jump_insn_after (insn, pool->pool_insn);
6740 JUMP_LABEL (insn) = pool_end;
6741 INSN_ADDRESSES_NEW (insn, -1);
6742 remove_insn (pool->pool_insn);
6743
6744 insn = emit_label_after (pool->label, insn);
6745 INSN_ADDRESSES_NEW (insn, -1);
6746
6747 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6748 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6749
6750 insn = emit_label_after (pool_end, pool->pool_insn);
6751 INSN_ADDRESSES_NEW (insn, -1);
6752
6753 s390_dump_pool (pool, 1);
6754 }
6755
6756
6757 /* Replace all literal pool references. */
6758
6759 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6760 {
6761 if (INSN_P (insn))
6762 replace_ltrel_base (&PATTERN (insn));
6763
6764 if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
6765 {
6766 rtx addr, pool_ref = NULL_RTX;
6767 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6768 if (pool_ref)
6769 {
6770 if (s390_execute_label (insn))
6771 addr = s390_find_execute (pool, insn);
6772 else
6773 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
6774 get_pool_mode (pool_ref));
6775
6776 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
6777 INSN_CODE (insn) = -1;
6778 }
6779 }
6780 }
6781
6782
6783 /* Free the pool. */
6784 s390_free_pool (pool);
6785 }
6786
6787 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6788 We have decided we cannot use this pool, so revert all changes
6789 to the current function that were done by s390_mainpool_start. */
6790 static void
6791 s390_mainpool_cancel (struct constant_pool *pool)
6792 {
6793 /* We didn't actually change the instruction stream, so simply
6794 free the pool memory. */
6795 s390_free_pool (pool);
6796 }
6797
6798
6799 /* Chunkify the literal pool. */
6800
6801 #define S390_POOL_CHUNK_MIN 0xc00
6802 #define S390_POOL_CHUNK_MAX 0xe00
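/* Informal rationale for these bounds (an assumption, not spelled out
   in the surrounding comments): base + 12-bit displacement addressing
   only reaches 0..4095 bytes from the literal pool base, so a chunk,
   together with its alignment padding and the reload code inserted
   later, has to stay below 4096 bytes.  Closing chunks between 0xc00
   (3072) and 0xe00 (3584) bytes keeps that headroom while limiting
   the number of chunks.  */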
6803
6804 static struct constant_pool *
6805 s390_chunkify_start (void)
6806 {
6807 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
6808 int extra_size = 0;
6809 bitmap far_labels;
6810 rtx pending_ltrel = NULL_RTX;
6811 rtx insn;
6812
6813 rtx (*gen_reload_base) (rtx, rtx) =
6814 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
6815
6816
6817 /* We need correct insn addresses. */
6818
6819 shorten_branches (get_insns ());
6820
6821 /* Scan all insns and move literals to pool chunks. */
6822
6823 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6824 {
6825 bool section_switch_p = false;
6826
6827 /* Check for pending LTREL_BASE. */
6828 if (INSN_P (insn))
6829 {
6830 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
6831 if (ltrel_base)
6832 {
6833 gcc_assert (ltrel_base == pending_ltrel);
6834 pending_ltrel = NULL_RTX;
6835 }
6836 }
6837
6838 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6839 {
6840 if (!curr_pool)
6841 curr_pool = s390_start_pool (&pool_list, insn);
6842
6843 s390_add_execute (curr_pool, insn);
6844 s390_add_pool_insn (curr_pool, insn);
6845 }
6846 else if (GET_CODE (insn) == INSN || CALL_P (insn))
6847 {
6848 rtx pool_ref = NULL_RTX;
6849 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6850 if (pool_ref)
6851 {
6852 rtx constant = get_pool_constant (pool_ref);
6853 enum machine_mode mode = get_pool_mode (pool_ref);
6854
6855 if (!curr_pool)
6856 curr_pool = s390_start_pool (&pool_list, insn);
6857
6858 s390_add_constant (curr_pool, constant, mode);
6859 s390_add_pool_insn (curr_pool, insn);
6860
6861 /* Don't split the pool chunk between a LTREL_OFFSET load
6862 and the corresponding LTREL_BASE. */
6863 if (GET_CODE (constant) == CONST
6864 && GET_CODE (XEXP (constant, 0)) == UNSPEC
6865 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
6866 {
6867 gcc_assert (!pending_ltrel);
6868 pending_ltrel = pool_ref;
6869 }
6870 }
6871 }
6872
6873 if (GET_CODE (insn) == JUMP_INSN || GET_CODE (insn) == CODE_LABEL)
6874 {
6875 if (curr_pool)
6876 s390_add_pool_insn (curr_pool, insn);
6877 /* An LTREL_BASE must follow within the same basic block. */
6878 gcc_assert (!pending_ltrel);
6879 }
6880
6881 if (NOTE_P (insn))
6882 switch (NOTE_KIND (insn))
6883 {
6884 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
6885 section_switch_p = true;
6886 break;
6887 case NOTE_INSN_VAR_LOCATION:
6888 case NOTE_INSN_CALL_ARG_LOCATION:
6889 continue;
6890 default:
6891 break;
6892 }
6893
6894 if (!curr_pool
6895 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
6896 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
6897 continue;
6898
6899 if (TARGET_CPU_ZARCH)
6900 {
6901 if (curr_pool->size < S390_POOL_CHUNK_MAX)
6902 continue;
6903
6904 s390_end_pool (curr_pool, NULL_RTX);
6905 curr_pool = NULL;
6906 }
6907 else
6908 {
6909 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
6910 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
6911 + extra_size;
6912
6913 /* We will later have to insert base register reload insns.
6914 Those will have an effect on code size, which we need to
6915 consider here. This calculation makes rather pessimistic
6916 worst-case assumptions. */
6917 if (GET_CODE (insn) == CODE_LABEL)
6918 extra_size += 6;
6919
6920 if (chunk_size < S390_POOL_CHUNK_MIN
6921 && curr_pool->size < S390_POOL_CHUNK_MIN
6922 && !section_switch_p)
6923 continue;
6924
6925 /* Pool chunks can only be inserted after BARRIERs ... */
6926 if (GET_CODE (insn) == BARRIER)
6927 {
6928 s390_end_pool (curr_pool, insn);
6929 curr_pool = NULL;
6930 extra_size = 0;
6931 }
6932
6933 /* ... so if we don't find one in time, create one. */
6934 else if (chunk_size > S390_POOL_CHUNK_MAX
6935 || curr_pool->size > S390_POOL_CHUNK_MAX
6936 || section_switch_p)
6937 {
6938 rtx label, jump, barrier, next, prev;
6939
6940 if (!section_switch_p)
6941 {
6942 /* We can insert the barrier only after a 'real' insn. */
6943 if (GET_CODE (insn) != INSN && GET_CODE (insn) != CALL_INSN)
6944 continue;
6945 if (get_attr_length (insn) == 0)
6946 continue;
6947 /* Don't separate LTREL_BASE from the corresponding
6948 LTREL_OFFSET load. */
6949 if (pending_ltrel)
6950 continue;
6951 next = insn;
6952 do
6953 {
6954 insn = next;
6955 next = NEXT_INSN (insn);
6956 }
6957 while (next
6958 && NOTE_P (next)
6959 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
6960 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
6961 }
6962 else
6963 {
6964 gcc_assert (!pending_ltrel);
6965
6966 /* The old pool has to end before the section switch
6967 note in order to make it part of the current
6968 section. */
6969 insn = PREV_INSN (insn);
6970 }
6971
6972 label = gen_label_rtx ();
6973 prev = insn;
6974 if (prev && NOTE_P (prev))
6975 prev = prev_nonnote_insn (prev);
6976 if (prev)
6977 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
6978 INSN_LOCATION (prev));
6979 else
6980 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
6981 barrier = emit_barrier_after (jump);
6982 insn = emit_label_after (label, barrier);
6983 JUMP_LABEL (jump) = label;
6984 LABEL_NUSES (label) = 1;
6985
6986 INSN_ADDRESSES_NEW (jump, -1);
6987 INSN_ADDRESSES_NEW (barrier, -1);
6988 INSN_ADDRESSES_NEW (insn, -1);
6989
6990 s390_end_pool (curr_pool, barrier);
6991 curr_pool = NULL;
6992 extra_size = 0;
6993 }
6994 }
6995 }
6996
6997 if (curr_pool)
6998 s390_end_pool (curr_pool, NULL_RTX);
6999 gcc_assert (!pending_ltrel);
7000
7001 /* Find all labels that are branched into
7002 from an insn belonging to a different chunk. */
7003
7004 far_labels = BITMAP_ALLOC (NULL);
7005
7006 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7007 {
7008 /* Labels marked with LABEL_PRESERVE_P can be the target
7009 of non-local jumps, so we have to mark them.
7010 The same holds for named labels.
7011
7012 Don't do that, however, if it is the label before
7013 a jump table. */
7014
7015 if (GET_CODE (insn) == CODE_LABEL
7016 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
7017 {
7018 rtx vec_insn = next_real_insn (insn);
7019 rtx vec_pat = vec_insn && GET_CODE (vec_insn) == JUMP_INSN ?
7020 PATTERN (vec_insn) : NULL_RTX;
7021 if (!vec_pat
7022 || !(GET_CODE (vec_pat) == ADDR_VEC
7023 || GET_CODE (vec_pat) == ADDR_DIFF_VEC))
7024 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
7025 }
7026
7027 /* If we have a direct jump (conditional or unconditional)
7028 or a casesi jump, check all potential targets. */
7029 else if (GET_CODE (insn) == JUMP_INSN)
7030 {
7031 rtx pat = PATTERN (insn);
7032 if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2)
7033 pat = XVECEXP (pat, 0, 0);
7034
7035 if (GET_CODE (pat) == SET)
7036 {
7037 rtx label = JUMP_LABEL (insn);
7038 if (label)
7039 {
7040 if (s390_find_pool (pool_list, label)
7041 != s390_find_pool (pool_list, insn))
7042 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7043 }
7044 }
7045 else if (GET_CODE (pat) == PARALLEL
7046 && XVECLEN (pat, 0) == 2
7047 && GET_CODE (XVECEXP (pat, 0, 0)) == SET
7048 && GET_CODE (XVECEXP (pat, 0, 1)) == USE
7049 && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == LABEL_REF)
7050 {
7051 /* Find the jump table used by this casesi jump. */
7052 rtx vec_label = XEXP (XEXP (XVECEXP (pat, 0, 1), 0), 0);
7053 rtx vec_insn = next_real_insn (vec_label);
7054 rtx vec_pat = vec_insn && GET_CODE (vec_insn) == JUMP_INSN ?
7055 PATTERN (vec_insn) : NULL_RTX;
7056 if (vec_pat
7057 && (GET_CODE (vec_pat) == ADDR_VEC
7058 || GET_CODE (vec_pat) == ADDR_DIFF_VEC))
7059 {
7060 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
7061
7062 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
7063 {
7064 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
7065
7066 if (s390_find_pool (pool_list, label)
7067 != s390_find_pool (pool_list, insn))
7068 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7069 }
7070 }
7071 }
7072 }
7073 }
7074
7075 /* Insert base register reload insns before every pool. */
7076
7077 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7078 {
7079 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7080 curr_pool->label);
7081 rtx insn = curr_pool->first_insn;
7082 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
7083 }
7084
7085 /* Insert base register reload insns at every far label. */
7086
7087 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7088 if (GET_CODE (insn) == CODE_LABEL
7089 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
7090 {
7091 struct constant_pool *pool = s390_find_pool (pool_list, insn);
7092 if (pool)
7093 {
7094 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7095 pool->label);
7096 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
7097 }
7098 }
7099
7100
7101 BITMAP_FREE (far_labels);
7102
7103
7104 /* Recompute insn addresses. */
7105
7106 init_insn_lengths ();
7107 shorten_branches (get_insns ());
7108
7109 return pool_list;
7110 }
7111
7112 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7113 After we have decided to use this list, finish implementing
7114 all changes to the current function as required. */
7115
7116 static void
7117 s390_chunkify_finish (struct constant_pool *pool_list)
7118 {
7119 struct constant_pool *curr_pool = NULL;
7120 rtx insn;
7121
7122
7123 /* Replace all literal pool references. */
7124
7125 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7126 {
7127 if (INSN_P (insn))
7128 replace_ltrel_base (&PATTERN (insn));
7129
7130 curr_pool = s390_find_pool (pool_list, insn);
7131 if (!curr_pool)
7132 continue;
7133
7134 if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
7135 {
7136 rtx addr, pool_ref = NULL_RTX;
7137 find_constant_pool_ref (PATTERN (insn), &pool_ref);
7138 if (pool_ref)
7139 {
7140 if (s390_execute_label (insn))
7141 addr = s390_find_execute (curr_pool, insn);
7142 else
7143 addr = s390_find_constant (curr_pool,
7144 get_pool_constant (pool_ref),
7145 get_pool_mode (pool_ref));
7146
7147 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
7148 INSN_CODE (insn) = -1;
7149 }
7150 }
7151 }
7152
7153 /* Dump out all literal pools. */
7154
7155 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7156 s390_dump_pool (curr_pool, 0);
7157
7158 /* Free pool list. */
7159
7160 while (pool_list)
7161 {
7162 struct constant_pool *next = pool_list->next;
7163 s390_free_pool (pool_list);
7164 pool_list = next;
7165 }
7166 }
7167
7168 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7169 We have decided we cannot use this list, so revert all changes
7170 to the current function that were done by s390_chunkify_start. */
7171
7172 static void
7173 s390_chunkify_cancel (struct constant_pool *pool_list)
7174 {
7175 struct constant_pool *curr_pool = NULL;
7176 rtx insn;
7177
7178 /* Remove all pool placeholder insns. */
7179
7180 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7181 {
7182 /* Did we insert an extra barrier? Remove it. */
7183 rtx barrier = PREV_INSN (curr_pool->pool_insn);
7184 rtx jump = barrier? PREV_INSN (barrier) : NULL_RTX;
7185 rtx label = NEXT_INSN (curr_pool->pool_insn);
7186
7187 if (jump && GET_CODE (jump) == JUMP_INSN
7188 && barrier && GET_CODE (barrier) == BARRIER
7189 && label && GET_CODE (label) == CODE_LABEL
7190 && GET_CODE (PATTERN (jump)) == SET
7191 && SET_DEST (PATTERN (jump)) == pc_rtx
7192 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
7193 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
7194 {
7195 remove_insn (jump);
7196 remove_insn (barrier);
7197 remove_insn (label);
7198 }
7199
7200 remove_insn (curr_pool->pool_insn);
7201 }
7202
7203 /* Remove all base register reload insns. */
7204
7205 for (insn = get_insns (); insn; )
7206 {
7207 rtx next_insn = NEXT_INSN (insn);
7208
7209 if (GET_CODE (insn) == INSN
7210 && GET_CODE (PATTERN (insn)) == SET
7211 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
7212 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
7213 remove_insn (insn);
7214
7215 insn = next_insn;
7216 }
7217
7218 /* Free pool list. */
7219
7220 while (pool_list)
7221 {
7222 struct constant_pool *next = pool_list->next;
7223 s390_free_pool (pool_list);
7224 pool_list = next;
7225 }
7226 }
7227
7228 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
7229
7230 void
7231 s390_output_pool_entry (rtx exp, enum machine_mode mode, unsigned int align)
7232 {
7233 REAL_VALUE_TYPE r;
7234
7235 switch (GET_MODE_CLASS (mode))
7236 {
7237 case MODE_FLOAT:
7238 case MODE_DECIMAL_FLOAT:
7239 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
7240
7241 REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
7242 assemble_real (r, mode, align);
7243 break;
7244
7245 case MODE_INT:
7246 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
7247 mark_symbol_refs_as_used (exp);
7248 break;
7249
7250 default:
7251 gcc_unreachable ();
7252 }
7253 }
7254
7255
7256 /* Return an RTL expression representing the value of the return address
7257 for the frame COUNT steps up from the current frame. FRAME is the
7258 frame pointer of that frame. */
7259
7260 rtx
7261 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
7262 {
7263 int offset;
7264 rtx addr;
7265
7266 /* Without backchain, we fail for all but the current frame. */
7267
7268 if (!TARGET_BACKCHAIN && count > 0)
7269 return NULL_RTX;
7270
7271 /* For the current frame, we need to make sure the initial
7272 value of RETURN_REGNUM is actually saved. */
7273
7274 if (count == 0)
7275 {
7276 /* On non-z architectures branch splitting could overwrite r14. */
7277 if (TARGET_CPU_ZARCH)
7278 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
7279 else
7280 {
7281 cfun_frame_layout.save_return_addr_p = true;
7282 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
7283 }
7284 }
7285
7286 if (TARGET_PACKED_STACK)
7287 offset = -2 * UNITS_PER_LONG;
7288 else
7289 offset = RETURN_REGNUM * UNITS_PER_LONG;
7290
7291 addr = plus_constant (Pmode, frame, offset);
7292 addr = memory_address (Pmode, addr);
7293 return gen_rtx_MEM (Pmode, addr);
7294 }
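/* For example (assuming the 64-bit ABI with UNITS_PER_LONG == 8 and
   RETURN_REGNUM == 14): with the standard stack layout the return
   address of an outer frame is loaded from FRAME + 112, while with
   -mpacked-stack it is loaded from FRAME - 16.  */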
7295
7296 /* Return an RTL expression representing the back chain stored in
7297 the current stack frame. */
7298
7299 rtx
7300 s390_back_chain_rtx (void)
7301 {
7302 rtx chain;
7303
7304 gcc_assert (TARGET_BACKCHAIN);
7305
7306 if (TARGET_PACKED_STACK)
7307 chain = plus_constant (Pmode, stack_pointer_rtx,
7308 STACK_POINTER_OFFSET - UNITS_PER_LONG);
7309 else
7310 chain = stack_pointer_rtx;
7311
7312 chain = gen_rtx_MEM (Pmode, chain);
7313 return chain;
7314 }
7315
7316 /* Find the first call-clobbered register unused in a function.
7317 This could be used as a base register in a leaf function
7318 or for holding the return address before the epilogue. */
7319
7320 static int
7321 find_unused_clobbered_reg (void)
7322 {
7323 int i;
7324 for (i = 0; i < 6; i++)
7325 if (!df_regs_ever_live_p (i))
7326 return i;
7327 return 0;
7328 }
7329
7330
7331 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
7332 clobbered hard regs in SETREG. */
7333
7334 static void
7335 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
7336 {
7337 int *regs_ever_clobbered = (int *)data;
7338 unsigned int i, regno;
7339 enum machine_mode mode = GET_MODE (setreg);
7340
7341 if (GET_CODE (setreg) == SUBREG)
7342 {
7343 rtx inner = SUBREG_REG (setreg);
7344 if (!GENERAL_REG_P (inner))
7345 return;
7346 regno = subreg_regno (setreg);
7347 }
7348 else if (GENERAL_REG_P (setreg))
7349 regno = REGNO (setreg);
7350 else
7351 return;
7352
7353 for (i = regno;
7354 i < regno + HARD_REGNO_NREGS (regno, mode);
7355 i++)
7356 regs_ever_clobbered[i] = 1;
7357 }
7358
7359 /* Walks through all basic blocks of the current function looking
7360 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
7361 of the passed integer array REGS_EVER_CLOBBERED are set to one for
7362 each of those regs. */
7363
7364 static void
7365 s390_regs_ever_clobbered (int *regs_ever_clobbered)
7366 {
7367 basic_block cur_bb;
7368 rtx cur_insn;
7369 unsigned int i;
7370
7371 memset (regs_ever_clobbered, 0, 16 * sizeof (int));
7372
7373 /* For non-leaf functions we have to consider all call clobbered regs to be
7374 clobbered. */
7375 if (!crtl->is_leaf)
7376 {
7377 for (i = 0; i < 16; i++)
7378 regs_ever_clobbered[i] = call_really_used_regs[i];
7379 }
7380
7381 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
7382 this work is done by liveness analysis (mark_regs_live_at_end).
7383 Special care is needed for functions containing landing pads. Landing pads
7384 may use the eh registers, but the code which sets these registers is not
7385 contained in that function. Hence s390_regs_ever_clobbered is not able to
7386 deal with this automatically. */
7387 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
7388 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
7389 if (crtl->calls_eh_return
7390 || (cfun->machine->has_landing_pad_p
7391 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
7392 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
7393
7394 /* For nonlocal gotos all call-saved registers have to be saved.
7395 This flag is also set for the unwinding code in libgcc.
7396 See expand_builtin_unwind_init. For regs_ever_live this is done by
7397 reload. */
7398 if (cfun->has_nonlocal_label)
7399 for (i = 0; i < 16; i++)
7400 if (!call_really_used_regs[i])
7401 regs_ever_clobbered[i] = 1;
7402
7403 FOR_EACH_BB (cur_bb)
7404 {
7405 FOR_BB_INSNS (cur_bb, cur_insn)
7406 {
7407 if (INSN_P (cur_insn))
7408 note_stores (PATTERN (cur_insn),
7409 s390_reg_clobbered_rtx,
7410 regs_ever_clobbered);
7411 }
7412 }
7413 }
7414
7415 /* Determine the frame area which actually has to be accessed
7416 in the function epilogue. The values are stored at the
7417 given pointers AREA_BOTTOM (address of the lowest used stack
7418 address) and AREA_TOP (address of the first item which does
7419 not belong to the stack frame). */
7420
7421 static void
7422 s390_frame_area (int *area_bottom, int *area_top)
7423 {
7424 int b, t;
7425 int i;
7426
7427 b = INT_MAX;
7428 t = INT_MIN;
7429
7430 if (cfun_frame_layout.first_restore_gpr != -1)
7431 {
7432 b = (cfun_frame_layout.gprs_offset
7433 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
7434 t = b + (cfun_frame_layout.last_restore_gpr
7435 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
7436 }
7437
7438 if (TARGET_64BIT && cfun_save_high_fprs_p)
7439 {
7440 b = MIN (b, cfun_frame_layout.f8_offset);
7441 t = MAX (t, (cfun_frame_layout.f8_offset
7442 + cfun_frame_layout.high_fprs * 8));
7443 }
7444
7445 if (!TARGET_64BIT)
7446 for (i = 2; i < 4; i++)
7447 if (cfun_fpr_bit_p (i))
7448 {
7449 b = MIN (b, cfun_frame_layout.f4_offset + (i - 2) * 8);
7450 t = MAX (t, cfun_frame_layout.f4_offset + (i - 1) * 8);
7451 }
7452
7453 *area_bottom = b;
7454 *area_top = t;
7455 }
7456
7457 /* Fill cfun->machine with info about register usage of current function.
7458 Return in CLOBBERED_REGS which GPRs are currently considered set. */
7459
7460 static void
7461 s390_register_info (int clobbered_regs[])
7462 {
7463 int i, j;
7464
7465 /* fprs 8 - 15 are call-saved for the 64-bit ABI. */
7466 cfun_frame_layout.fpr_bitmap = 0;
7467 cfun_frame_layout.high_fprs = 0;
7468 if (TARGET_64BIT)
7469 for (i = 24; i < 32; i++)
7470 if (df_regs_ever_live_p (i) && !global_regs[i])
7471 {
7472 cfun_set_fpr_bit (i - 16);
7473 cfun_frame_layout.high_fprs++;
7474 }
7475
7476 /* Find first and last gpr to be saved. We trust regs_ever_live
7477 data, except that we don't save and restore global registers.
7478
7479 Also, all registers with special meaning to the compiler need
7480 to be handled specially. */
7481
7482 s390_regs_ever_clobbered (clobbered_regs);
7483
7484 for (i = 0; i < 16; i++)
7485 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i] && !fixed_regs[i];
7486
7487 if (frame_pointer_needed)
7488 clobbered_regs[HARD_FRAME_POINTER_REGNUM] = 1;
7489
7490 if (flag_pic)
7491 clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
7492 |= df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7493
7494 clobbered_regs[BASE_REGNUM]
7495 |= (cfun->machine->base_reg
7496 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
7497
7498 clobbered_regs[RETURN_REGNUM]
7499 |= (!crtl->is_leaf
7500 || TARGET_TPF_PROFILING
7501 || cfun->machine->split_branches_pending_p
7502 || cfun_frame_layout.save_return_addr_p
7503 || crtl->calls_eh_return
7504 || cfun->stdarg);
7505
7506 clobbered_regs[STACK_POINTER_REGNUM]
7507 |= (!crtl->is_leaf
7508 || TARGET_TPF_PROFILING
7509 || cfun_save_high_fprs_p
7510 || get_frame_size () > 0
7511 || cfun->calls_alloca
7512 || cfun->stdarg);
7513
7514 for (i = 6; i < 16; i++)
7515 if (df_regs_ever_live_p (i) || clobbered_regs[i])
7516 break;
7517 for (j = 15; j > i; j--)
7518 if (df_regs_ever_live_p (j) || clobbered_regs[j])
7519 break;
7520
7521 if (i == 16)
7522 {
7523 /* Nothing to save/restore. */
7524 cfun_frame_layout.first_save_gpr_slot = -1;
7525 cfun_frame_layout.last_save_gpr_slot = -1;
7526 cfun_frame_layout.first_save_gpr = -1;
7527 cfun_frame_layout.first_restore_gpr = -1;
7528 cfun_frame_layout.last_save_gpr = -1;
7529 cfun_frame_layout.last_restore_gpr = -1;
7530 }
7531 else
7532 {
7533 /* Save slots for gprs from i to j. */
7534 cfun_frame_layout.first_save_gpr_slot = i;
7535 cfun_frame_layout.last_save_gpr_slot = j;
7536
7537 for (i = cfun_frame_layout.first_save_gpr_slot;
7538 i < cfun_frame_layout.last_save_gpr_slot + 1;
7539 i++)
7540 if (clobbered_regs[i])
7541 break;
7542
7543 for (j = cfun_frame_layout.last_save_gpr_slot; j > i; j--)
7544 if (clobbered_regs[j])
7545 break;
7546
7547 if (i == cfun_frame_layout.last_save_gpr_slot + 1)
7548 {
7549 /* Nothing to save/restore. */
7550 cfun_frame_layout.first_save_gpr = -1;
7551 cfun_frame_layout.first_restore_gpr = -1;
7552 cfun_frame_layout.last_save_gpr = -1;
7553 cfun_frame_layout.last_restore_gpr = -1;
7554 }
7555 else
7556 {
7557 /* Save / Restore from gpr i to j. */
7558 cfun_frame_layout.first_save_gpr = i;
7559 cfun_frame_layout.first_restore_gpr = i;
7560 cfun_frame_layout.last_save_gpr = j;
7561 cfun_frame_layout.last_restore_gpr = j;
7562 }
7563 }
7564
7565 if (cfun->stdarg)
7566 {
7567 /* Varargs functions need to save gprs 2 to 6. */
7568 if (cfun->va_list_gpr_size
7569 && crtl->args.info.gprs < GP_ARG_NUM_REG)
7570 {
7571 int min_gpr = crtl->args.info.gprs;
7572 int max_gpr = min_gpr + cfun->va_list_gpr_size;
7573 if (max_gpr > GP_ARG_NUM_REG)
7574 max_gpr = GP_ARG_NUM_REG;
7575
7576 if (cfun_frame_layout.first_save_gpr == -1
7577 || cfun_frame_layout.first_save_gpr > 2 + min_gpr)
7578 {
7579 cfun_frame_layout.first_save_gpr = 2 + min_gpr;
7580 cfun_frame_layout.first_save_gpr_slot = 2 + min_gpr;
7581 }
7582
7583 if (cfun_frame_layout.last_save_gpr == -1
7584 || cfun_frame_layout.last_save_gpr < 2 + max_gpr - 1)
7585 {
7586 cfun_frame_layout.last_save_gpr = 2 + max_gpr - 1;
7587 cfun_frame_layout.last_save_gpr_slot = 2 + max_gpr - 1;
7588 }
7589 }
7590
7591 /* Mark f0, f2 for 31 bit and f0-f4 for 64 bit to be saved. */
7592 if (TARGET_HARD_FLOAT && cfun->va_list_fpr_size
7593 && crtl->args.info.fprs < FP_ARG_NUM_REG)
7594 {
7595 int min_fpr = crtl->args.info.fprs;
7596 int max_fpr = min_fpr + cfun->va_list_fpr_size;
7597 if (max_fpr > FP_ARG_NUM_REG)
7598 max_fpr = FP_ARG_NUM_REG;
7599
7600 /* ??? This is currently required to ensure proper location
7601 of the fpr save slots within the va_list save area. */
7602 if (TARGET_PACKED_STACK)
7603 min_fpr = 0;
7604
7605 for (i = min_fpr; i < max_fpr; i++)
7606 cfun_set_fpr_bit (i);
7607 }
7608 }
7609
7610 if (!TARGET_64BIT)
7611 for (i = 2; i < 4; i++)
7612 if (df_regs_ever_live_p (i + 16) && !global_regs[i + 16])
7613 cfun_set_fpr_bit (i);
7614 }
7615
7616 /* Fill cfun->machine with info about frame of current function. */
7617
7618 static void
7619 s390_frame_info (void)
7620 {
7621 int i;
7622
7623 cfun_frame_layout.frame_size = get_frame_size ();
7624 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
7625 fatal_error ("total size of local variables exceeds architecture limit");
7626
7627 if (!TARGET_PACKED_STACK)
7628 {
7629 cfun_frame_layout.backchain_offset = 0;
7630 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
7631 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
7632 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
7633 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
7634 * UNITS_PER_LONG);
7635 }
7636 else if (TARGET_BACKCHAIN) /* kernel stack layout */
7637 {
7638 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
7639 - UNITS_PER_LONG);
7640 cfun_frame_layout.gprs_offset
7641 = (cfun_frame_layout.backchain_offset
7642 - (STACK_POINTER_REGNUM - cfun_frame_layout.first_save_gpr_slot + 1)
7643 * UNITS_PER_LONG);
7644
7645 if (TARGET_64BIT)
7646 {
7647 cfun_frame_layout.f4_offset
7648 = (cfun_frame_layout.gprs_offset
7649 - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
7650
7651 cfun_frame_layout.f0_offset
7652 = (cfun_frame_layout.f4_offset
7653 - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
7654 }
7655 else
7656 {
7657 /* On 31 bit we have to take care of the alignment of the
7658 floating point regs to provide the fastest access. */
7659 cfun_frame_layout.f0_offset
7660 = ((cfun_frame_layout.gprs_offset
7661 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1))
7662 - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
7663
7664 cfun_frame_layout.f4_offset
7665 = (cfun_frame_layout.f0_offset
7666 - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
7667 }
7668 }
7669 else /* no backchain */
7670 {
7671 cfun_frame_layout.f4_offset
7672 = (STACK_POINTER_OFFSET
7673 - 8 * (cfun_fpr_bit_p (2) + cfun_fpr_bit_p (3)));
7674
7675 cfun_frame_layout.f0_offset
7676 = (cfun_frame_layout.f4_offset
7677 - 8 * (cfun_fpr_bit_p (0) + cfun_fpr_bit_p (1)));
7678
7679 cfun_frame_layout.gprs_offset
7680 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
7681 }
7682
7683 if (crtl->is_leaf
7684 && !TARGET_TPF_PROFILING
7685 && cfun_frame_layout.frame_size == 0
7686 && !cfun_save_high_fprs_p
7687 && !cfun->calls_alloca
7688 && !cfun->stdarg)
7689 return;
7690
7691 if (!TARGET_PACKED_STACK)
7692 cfun_frame_layout.frame_size += (STACK_POINTER_OFFSET
7693 + crtl->outgoing_args_size
7694 + cfun_frame_layout.high_fprs * 8);
7695 else
7696 {
7697 if (TARGET_BACKCHAIN)
7698 cfun_frame_layout.frame_size += UNITS_PER_LONG;
7699
7700 /* No alignment trouble here because f8-f15 are only saved under
7701 64 bit. */
7702 cfun_frame_layout.f8_offset = (MIN (MIN (cfun_frame_layout.f0_offset,
7703 cfun_frame_layout.f4_offset),
7704 cfun_frame_layout.gprs_offset)
7705 - cfun_frame_layout.high_fprs * 8);
7706
7707 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
7708
7709 for (i = 0; i < 8; i++)
7710 if (cfun_fpr_bit_p (i))
7711 cfun_frame_layout.frame_size += 8;
7712
7713 cfun_frame_layout.frame_size += cfun_gprs_save_area_size;
7714
7715 /* If, under 31 bit, an odd number of gprs has to be saved, we have to
7716 adjust the frame size to maintain 8 byte alignment of stack frames. */
7717 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
7718 STACK_BOUNDARY / BITS_PER_UNIT - 1)
7719 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
7720
7721 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
7722 }
7723 }
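/* A small example of the rounding above (assuming STACK_BOUNDARY is
   64 bits, i.e. 8 bytes): a packed-stack frame size of 92 bytes is
   rounded to (92 + 7) & ~7 == 96 before the outgoing argument area
   is added.  */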
7724
7725 /* Generate frame layout. Fills in register and frame data for the current
7726 function in cfun->machine. This routine can be called multiple times;
7727 it will re-do the complete frame layout every time. */
7728
7729 static void
7730 s390_init_frame_layout (void)
7731 {
7732 HOST_WIDE_INT frame_size;
7733 int base_used;
7734 int clobbered_regs[16];
7735
7736 /* On S/390 machines, we may need to perform branch splitting, which
7737 will require both base and return address register. We have no
7738 choice but to assume we're going to need them until right at the
7739 end of the machine dependent reorg phase. */
7740 if (!TARGET_CPU_ZARCH)
7741 cfun->machine->split_branches_pending_p = true;
7742
7743 do
7744 {
7745 frame_size = cfun_frame_layout.frame_size;
7746
7747 /* Try to predict whether we'll need the base register. */
7748 base_used = cfun->machine->split_branches_pending_p
7749 || crtl->uses_const_pool
7750 || (!DISP_IN_RANGE (frame_size)
7751 && !CONST_OK_FOR_K (frame_size));
7752
7753 /* Decide which register to use as literal pool base. In small
7754 leaf functions, try to use an unused call-clobbered register
7755 as base register to avoid save/restore overhead. */
7756 if (!base_used)
7757 cfun->machine->base_reg = NULL_RTX;
7758 else if (crtl->is_leaf && !df_regs_ever_live_p (5))
7759 cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
7760 else
7761 cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
7762
7763 s390_register_info (clobbered_regs);
7764 s390_frame_info ();
7765 }
7766 while (frame_size != cfun_frame_layout.frame_size);
7767 }
7768
7769 /* Update frame layout. Recompute actual register save data based on
7770 current info and update regs_ever_live for the special registers.
7771 May be called multiple times, but may never cause *more* registers
7772 to be saved than s390_init_frame_layout allocated room for. */
7773
7774 static void
7775 s390_update_frame_layout (void)
7776 {
7777 int clobbered_regs[16];
7778
7779 s390_register_info (clobbered_regs);
7780
7781 df_set_regs_ever_live (BASE_REGNUM,
7782 clobbered_regs[BASE_REGNUM] ? true : false);
7783 df_set_regs_ever_live (RETURN_REGNUM,
7784 clobbered_regs[RETURN_REGNUM] ? true : false);
7785 df_set_regs_ever_live (STACK_POINTER_REGNUM,
7786 clobbered_regs[STACK_POINTER_REGNUM] ? true : false);
7787
7788 if (cfun->machine->base_reg)
7789 df_set_regs_ever_live (REGNO (cfun->machine->base_reg), true);
7790 }
7791
7792 /* Return true if it is legal to put a value with MODE into REGNO. */
7793
7794 bool
7795 s390_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
7796 {
7797 switch (REGNO_REG_CLASS (regno))
7798 {
7799 case FP_REGS:
7800 if (REGNO_PAIR_OK (regno, mode))
7801 {
7802 if (mode == SImode || mode == DImode)
7803 return true;
7804
7805 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
7806 return true;
7807 }
7808 break;
7809 case ADDR_REGS:
7810 if (FRAME_REGNO_P (regno) && mode == Pmode)
7811 return true;
7812
7813 /* fallthrough */
7814 case GENERAL_REGS:
7815 if (REGNO_PAIR_OK (regno, mode))
7816 {
7817 if (TARGET_ZARCH
7818 || (mode != TFmode && mode != TCmode && mode != TDmode))
7819 return true;
7820 }
7821 break;
7822 case CC_REGS:
7823 if (GET_MODE_CLASS (mode) == MODE_CC)
7824 return true;
7825 break;
7826 case ACCESS_REGS:
7827 if (REGNO_PAIR_OK (regno, mode))
7828 {
7829 if (mode == SImode || mode == Pmode)
7830 return true;
7831 }
7832 break;
7833 default:
7834 return false;
7835 }
7836
7837 return false;
7838 }
7839
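/* Illustrative examples (not part of the original sources): DImode is
accepted in both general and floating-point registers, TFmode is
accepted in a general-register pair only with TARGET_ZARCH, and CC
modes are accepted in the condition-code register. REGNO_PAIR_OK is
expected to reject multi-register values that would start in an
odd-numbered hard register. */
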
7840 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
7841
7842 bool
7843 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
7844 {
7845 /* Once we've decided upon a register to use as base register, it must
7846 no longer be used for any other purpose. */
7847 if (cfun->machine->base_reg)
7848 if (REGNO (cfun->machine->base_reg) == old_reg
7849 || REGNO (cfun->machine->base_reg) == new_reg)
7850 return false;
7851
7852 return true;
7853 }
7854
7855 /* Maximum number of registers to represent a value of mode MODE
7856 in a register of class RCLASS. */
7857
7858 int
7859 s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
7860 {
7861 switch (rclass)
7862 {
7863 case FP_REGS:
7864 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7865 return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8);
7866 else
7867 return (GET_MODE_SIZE (mode) + 8 - 1) / 8;
7868 case ACCESS_REGS:
7869 return (GET_MODE_SIZE (mode) + 4 - 1) / 4;
7870 default:
7871 break;
7872 }
7873 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7874 }
7875
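/* Worked examples (illustrative only): in FP_REGS a DFmode value
(8 bytes) needs (8 + 7) / 8 = 1 register and a TFmode value (16 bytes)
needs 2; a complex DFmode value (16 bytes) needs 2 * ((8 + 7) / 8) = 2.
With the default rule and a 64-bit word, a TImode value occupies
(16 + 7) / 8 = 2 general registers. */
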
7876 /* Return true if register FROM can be eliminated via register TO. */
7877
7878 static bool
7879 s390_can_eliminate (const int from, const int to)
7880 {
7881 /* On zSeries machines, we have not marked the base register as fixed.
7882 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
7883 If a function requires the base register, we say here that this
7884 elimination cannot be performed. This will cause reload to free
7885 up the base register (as if it were fixed). On the other hand,
7886 if the current function does *not* require the base register, we
7887 say here the elimination succeeds, which in turn allows reload
7888 to allocate the base register for any other purpose. */
7889 if (from == BASE_REGNUM && to == BASE_REGNUM)
7890 {
7891 if (TARGET_CPU_ZARCH)
7892 {
7893 s390_init_frame_layout ();
7894 return cfun->machine->base_reg == NULL_RTX;
7895 }
7896
7897 return false;
7898 }
7899
7900 /* Everything else must point into the stack frame. */
7901 gcc_assert (to == STACK_POINTER_REGNUM
7902 || to == HARD_FRAME_POINTER_REGNUM);
7903
7904 gcc_assert (from == FRAME_POINTER_REGNUM
7905 || from == ARG_POINTER_REGNUM
7906 || from == RETURN_ADDRESS_POINTER_REGNUM);
7907
7908 /* Make sure we actually saved the return address. */
7909 if (from == RETURN_ADDRESS_POINTER_REGNUM)
7910 if (!crtl->calls_eh_return
7911 && !cfun->stdarg
7912 && !cfun_frame_layout.save_return_addr_p)
7913 return false;
7914
7915 return true;
7916 }
7917
7918 /* Return offset between register FROM and TO initially after prolog. */
7919
7920 HOST_WIDE_INT
7921 s390_initial_elimination_offset (int from, int to)
7922 {
7923 HOST_WIDE_INT offset;
7924 int index;
7925
7926 /* ??? Why are we called for non-eliminable pairs? */
7927 if (!s390_can_eliminate (from, to))
7928 return 0;
7929
7930 switch (from)
7931 {
7932 case FRAME_POINTER_REGNUM:
7933 offset = (get_frame_size()
7934 + STACK_POINTER_OFFSET
7935 + crtl->outgoing_args_size);
7936 break;
7937
7938 case ARG_POINTER_REGNUM:
7939 s390_init_frame_layout ();
7940 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
7941 break;
7942
7943 case RETURN_ADDRESS_POINTER_REGNUM:
7944 s390_init_frame_layout ();
7945 index = RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot;
7946 gcc_assert (index >= 0);
7947 offset = cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset;
7948 offset += index * UNITS_PER_LONG;
7949 break;
7950
7951 case BASE_REGNUM:
7952 offset = 0;
7953 break;
7954
7955 default:
7956 gcc_unreachable ();
7957 }
7958
7959 return offset;
7960 }
7961
7962 /* Emit insn to save fpr REGNUM at offset OFFSET relative
7963 to register BASE. Return generated insn. */
7964
7965 static rtx
7966 save_fpr (rtx base, int offset, int regnum)
7967 {
7968 rtx addr;
7969 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
7970
7971 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
7972 set_mem_alias_set (addr, get_varargs_alias_set ());
7973 else
7974 set_mem_alias_set (addr, get_frame_alias_set ());
7975
7976 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
7977 }
7978
7979 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
7980 to register BASE. Return generated insn. */
7981
7982 static rtx
7983 restore_fpr (rtx base, int offset, int regnum)
7984 {
7985 rtx addr;
7986 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
7987 set_mem_alias_set (addr, get_frame_alias_set ());
7988
7989 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
7990 }
7991
7992 /* Return true if REGNO is a global register, but not one
7993 of the special ones that need to be saved/restored anyway. */
7994
7995 static inline bool
7996 global_not_special_regno_p (int regno)
7997 {
7998 return (global_regs[regno]
7999 /* These registers are special and need to be
8000 restored in any case. */
8001 && !(regno == STACK_POINTER_REGNUM
8002 || regno == RETURN_REGNUM
8003 || regno == BASE_REGNUM
8004 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
8005 }
8006
8007 /* Generate insn to save registers FIRST to LAST into
8008 the register save area located at offset OFFSET
8009 relative to register BASE. */
8010
8011 static rtx
8012 save_gprs (rtx base, int offset, int first, int last)
8013 {
8014 rtx addr, insn, note;
8015 int i;
8016
8017 addr = plus_constant (Pmode, base, offset);
8018 addr = gen_rtx_MEM (Pmode, addr);
8019
8020 set_mem_alias_set (addr, get_frame_alias_set ());
8021
8022 /* Special-case single register. */
8023 if (first == last)
8024 {
8025 if (TARGET_64BIT)
8026 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
8027 else
8028 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
8029
8030 if (!global_not_special_regno_p (first))
8031 RTX_FRAME_RELATED_P (insn) = 1;
8032 return insn;
8033 }
8034
8035
8036 insn = gen_store_multiple (addr,
8037 gen_rtx_REG (Pmode, first),
8038 GEN_INT (last - first + 1));
8039
8040 if (first <= 6 && cfun->stdarg)
8041 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8042 {
8043 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
8044
8045 if (first + i <= 6)
8046 set_mem_alias_set (mem, get_varargs_alias_set ());
8047 }
8048
8049 /* We need to set the FRAME_RELATED flag on all SETs
8050 inside the store-multiple pattern.
8051
8052 However, we must not emit DWARF records for registers 2..5
8053 if they are stored for use by variable arguments ...
8054
8055 ??? Unfortunately, it is not enough to simply not set the
8056 FRAME_RELATED flags for those SETs, because the first SET
8057 of the PARALLEL is always treated as if it had the flag
8058 set, even if it does not. Therefore we emit a new pattern
8059 without those registers as REG_FRAME_RELATED_EXPR note. */
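/* Example (illustrative only): in a stdarg function that stores r2-r15
with a single store-multiple insn, the insn covers all fourteen
registers, but the REG_FRAME_RELATED_EXPR note built below describes
only the store-multiple of r6-r15, so no DWARF save records are
created for the argument registers r2-r5. */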
8060
8061 if (first >= 6 && !global_not_special_regno_p (first))
8062 {
8063 rtx pat = PATTERN (insn);
8064
8065 for (i = 0; i < XVECLEN (pat, 0); i++)
8066 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
8067 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
8068 0, i)))))
8069 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
8070
8071 RTX_FRAME_RELATED_P (insn) = 1;
8072 }
8073 else if (last >= 6)
8074 {
8075 int start;
8076
8077 for (start = first >= 6 ? first : 6; start <= last; start++)
8078 if (!global_not_special_regno_p (start))
8079 break;
8080
8081 if (start > last)
8082 return insn;
8083
8084 addr = plus_constant (Pmode, base,
8085 offset + (start - first) * UNITS_PER_LONG);
8086 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
8087 gen_rtx_REG (Pmode, start),
8088 GEN_INT (last - start + 1));
8089 note = PATTERN (note);
8090
8091 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
8092
8093 for (i = 0; i < XVECLEN (note, 0); i++)
8094 if (GET_CODE (XVECEXP (note, 0, i)) == SET
8095 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
8096 0, i)))))
8097 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
8098
8099 RTX_FRAME_RELATED_P (insn) = 1;
8100 }
8101
8102 return insn;
8103 }
8104
8105 /* Generate insn to restore registers FIRST to LAST from
8106 the register save area located at offset OFFSET
8107 relative to register BASE. */
8108
8109 static rtx
8110 restore_gprs (rtx base, int offset, int first, int last)
8111 {
8112 rtx addr, insn;
8113
8114 addr = plus_constant (Pmode, base, offset);
8115 addr = gen_rtx_MEM (Pmode, addr);
8116 set_mem_alias_set (addr, get_frame_alias_set ());
8117
8118 /* Special-case single register. */
8119 if (first == last)
8120 {
8121 if (TARGET_64BIT)
8122 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
8123 else
8124 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
8125
8126 return insn;
8127 }
8128
8129 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
8130 addr,
8131 GEN_INT (last - first + 1));
8132 return insn;
8133 }
8134
8135 /* Return insn sequence to load the GOT register. */
8136
8137 static GTY(()) rtx got_symbol;
8138 rtx
8139 s390_load_got (void)
8140 {
8141 rtx insns;
8142
8143 /* We cannot use pic_offset_table_rtx here since this function is
8144 also used for non-pic code if __tls_get_offset is called; in that
8145 case neither PIC_OFFSET_TABLE_REGNUM nor pic_offset_table_rtx is
8146 usable. */
8147 rtx got_rtx = gen_rtx_REG (Pmode, 12);
8148
8149 if (!got_symbol)
8150 {
8151 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8152 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
8153 }
8154
8155 start_sequence ();
8156
8157 if (TARGET_CPU_ZARCH)
8158 {
8159 emit_move_insn (got_rtx, got_symbol);
8160 }
8161 else
8162 {
8163 rtx offset;
8164
8165 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
8166 UNSPEC_LTREL_OFFSET);
8167 offset = gen_rtx_CONST (Pmode, offset);
8168 offset = force_const_mem (Pmode, offset);
8169
8170 emit_move_insn (got_rtx, offset);
8171
8172 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
8173 UNSPEC_LTREL_BASE);
8174 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
8175
8176 emit_move_insn (got_rtx, offset);
8177 }
8178
8179 insns = get_insns ();
8180 end_sequence ();
8181 return insns;
8182 }
8183
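/* For illustration (not part of the original sources): on
z/Architecture the move of got_symbol above is expected to end up as a
single "larl %r12,_GLOBAL_OFFSET_TABLE_", while on older CPUs the GOT
address is computed from a literal-pool entry plus the literal-pool
base, as emitted by the UNSPEC_LTREL_OFFSET / UNSPEC_LTREL_BASE
sequence above. */
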
8184 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
8185 and the change to the stack pointer. */
8186
8187 static void
8188 s390_emit_stack_tie (void)
8189 {
8190 rtx mem = gen_frame_mem (BLKmode,
8191 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
8192
8193 emit_insn (gen_stack_tie (mem));
8194 }
8195
8196 /* Expand the prologue into a bunch of separate insns. */
8197
8198 void
8199 s390_emit_prologue (void)
8200 {
8201 rtx insn, addr;
8202 rtx temp_reg;
8203 int i;
8204 int offset;
8205 int next_fpr = 0;
8206
8207 /* Complete frame layout. */
8208
8209 s390_update_frame_layout ();
8210
8211 /* Annotate all constant pool references to let the scheduler know
8212 they implicitly use the base register. */
8213
8214 push_topmost_sequence ();
8215
8216 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8217 if (INSN_P (insn))
8218 {
8219 annotate_constant_pool_refs (&PATTERN (insn));
8220 df_insn_rescan (insn);
8221 }
8222
8223 pop_topmost_sequence ();
8224
8225 /* Choose the best register to use as a temporary within the prologue.
8226 See below for why TPF must use register 1. */
8227
8228 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
8229 && !crtl->is_leaf
8230 && !TARGET_TPF_PROFILING)
8231 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8232 else
8233 temp_reg = gen_rtx_REG (Pmode, 1);
8234
8235 /* Save call saved gprs. */
8236 if (cfun_frame_layout.first_save_gpr != -1)
8237 {
8238 insn = save_gprs (stack_pointer_rtx,
8239 cfun_frame_layout.gprs_offset +
8240 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
8241 - cfun_frame_layout.first_save_gpr_slot),
8242 cfun_frame_layout.first_save_gpr,
8243 cfun_frame_layout.last_save_gpr);
8244 emit_insn (insn);
8245 }
8246
8247 /* Dummy insn to mark literal pool slot. */
8248
8249 if (cfun->machine->base_reg)
8250 emit_insn (gen_main_pool (cfun->machine->base_reg));
8251
8252 offset = cfun_frame_layout.f0_offset;
8253
8254 /* Save f0 and f2. */
8255 for (i = 0; i < 2; i++)
8256 {
8257 if (cfun_fpr_bit_p (i))
8258 {
8259 save_fpr (stack_pointer_rtx, offset, i + 16);
8260 offset += 8;
8261 }
8262 else if (!TARGET_PACKED_STACK)
8263 offset += 8;
8264 }
8265
8266 /* Save f4 and f6. */
8267 offset = cfun_frame_layout.f4_offset;
8268 for (i = 2; i < 4; i++)
8269 {
8270 if (cfun_fpr_bit_p (i))
8271 {
8272 insn = save_fpr (stack_pointer_rtx, offset, i + 16);
8273 offset += 8;
8274
8275 /* If f4 and f6 are call-clobbered, they are saved due to stdargs and
8276 are therefore not frame related. */
8277 if (!call_really_used_regs[i + 16])
8278 RTX_FRAME_RELATED_P (insn) = 1;
8279 }
8280 else if (!TARGET_PACKED_STACK)
8281 offset += 8;
8282 }
8283
8284 if (TARGET_PACKED_STACK
8285 && cfun_save_high_fprs_p
8286 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
8287 {
8288 offset = (cfun_frame_layout.f8_offset
8289 + (cfun_frame_layout.high_fprs - 1) * 8);
8290
8291 for (i = 15; i > 7 && offset >= 0; i--)
8292 if (cfun_fpr_bit_p (i))
8293 {
8294 insn = save_fpr (stack_pointer_rtx, offset, i + 16);
8295
8296 RTX_FRAME_RELATED_P (insn) = 1;
8297 offset -= 8;
8298 }
8299 if (offset >= cfun_frame_layout.f8_offset)
8300 next_fpr = i + 16;
8301 }
8302
8303 if (!TARGET_PACKED_STACK)
8304 next_fpr = cfun_save_high_fprs_p ? 31 : 0;
8305
8306 if (flag_stack_usage_info)
8307 current_function_static_stack_size = cfun_frame_layout.frame_size;
8308
8309 /* Decrement stack pointer. */
8310
8311 if (cfun_frame_layout.frame_size > 0)
8312 {
8313 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8314 rtx real_frame_off;
8315
8316 if (s390_stack_size)
8317 {
8318 HOST_WIDE_INT stack_guard;
8319
8320 if (s390_stack_guard)
8321 stack_guard = s390_stack_guard;
8322 else
8323 {
8324 /* If no value for the stack guard is provided, the smallest power of 2
8325 larger than the current frame size is chosen. */
8326 stack_guard = 1;
8327 while (stack_guard < cfun_frame_layout.frame_size)
8328 stack_guard <<= 1;
8329 }
8330
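/* Worked example (illustrative only): with -mstack-size=65536, no
explicit -mstack-guard and a frame size of 3000, the loop above picks
stack_guard = 4096. The mask computed below is then
(65536 - 1) & ~(4096 - 1) = 0xf000, and the conditional trap fires
when all of these stack pointer bits are zero, i.e. when the stack
pointer has come within stack_guard bytes of a multiple of the stack
size. */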
8331 if (cfun_frame_layout.frame_size >= s390_stack_size)
8332 {
8333 warning (0, "frame size of function %qs is %wd"
8334 " bytes exceeding user provided stack limit of "
8335 "%d bytes. "
8336 "An unconditional trap is added.",
8337 current_function_name(), cfun_frame_layout.frame_size,
8338 s390_stack_size);
8339 emit_insn (gen_trap ());
8340 }
8341 else
8342 {
8343 /* stack_guard has to be smaller than s390_stack_size.
8344 Otherwise we would emit an AND with zero which would
8345 not match the test under mask pattern. */
8346 if (stack_guard >= s390_stack_size)
8347 {
8348 warning (0, "frame size of function %qs is %wd"
8349 " bytes which is more than half the stack size. "
8350 "The dynamic check would not be reliable. "
8351 "No check emitted for this function.",
8352 current_function_name(),
8353 cfun_frame_layout.frame_size);
8354 }
8355 else
8356 {
8357 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
8358 & ~(stack_guard - 1));
8359
8360 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
8361 GEN_INT (stack_check_mask));
8362 if (TARGET_64BIT)
8363 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
8364 t, const0_rtx),
8365 t, const0_rtx, const0_rtx));
8366 else
8367 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
8368 t, const0_rtx),
8369 t, const0_rtx, const0_rtx));
8370 }
8371 }
8372 }
8373
8374 if (s390_warn_framesize > 0
8375 && cfun_frame_layout.frame_size >= s390_warn_framesize)
8376 warning (0, "frame size of %qs is %wd bytes",
8377 current_function_name (), cfun_frame_layout.frame_size);
8378
8379 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
8380 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
8381
8382 /* Save incoming stack pointer into temp reg. */
8383 if (TARGET_BACKCHAIN || next_fpr)
8384 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
8385
8386 /* Subtract frame size from stack pointer. */
8387
8388 if (DISP_IN_RANGE (INTVAL (frame_off)))
8389 {
8390 insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8391 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8392 frame_off));
8393 insn = emit_insn (insn);
8394 }
8395 else
8396 {
8397 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
8398 frame_off = force_const_mem (Pmode, frame_off);
8399
8400 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
8401 annotate_constant_pool_refs (&PATTERN (insn));
8402 }
8403
8404 RTX_FRAME_RELATED_P (insn) = 1;
8405 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8406 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8407 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8408 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8409 real_frame_off)));
8410
8411 /* Set backchain. */
8412
8413 if (TARGET_BACKCHAIN)
8414 {
8415 if (cfun_frame_layout.backchain_offset)
8416 addr = gen_rtx_MEM (Pmode,
8417 plus_constant (Pmode, stack_pointer_rtx,
8418 cfun_frame_layout.backchain_offset));
8419 else
8420 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8421 set_mem_alias_set (addr, get_frame_alias_set ());
8422 insn = emit_insn (gen_move_insn (addr, temp_reg));
8423 }
8424
8425 /* If we support non-call exceptions (e.g. for Java),
8426 we need to make sure the backchain pointer is set up
8427 before any possibly trapping memory access. */
8428 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
8429 {
8430 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8431 emit_clobber (addr);
8432 }
8433 }
8434
8435 /* Save fprs 8 - 15 (64 bit ABI). */
8436
8437 if (cfun_save_high_fprs_p && next_fpr)
8438 {
8439 /* If the stack might be accessed through a different register
8440 we have to make sure that the stack pointer decrement is not
8441 moved below the use of the stack slots. */
8442 s390_emit_stack_tie ();
8443
8444 insn = emit_insn (gen_add2_insn (temp_reg,
8445 GEN_INT (cfun_frame_layout.f8_offset)));
8446
8447 offset = 0;
8448
8449 for (i = 24; i <= next_fpr; i++)
8450 if (cfun_fpr_bit_p (i - 16))
8451 {
8452 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
8453 cfun_frame_layout.frame_size
8454 + cfun_frame_layout.f8_offset
8455 + offset);
8456
8457 insn = save_fpr (temp_reg, offset, i);
8458 offset += 8;
8459 RTX_FRAME_RELATED_P (insn) = 1;
8460 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8461 gen_rtx_SET (VOIDmode,
8462 gen_rtx_MEM (DFmode, addr),
8463 gen_rtx_REG (DFmode, i)));
8464 }
8465 }
8466
8467 /* Set frame pointer, if needed. */
8468
8469 if (frame_pointer_needed)
8470 {
8471 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8472 RTX_FRAME_RELATED_P (insn) = 1;
8473 }
8474
8475 /* Set up got pointer, if needed. */
8476
8477 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
8478 {
8479 rtx insns = s390_load_got ();
8480
8481 for (insn = insns; insn; insn = NEXT_INSN (insn))
8482 annotate_constant_pool_refs (&PATTERN (insn));
8483
8484 emit_insn (insns);
8485 }
8486
8487 if (TARGET_TPF_PROFILING)
8488 {
8489 /* Generate a BAS instruction to serve as a function
8490 entry intercept to facilitate the use of tracing
8491 algorithms located at the branch target. */
8492 emit_insn (gen_prologue_tpf ());
8493
8494 /* Emit a blockage here so that all code
8495 lies between the profiling mechanisms. */
8496 emit_insn (gen_blockage ());
8497 }
8498 }
8499
8500 /* Expand the epilogue into a bunch of separate insns. */
8501
8502 void
8503 s390_emit_epilogue (bool sibcall)
8504 {
8505 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
8506 int area_bottom, area_top, offset = 0;
8507 int next_offset;
8508 rtvec p;
8509 int i;
8510
8511 if (TARGET_TPF_PROFILING)
8512 {
8513
8514 /* Generate a BAS instruction to serve as a function
8515 entry intercept to facilitate the use of tracing
8516 algorithms located at the branch target. */
8517
8518 /* Emit a blockage here so that all code
8519 lies between the profiling mechanisms. */
8520 emit_insn (gen_blockage ());
8521
8522 emit_insn (gen_epilogue_tpf ());
8523 }
8524
8525 /* Check whether to use frame or stack pointer for restore. */
8526
8527 frame_pointer = (frame_pointer_needed
8528 ? hard_frame_pointer_rtx : stack_pointer_rtx);
8529
8530 s390_frame_area (&area_bottom, &area_top);
8531
8532 /* Check whether we can access the register save area.
8533 If not, increment the frame pointer as required. */
8534
8535 if (area_top <= area_bottom)
8536 {
8537 /* Nothing to restore. */
8538 }
8539 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
8540 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
8541 {
8542 /* Area is in range. */
8543 offset = cfun_frame_layout.frame_size;
8544 }
8545 else
8546 {
8547 rtx insn, frame_off, cfa;
8548
8549 offset = area_bottom < 0 ? -area_bottom : 0;
8550 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
8551
8552 cfa = gen_rtx_SET (VOIDmode, frame_pointer,
8553 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
8554 if (DISP_IN_RANGE (INTVAL (frame_off)))
8555 {
8556 insn = gen_rtx_SET (VOIDmode, frame_pointer,
8557 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
8558 insn = emit_insn (insn);
8559 }
8560 else
8561 {
8562 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
8563 frame_off = force_const_mem (Pmode, frame_off);
8564
8565 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
8566 annotate_constant_pool_refs (&PATTERN (insn));
8567 }
8568 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
8569 RTX_FRAME_RELATED_P (insn) = 1;
8570 }
8571
8572 /* Restore call saved fprs. */
8573
8574 if (TARGET_64BIT)
8575 {
8576 if (cfun_save_high_fprs_p)
8577 {
8578 next_offset = cfun_frame_layout.f8_offset;
8579 for (i = 24; i < 32; i++)
8580 {
8581 if (cfun_fpr_bit_p (i - 16))
8582 {
8583 restore_fpr (frame_pointer,
8584 offset + next_offset, i);
8585 cfa_restores
8586 = alloc_reg_note (REG_CFA_RESTORE,
8587 gen_rtx_REG (DFmode, i), cfa_restores);
8588 next_offset += 8;
8589 }
8590 }
8591 }
8592
8593 }
8594 else
8595 {
8596 next_offset = cfun_frame_layout.f4_offset;
8597 for (i = 18; i < 20; i++)
8598 {
8599 if (cfun_fpr_bit_p (i - 16))
8600 {
8601 restore_fpr (frame_pointer,
8602 offset + next_offset, i);
8603 cfa_restores
8604 = alloc_reg_note (REG_CFA_RESTORE,
8605 gen_rtx_REG (DFmode, i), cfa_restores);
8606 next_offset += 8;
8607 }
8608 else if (!TARGET_PACKED_STACK)
8609 next_offset += 8;
8610 }
8611
8612 }
8613
8614 /* Return register. */
8615
8616 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8617
8618 /* Restore call saved gprs. */
8619
8620 if (cfun_frame_layout.first_restore_gpr != -1)
8621 {
8622 rtx insn, addr;
8623 int i;
8624
8625 /* Check for global registers and save them
8626 to the stack locations from which they get restored. */
8627
8628 for (i = cfun_frame_layout.first_restore_gpr;
8629 i <= cfun_frame_layout.last_restore_gpr;
8630 i++)
8631 {
8632 if (global_not_special_regno_p (i))
8633 {
8634 addr = plus_constant (Pmode, frame_pointer,
8635 offset + cfun_frame_layout.gprs_offset
8636 + (i - cfun_frame_layout.first_save_gpr_slot)
8637 * UNITS_PER_LONG);
8638 addr = gen_rtx_MEM (Pmode, addr);
8639 set_mem_alias_set (addr, get_frame_alias_set ());
8640 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
8641 }
8642 else
8643 cfa_restores
8644 = alloc_reg_note (REG_CFA_RESTORE,
8645 gen_rtx_REG (Pmode, i), cfa_restores);
8646 }
8647
8648 if (! sibcall)
8649 {
8650 /* Fetch the return address from the stack before the load multiple;
8651 this is good for scheduling. */
8652
8653 if (cfun_frame_layout.save_return_addr_p
8654 || (cfun_frame_layout.first_restore_gpr < BASE_REGNUM
8655 && cfun_frame_layout.last_restore_gpr > RETURN_REGNUM))
8656 {
8657 int return_regnum = find_unused_clobbered_reg();
8658 if (!return_regnum)
8659 return_regnum = 4;
8660 return_reg = gen_rtx_REG (Pmode, return_regnum);
8661
8662 addr = plus_constant (Pmode, frame_pointer,
8663 offset + cfun_frame_layout.gprs_offset
8664 + (RETURN_REGNUM
8665 - cfun_frame_layout.first_save_gpr_slot)
8666 * UNITS_PER_LONG);
8667 addr = gen_rtx_MEM (Pmode, addr);
8668 set_mem_alias_set (addr, get_frame_alias_set ());
8669 emit_move_insn (return_reg, addr);
8670 }
8671 }
8672
8673 insn = restore_gprs (frame_pointer,
8674 offset + cfun_frame_layout.gprs_offset
8675 + (cfun_frame_layout.first_restore_gpr
8676 - cfun_frame_layout.first_save_gpr_slot)
8677 * UNITS_PER_LONG,
8678 cfun_frame_layout.first_restore_gpr,
8679 cfun_frame_layout.last_restore_gpr);
8680 insn = emit_insn (insn);
8681 REG_NOTES (insn) = cfa_restores;
8682 add_reg_note (insn, REG_CFA_DEF_CFA,
8683 plus_constant (Pmode, stack_pointer_rtx,
8684 STACK_POINTER_OFFSET));
8685 RTX_FRAME_RELATED_P (insn) = 1;
8686 }
8687
8688 if (! sibcall)
8689 {
8690
8691 /* Return to caller. */
8692
8693 p = rtvec_alloc (2);
8694
8695 RTVEC_ELT (p, 0) = ret_rtx;
8696 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
8697 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
8698 }
8699 }
8700
8701
8702 /* Return the size in bytes of a function argument of
8703 type TYPE and/or mode MODE. At least one of TYPE or
8704 MODE must be specified. */
8705
8706 static int
8707 s390_function_arg_size (enum machine_mode mode, const_tree type)
8708 {
8709 if (type)
8710 return int_size_in_bytes (type);
8711
8712 /* No type info available for some library calls ... */
8713 if (mode != BLKmode)
8714 return GET_MODE_SIZE (mode);
8715
8716 /* If we have neither type nor mode, abort. */
8717 gcc_unreachable ();
8718 }
8719
8720 /* Return true if a function argument of type TYPE and mode MODE
8721 is to be passed in a floating-point register, if available. */
8722
8723 static bool
8724 s390_function_arg_float (enum machine_mode mode, const_tree type)
8725 {
8726 int size = s390_function_arg_size (mode, type);
8727 if (size > 8)
8728 return false;
8729
8730 /* Soft-float changes the ABI: no floating-point registers are used. */
8731 if (TARGET_SOFT_FLOAT)
8732 return false;
8733
8734 /* No type info available for some library calls ... */
8735 if (!type)
8736 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
8737
8738 /* The ABI says that record types with a single member are treated
8739 just like that member would be. */
8740 while (TREE_CODE (type) == RECORD_TYPE)
8741 {
8742 tree field, single = NULL_TREE;
8743
8744 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8745 {
8746 if (TREE_CODE (field) != FIELD_DECL)
8747 continue;
8748
8749 if (single == NULL_TREE)
8750 single = TREE_TYPE (field);
8751 else
8752 return false;
8753 }
8754
8755 if (single == NULL_TREE)
8756 return false;
8757 else
8758 type = single;
8759 }
8760
8761 return TREE_CODE (type) == REAL_TYPE;
8762 }
8763
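/* Example (illustrative only, hypothetical types): given
struct wrapped_double { double d; };
an argument of that type is unwrapped by the loop above and passed in
a floating-point register exactly like a plain double, whereas a
struct with two double members is not passed in an FPR. */
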
8764 /* Return true if a function argument of type TYPE and mode MODE
8765 is to be passed in an integer register, or a pair of integer
8766 registers, if available. */
8767
8768 static bool
8769 s390_function_arg_integer (enum machine_mode mode, const_tree type)
8770 {
8771 int size = s390_function_arg_size (mode, type);
8772 if (size > 8)
8773 return false;
8774
8775 /* No type info available for some library calls ... */
8776 if (!type)
8777 return GET_MODE_CLASS (mode) == MODE_INT
8778 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
8779
8780 /* We accept small integral (and similar) types. */
8781 if (INTEGRAL_TYPE_P (type)
8782 || POINTER_TYPE_P (type)
8783 || TREE_CODE (type) == NULLPTR_TYPE
8784 || TREE_CODE (type) == OFFSET_TYPE
8785 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
8786 return true;
8787
8788 /* We also accept structs of size 1, 2, 4, 8 that are not
8789 passed in floating-point registers. */
8790 if (AGGREGATE_TYPE_P (type)
8791 && exact_log2 (size) >= 0
8792 && !s390_function_arg_float (mode, type))
8793 return true;
8794
8795 return false;
8796 }
8797
8798 /* Return 1 if a function argument of type TYPE and mode MODE
8799 is to be passed by reference. The ABI specifies that only
8800 structures of size 1, 2, 4, or 8 bytes are passed by value,
8801 all other structures (and complex numbers) are passed by
8802 reference. */
8803
8804 static bool
8805 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
8806 enum machine_mode mode, const_tree type,
8807 bool named ATTRIBUTE_UNUSED)
8808 {
8809 int size = s390_function_arg_size (mode, type);
8810 if (size > 8)
8811 return true;
8812
8813 if (type)
8814 {
8815 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
8816 return 1;
8817
8818 if (TREE_CODE (type) == COMPLEX_TYPE
8819 || TREE_CODE (type) == VECTOR_TYPE)
8820 return 1;
8821 }
8822
8823 return 0;
8824 }
8825
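/* Examples (illustrative only): a struct of size 3 (not a power of
two) or of size 16 is passed by reference, a struct of size 8 is
passed by value, and complex or vector types are always passed by
reference regardless of size. */
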
8826 /* Update the data in CUM to advance over an argument of mode MODE and
8827 data type TYPE. (TYPE is null for libcalls where that information
8828 may not be available.). The boolean NAMED specifies whether the
8829 argument is a named argument (as opposed to an unnamed argument
8830 matching an ellipsis). */
8831
8832 static void
8833 s390_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
8834 const_tree type, bool named ATTRIBUTE_UNUSED)
8835 {
8836 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8837
8838 if (s390_function_arg_float (mode, type))
8839 {
8840 cum->fprs += 1;
8841 }
8842 else if (s390_function_arg_integer (mode, type))
8843 {
8844 int size = s390_function_arg_size (mode, type);
8845 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
8846 }
8847 else
8848 gcc_unreachable ();
8849 }
8850
8851 /* Define where to put the arguments to a function.
8852 Value is zero to push the argument on the stack,
8853 or a hard register in which to store the argument.
8854
8855 MODE is the argument's machine mode.
8856 TYPE is the data type of the argument (as a tree).
8857 This is null for libcalls where that information may
8858 not be available.
8859 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8860 the preceding args and about the function being called.
8861 NAMED is nonzero if this argument is a named parameter
8862 (otherwise it is an extra parameter matching an ellipsis).
8863
8864 On S/390, we use general purpose registers 2 through 6 to
8865 pass integer, pointer, and certain structure arguments, and
8866 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
8867 to pass floating point arguments. All remaining arguments
8868 are pushed to the stack. */
8869
8870 static rtx
8871 s390_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
8872 const_tree type, bool named ATTRIBUTE_UNUSED)
8873 {
8874 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8875
8876 if (s390_function_arg_float (mode, type))
8877 {
8878 if (cum->fprs + 1 > FP_ARG_NUM_REG)
8879 return 0;
8880 else
8881 return gen_rtx_REG (mode, cum->fprs + 16);
8882 }
8883 else if (s390_function_arg_integer (mode, type))
8884 {
8885 int size = s390_function_arg_size (mode, type);
8886 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
8887
8888 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
8889 return 0;
8890 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
8891 return gen_rtx_REG (mode, cum->gprs + 2);
8892 else if (n_gprs == 2)
8893 {
8894 rtvec p = rtvec_alloc (2);
8895
8896 RTVEC_ELT (p, 0)
8897 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
8898 const0_rtx);
8899 RTVEC_ELT (p, 1)
8900 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
8901 GEN_INT (4));
8902
8903 return gen_rtx_PARALLEL (mode, p);
8904 }
8905 }
8906
8907 /* After the real arguments, expand_call calls us once again
8908 with a void_type_node type. Whatever we return here is
8909 passed as operand 2 to the call expanders.
8910
8911 We don't need this feature ... */
8912 else if (type == void_type_node)
8913 return const0_rtx;
8914
8915 gcc_unreachable ();
8916 }
8917
8918 /* Return true if return values of type TYPE should be returned
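/* Example (illustrative only, hypothetical prototype): for a 64-bit
call such as
extern double f (int a, double b, long c);
a is passed in %r2, b in %f0 and c in %r3; further integer arguments
are assigned to %r4-%r6 and anything beyond that is pushed to the
stack. */
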
8919 in a memory buffer whose address is passed by the caller as
8920 hidden first argument. */
8921
8922 static bool
8923 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
8924 {
8925 /* We accept small integral (and similar) types. */
8926 if (INTEGRAL_TYPE_P (type)
8927 || POINTER_TYPE_P (type)
8928 || TREE_CODE (type) == OFFSET_TYPE
8929 || TREE_CODE (type) == REAL_TYPE)
8930 return int_size_in_bytes (type) > 8;
8931
8932 /* Aggregates and similar constructs are always returned
8933 in memory. */
8934 if (AGGREGATE_TYPE_P (type)
8935 || TREE_CODE (type) == COMPLEX_TYPE
8936 || TREE_CODE (type) == VECTOR_TYPE)
8937 return true;
8938
8939 /* ??? We get called on all sorts of random stuff from
8940 aggregate_value_p. We can't abort, but it's not clear
8941 what's safe to return. Pretend it's a struct I guess. */
8942 return true;
8943 }
8944
8945 /* Function arguments and return values are promoted to word size. */
8946
8947 static enum machine_mode
8948 s390_promote_function_mode (const_tree type, enum machine_mode mode,
8949 int *punsignedp,
8950 const_tree fntype ATTRIBUTE_UNUSED,
8951 int for_return ATTRIBUTE_UNUSED)
8952 {
8953 if (INTEGRAL_MODE_P (mode)
8954 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
8955 {
8956 if (type != NULL_TREE && POINTER_TYPE_P (type))
8957 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8958 return Pmode;
8959 }
8960
8961 return mode;
8962 }
8963
8964 /* Define where to return a (scalar) value of type RET_TYPE.
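/* Example (illustrative only): a short (HImode) argument or return
value is widened to SImode on 31 bit and to DImode on 64 bit; when a
pointer type needs widening it is zero-extended, as indicated by
POINTERS_EXTEND_UNSIGNED above. */
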
8965 If RET_TYPE is null, define where to return a (scalar)
8966 value of mode MODE from a libcall. */
8967
8968 static rtx
8969 s390_function_and_libcall_value (enum machine_mode mode,
8970 const_tree ret_type,
8971 const_tree fntype_or_decl,
8972 bool outgoing ATTRIBUTE_UNUSED)
8973 {
8974 /* For normal functions perform the promotion as
8975 promote_function_mode would do. */
8976 if (ret_type)
8977 {
8978 int unsignedp = TYPE_UNSIGNED (ret_type);
8979 mode = promote_function_mode (ret_type, mode, &unsignedp,
8980 fntype_or_decl, 1);
8981 }
8982
8983 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode));
8984 gcc_assert (GET_MODE_SIZE (mode) <= 8);
8985
8986 if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
8987 return gen_rtx_REG (mode, 16);
8988 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
8989 || UNITS_PER_LONG == UNITS_PER_WORD)
8990 return gen_rtx_REG (mode, 2);
8991 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
8992 {
8993 /* This case is triggered when returning a 64 bit value with
8994 -m31 -mzarch. Although the value would fit into a single
8995 register it has to be forced into a 32 bit register pair in
8996 order to match the ABI. */
8997 rtvec p = rtvec_alloc (2);
8998
8999 RTVEC_ELT (p, 0)
9000 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
9001 RTVEC_ELT (p, 1)
9002 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
9003
9004 return gen_rtx_PARALLEL (mode, p);
9005 }
9006
9007 gcc_unreachable ();
9008 }
9009
9010 /* Define where to return a scalar return value of type RET_TYPE. */
9011
9012 static rtx
9013 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
9014 bool outgoing)
9015 {
9016 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
9017 fn_decl_or_type, outgoing);
9018 }
9019
9020 /* Define where to return a scalar libcall return value of mode
9021 MODE. */
9022
9023 static rtx
9024 s390_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9025 {
9026 return s390_function_and_libcall_value (mode, NULL_TREE,
9027 NULL_TREE, true);
9028 }
9029
9030
9031 /* Create and return the va_list datatype.
9032
9033 On S/390, va_list is an array type equivalent to
9034
9035 typedef struct __va_list_tag
9036 {
9037 long __gpr;
9038 long __fpr;
9039 void *__overflow_arg_area;
9040 void *__reg_save_area;
9041 } va_list[1];
9042
9043 where __gpr and __fpr hold the number of general purpose
9044 or floating point arguments used up to now, respectively,
9045 __overflow_arg_area points to the stack location of the
9046 next argument passed on the stack, and __reg_save_area
9047 always points to the start of the register area in the
9048 call frame of the current function. The function prologue
9049 saves all registers used for argument passing into this
9050 area if the function uses variable arguments. */
9051
9052 static tree
9053 s390_build_builtin_va_list (void)
9054 {
9055 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9056
9057 record = lang_hooks.types.make_type (RECORD_TYPE);
9058
9059 type_decl =
9060 build_decl (BUILTINS_LOCATION,
9061 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9062
9063 f_gpr = build_decl (BUILTINS_LOCATION,
9064 FIELD_DECL, get_identifier ("__gpr"),
9065 long_integer_type_node);
9066 f_fpr = build_decl (BUILTINS_LOCATION,
9067 FIELD_DECL, get_identifier ("__fpr"),
9068 long_integer_type_node);
9069 f_ovf = build_decl (BUILTINS_LOCATION,
9070 FIELD_DECL, get_identifier ("__overflow_arg_area"),
9071 ptr_type_node);
9072 f_sav = build_decl (BUILTINS_LOCATION,
9073 FIELD_DECL, get_identifier ("__reg_save_area"),
9074 ptr_type_node);
9075
9076 va_list_gpr_counter_field = f_gpr;
9077 va_list_fpr_counter_field = f_fpr;
9078
9079 DECL_FIELD_CONTEXT (f_gpr) = record;
9080 DECL_FIELD_CONTEXT (f_fpr) = record;
9081 DECL_FIELD_CONTEXT (f_ovf) = record;
9082 DECL_FIELD_CONTEXT (f_sav) = record;
9083
9084 TYPE_STUB_DECL (record) = type_decl;
9085 TYPE_NAME (record) = type_decl;
9086 TYPE_FIELDS (record) = f_gpr;
9087 DECL_CHAIN (f_gpr) = f_fpr;
9088 DECL_CHAIN (f_fpr) = f_ovf;
9089 DECL_CHAIN (f_ovf) = f_sav;
9090
9091 layout_type (record);
9092
9093 /* The correct type is an array type of one element. */
9094 return build_array_type (record, build_index_type (size_zero_node));
9095 }
9096
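/* For illustration (assuming the usual type sizes): with two long
counters and two pointers the __va_list_tag record occupies 16 bytes
on 31 bit and 32 bytes on 64 bit, and since va_list is a one-element
array of it, the object named in user code decays to a pointer when
handed to the va_start/va_arg machinery. */
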
9097 /* Implement va_start by filling the va_list structure VALIST.
9098 STDARG_P is always true, and ignored.
9099 NEXTARG points to the first anonymous stack argument.
9100
9101 The following global variables are used to initialize
9102 the va_list structure:
9103
9104 crtl->args.info:
9105 holds number of gprs and fprs used for named arguments.
9106 crtl->args.arg_offset_rtx:
9107 holds the offset of the first anonymous stack argument
9108 (relative to the virtual arg pointer). */
9109
9110 static void
9111 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
9112 {
9113 HOST_WIDE_INT n_gpr, n_fpr;
9114 int off;
9115 tree f_gpr, f_fpr, f_ovf, f_sav;
9116 tree gpr, fpr, ovf, sav, t;
9117
9118 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9119 f_fpr = DECL_CHAIN (f_gpr);
9120 f_ovf = DECL_CHAIN (f_fpr);
9121 f_sav = DECL_CHAIN (f_ovf);
9122
9123 valist = build_simple_mem_ref (valist);
9124 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9125 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9126 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9127 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9128
9129 /* Count number of gp and fp argument registers used. */
9130
9131 n_gpr = crtl->args.info.gprs;
9132 n_fpr = crtl->args.info.fprs;
9133
9134 if (cfun->va_list_gpr_size)
9135 {
9136 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
9137 build_int_cst (NULL_TREE, n_gpr));
9138 TREE_SIDE_EFFECTS (t) = 1;
9139 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9140 }
9141
9142 if (cfun->va_list_fpr_size)
9143 {
9144 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
9145 build_int_cst (NULL_TREE, n_fpr));
9146 TREE_SIDE_EFFECTS (t) = 1;
9147 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9148 }
9149
9150 /* Find the overflow area. */
9151 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
9152 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG)
9153 {
9154 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
9155
9156 off = INTVAL (crtl->args.arg_offset_rtx);
9157 off = off < 0 ? 0 : off;
9158 if (TARGET_DEBUG_ARG)
9159 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
9160 (int)n_gpr, (int)n_fpr, off);
9161
9162 t = fold_build_pointer_plus_hwi (t, off);
9163
9164 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
9165 TREE_SIDE_EFFECTS (t) = 1;
9166 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9167 }
9168
9169 /* Find the register save area. */
9170 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
9171 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
9172 {
9173 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
9174 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
9175
9176 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
9177 TREE_SIDE_EFFECTS (t) = 1;
9178 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9179 }
9180 }
9181
9182 /* Implement va_arg by updating the va_list structure
9183 VALIST as required to retrieve an argument of type
9184 TYPE, and returning that argument.
9185
9186 Generates code equivalent to:
9187
9188 if (integral value) {
9189 if (size <= 4 && args.gpr < 5 ||
9190 size > 4 && args.gpr < 4 )
9191 ret = args.reg_save_area[args.gpr+8]
9192 else
9193 ret = *args.overflow_arg_area++;
9194 } else if (float value) {
9195 if (args.fgpr < 2)
9196 ret = args.reg_save_area[args.fpr+64]
9197 else
9198 ret = *args.overflow_arg_area++;
9199 } else if (aggregate value) {
9200 if (args.gpr < 5)
9201 ret = *args.reg_save_area[args.gpr]
9202 else
9203 ret = **args.overflow_arg_area++;
9204 } */
9205
9206 static tree
9207 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9208 gimple_seq *post_p ATTRIBUTE_UNUSED)
9209 {
9210 tree f_gpr, f_fpr, f_ovf, f_sav;
9211 tree gpr, fpr, ovf, sav, reg, t, u;
9212 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
9213 tree lab_false, lab_over, addr;
9214
9215 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9216 f_fpr = DECL_CHAIN (f_gpr);
9217 f_ovf = DECL_CHAIN (f_fpr);
9218 f_sav = DECL_CHAIN (f_ovf);
9219
9220 valist = build_va_arg_indirect_ref (valist);
9221 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9222 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9223 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9224
9225 /* The tree for args* cannot be shared between gpr/fpr and ovf since
9226 both appear on a lhs. */
9227 valist = unshare_expr (valist);
9228 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9229
9230 size = int_size_in_bytes (type);
9231
9232 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
9233 {
9234 if (TARGET_DEBUG_ARG)
9235 {
9236 fprintf (stderr, "va_arg: aggregate type");
9237 debug_tree (type);
9238 }
9239
9240 /* Aggregates are passed by reference. */
9241 indirect_p = 1;
9242 reg = gpr;
9243 n_reg = 1;
9244
9245 /* Kernel stack layout on 31 bit: it is assumed here that no padding
9246 will be added by s390_frame_info because for va_args an even number
9247 of gprs always has to be saved (r15-r2 = 14 regs). */
9248 sav_ofs = 2 * UNITS_PER_LONG;
9249 sav_scale = UNITS_PER_LONG;
9250 size = UNITS_PER_LONG;
9251 max_reg = GP_ARG_NUM_REG - n_reg;
9252 }
9253 else if (s390_function_arg_float (TYPE_MODE (type), type))
9254 {
9255 if (TARGET_DEBUG_ARG)
9256 {
9257 fprintf (stderr, "va_arg: float type");
9258 debug_tree (type);
9259 }
9260
9261 /* FP args go in FP registers, if present. */
9262 indirect_p = 0;
9263 reg = fpr;
9264 n_reg = 1;
9265 sav_ofs = 16 * UNITS_PER_LONG;
9266 sav_scale = 8;
9267 max_reg = FP_ARG_NUM_REG - n_reg;
9268 }
9269 else
9270 {
9271 if (TARGET_DEBUG_ARG)
9272 {
9273 fprintf (stderr, "va_arg: other type");
9274 debug_tree (type);
9275 }
9276
9277 /* Otherwise into GP registers. */
9278 indirect_p = 0;
9279 reg = gpr;
9280 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
9281
9282 /* Kernel stack layout on 31 bit: it is assumed here that no padding
9283 will be added by s390_frame_info because for va_args an even number
9284 of gprs always has to be saved (r15-r2 = 14 regs). */
9285 sav_ofs = 2 * UNITS_PER_LONG;
9286
9287 if (size < UNITS_PER_LONG)
9288 sav_ofs += UNITS_PER_LONG - size;
9289
9290 sav_scale = UNITS_PER_LONG;
9291 max_reg = GP_ARG_NUM_REG - n_reg;
9292 }
9293
9294 /* Pull the value out of the saved registers ... */
9295
9296 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9297 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9298 addr = create_tmp_var (ptr_type_node, "addr");
9299
9300 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
9301 t = build2 (GT_EXPR, boolean_type_node, reg, t);
9302 u = build1 (GOTO_EXPR, void_type_node, lab_false);
9303 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
9304 gimplify_and_add (t, pre_p);
9305
9306 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
9307 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
9308 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
9309 t = fold_build_pointer_plus (t, u);
9310
9311 gimplify_assign (addr, t, pre_p);
9312
9313 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9314
9315 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9316
9317
9318 /* ... Otherwise out of the overflow area. */
9319
9320 t = ovf;
9321 if (size < UNITS_PER_LONG)
9322 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
9323
9324 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9325
9326 gimplify_assign (addr, t, pre_p);
9327
9328 t = fold_build_pointer_plus_hwi (t, size);
9329 gimplify_assign (ovf, t, pre_p);
9330
9331 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9332
9333
9334 /* Increment register save count. */
9335
9336 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
9337 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
9338 gimplify_and_add (u, pre_p);
9339
9340 if (indirect_p)
9341 {
9342 t = build_pointer_type_for_mode (build_pointer_type (type),
9343 ptr_mode, true);
9344 addr = fold_convert (t, addr);
9345 addr = build_va_arg_indirect_ref (addr);
9346 }
9347 else
9348 {
9349 t = build_pointer_type_for_mode (type, ptr_mode, true);
9350 addr = fold_convert (t, addr);
9351 }
9352
9353 return build_va_arg_indirect_ref (addr);
9354 }
9355
9356 /* Output assembly code for the trampoline template to
9357 stdio stream FILE.
9358
9359 On S/390, we use gpr 1 internally in the trampoline code;
9360 gpr 0 is used to hold the static chain. */
9361
9362 static void
9363 s390_asm_trampoline_template (FILE *file)
9364 {
9365 rtx op[2];
9366 op[0] = gen_rtx_REG (Pmode, 0);
9367 op[1] = gen_rtx_REG (Pmode, 1);
9368
9369 if (TARGET_64BIT)
9370 {
9371 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
9372 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
9373 output_asm_insn ("br\t%1", op); /* 2 byte */
9374 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
9375 }
9376 else
9377 {
9378 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
9379 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
9380 output_asm_insn ("br\t%1", op); /* 2 byte */
9381 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
9382 }
9383 }
9384
9385 /* Emit RTL insns to initialize the variable parts of a trampoline.
9386 FNADDR is an RTX for the address of the function's pure code.
9387 CXT is an RTX for the static chain value for the function. */
9388
9389 static void
9390 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9391 {
9392 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
9393 rtx mem;
9394
9395 emit_block_move (m_tramp, assemble_trampoline_template (),
9396 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
9397
9398 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
9399 emit_move_insn (mem, cxt);
9400 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
9401 emit_move_insn (mem, fnaddr);
9402 }
9403
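/* Illustrative 64-bit trampoline image (a sketch derived from the
template and the two stores above, not a normative layout):
0: basr %r1,0 load address of the next insn into %r1
2: lmg %r0,%r1,14(%r1) %r0 <- word at offset 16, %r1 <- word at offset 24
8: br %r1 jump to the target with the static chain in %r0
16: static chain value filled in by s390_trampoline_init
24: target function address filled in by s390_trampoline_init */
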
9404 /* Output assembler code to FILE to increment profiler label # LABELNO
9405 for profiling a function entry. */
9406
9407 void
9408 s390_function_profiler (FILE *file, int labelno)
9409 {
9410 rtx op[7];
9411
9412 char label[128];
9413 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
9414
9415 fprintf (file, "# function profiler \n");
9416
9417 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
9418 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
9419 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
9420
9421 op[2] = gen_rtx_REG (Pmode, 1);
9422 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
9423 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
9424
9425 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
9426 if (flag_pic)
9427 {
9428 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
9429 op[4] = gen_rtx_CONST (Pmode, op[4]);
9430 }
9431
9432 if (TARGET_64BIT)
9433 {
9434 output_asm_insn ("stg\t%0,%1", op);
9435 output_asm_insn ("larl\t%2,%3", op);
9436 output_asm_insn ("brasl\t%0,%4", op);
9437 output_asm_insn ("lg\t%0,%1", op);
9438 }
9439 else if (!flag_pic)
9440 {
9441 op[6] = gen_label_rtx ();
9442
9443 output_asm_insn ("st\t%0,%1", op);
9444 output_asm_insn ("bras\t%2,%l6", op);
9445 output_asm_insn (".long\t%4", op);
9446 output_asm_insn (".long\t%3", op);
9447 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
9448 output_asm_insn ("l\t%0,0(%2)", op);
9449 output_asm_insn ("l\t%2,4(%2)", op);
9450 output_asm_insn ("basr\t%0,%0", op);
9451 output_asm_insn ("l\t%0,%1", op);
9452 }
9453 else
9454 {
9455 op[5] = gen_label_rtx ();
9456 op[6] = gen_label_rtx ();
9457
9458 output_asm_insn ("st\t%0,%1", op);
9459 output_asm_insn ("bras\t%2,%l6", op);
9460 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
9461 output_asm_insn (".long\t%4-%l5", op);
9462 output_asm_insn (".long\t%3-%l5", op);
9463 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
9464 output_asm_insn ("lr\t%0,%2", op);
9465 output_asm_insn ("a\t%0,0(%2)", op);
9466 output_asm_insn ("a\t%2,4(%2)", op);
9467 output_asm_insn ("basr\t%0,%0", op);
9468 output_asm_insn ("l\t%0,%1", op);
9469 }
9470 }
9471
9472 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
9473 into its SYMBOL_REF_FLAGS. */
9474
9475 static void
9476 s390_encode_section_info (tree decl, rtx rtl, int first)
9477 {
9478 default_encode_section_info (decl, rtl, first);
9479
9480 if (TREE_CODE (decl) == VAR_DECL)
9481 {
9482 /* If a variable has a forced alignment of less than 2 bytes, mark it
9483 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as a LARL
9484 operand. */
9485 if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
9486 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
9487 if (!DECL_SIZE (decl)
9488 || !DECL_ALIGN (decl)
9489 || !host_integerp (DECL_SIZE (decl), 0)
9490 || (DECL_ALIGN (decl) <= 64
9491 && DECL_ALIGN (decl) != tree_low_cst (DECL_SIZE (decl), 0)))
9492 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
9493 }
9494
9495 /* Literal pool references don't have a decl so they are handled
9496 differently here. We rely on the information in the MEM_ALIGN
9497 entry to decide upon natural alignment. */
9498 if (MEM_P (rtl)
9499 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
9500 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
9501 && (MEM_ALIGN (rtl) == 0
9502 || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
9503 || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
9504 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
9505 }
9506
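/* Example (illustrative only, hypothetical declaration): for
static char flag __attribute__ ((aligned (1)));
DECL_USER_ALIGN is set and the alignment is below 2 bytes, so the
symbol gets SYMBOL_FLAG_ALIGN1 and larl, which can only produce even
addresses, is not used for it; a char buf[3] (size 24 bits, alignment
8 bits) is likewise flagged as not naturally aligned. */
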
9507 /* Output thunk to FILE that implements a C++ virtual function call (with
9508 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
9509 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
9510 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
9511 relative to the resulting this pointer. */
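/* Roughly (illustrative pseudo-C, not part of the original sources),
the emitted thunk behaves like:

  this += delta;
  if (vcall_offset)
    this += *(long *) (*(void **) this + vcall_offset);
  goto *function;

using at most %r1 (plus %r0 for 31-bit PIC) as scratch, so no stack
frame is needed. */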
9512
9513 static void
9514 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
9515 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9516 tree function)
9517 {
9518 rtx op[10];
9519 int nonlocal = 0;
9520
9521 /* Make sure unwind info is emitted for the thunk if needed. */
9522 final_start_function (emit_barrier (), file, 1);
9523
9524 /* Operand 0 is the target function. */
9525 op[0] = XEXP (DECL_RTL (function), 0);
9526 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
9527 {
9528 nonlocal = 1;
9529 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
9530 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
9531 op[0] = gen_rtx_CONST (Pmode, op[0]);
9532 }
9533
9534 /* Operand 1 is the 'this' pointer. */
9535 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9536 op[1] = gen_rtx_REG (Pmode, 3);
9537 else
9538 op[1] = gen_rtx_REG (Pmode, 2);
9539
9540 /* Operand 2 is the delta. */
9541 op[2] = GEN_INT (delta);
9542
9543 /* Operand 3 is the vcall_offset. */
9544 op[3] = GEN_INT (vcall_offset);
9545
9546 /* Operand 4 is the temporary register. */
9547 op[4] = gen_rtx_REG (Pmode, 1);
9548
9549 /* Operands 5 to 8 can be used as labels. */
9550 op[5] = NULL_RTX;
9551 op[6] = NULL_RTX;
9552 op[7] = NULL_RTX;
9553 op[8] = NULL_RTX;
9554
9555 /* Operand 9 can be used as a temporary register. */
9556 op[9] = NULL_RTX;
9557
9558 /* Generate code. */
9559 if (TARGET_64BIT)
9560 {
9561 /* Set up literal pool pointer if required. */
9562 if ((!DISP_IN_RANGE (delta)
9563 && !CONST_OK_FOR_K (delta)
9564 && !CONST_OK_FOR_Os (delta))
9565 || (!DISP_IN_RANGE (vcall_offset)
9566 && !CONST_OK_FOR_K (vcall_offset)
9567 && !CONST_OK_FOR_Os (vcall_offset)))
9568 {
9569 op[5] = gen_label_rtx ();
9570 output_asm_insn ("larl\t%4,%5", op);
9571 }
9572
9573 /* Add DELTA to this pointer. */
9574 if (delta)
9575 {
9576 if (CONST_OK_FOR_J (delta))
9577 output_asm_insn ("la\t%1,%2(%1)", op);
9578 else if (DISP_IN_RANGE (delta))
9579 output_asm_insn ("lay\t%1,%2(%1)", op);
9580 else if (CONST_OK_FOR_K (delta))
9581 output_asm_insn ("aghi\t%1,%2", op);
9582 else if (CONST_OK_FOR_Os (delta))
9583 output_asm_insn ("agfi\t%1,%2", op);
9584 else
9585 {
9586 op[6] = gen_label_rtx ();
9587 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
9588 }
9589 }
9590
9591 /* Perform vcall adjustment. */
9592 if (vcall_offset)
9593 {
9594 if (DISP_IN_RANGE (vcall_offset))
9595 {
9596 output_asm_insn ("lg\t%4,0(%1)", op);
9597 output_asm_insn ("ag\t%1,%3(%4)", op);
9598 }
9599 else if (CONST_OK_FOR_K (vcall_offset))
9600 {
9601 output_asm_insn ("lghi\t%4,%3", op);
9602 output_asm_insn ("ag\t%4,0(%1)", op);
9603 output_asm_insn ("ag\t%1,0(%4)", op);
9604 }
9605 else if (CONST_OK_FOR_Os (vcall_offset))
9606 {
9607 output_asm_insn ("lgfi\t%4,%3", op);
9608 output_asm_insn ("ag\t%4,0(%1)", op);
9609 output_asm_insn ("ag\t%1,0(%4)", op);
9610 }
9611 else
9612 {
9613 op[7] = gen_label_rtx ();
9614 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
9615 output_asm_insn ("ag\t%4,0(%1)", op);
9616 output_asm_insn ("ag\t%1,0(%4)", op);
9617 }
9618 }
9619
9620 /* Jump to target. */
9621 output_asm_insn ("jg\t%0", op);
9622
9623 /* Output literal pool if required. */
9624 if (op[5])
9625 {
9626 output_asm_insn (".align\t4", op);
9627 targetm.asm_out.internal_label (file, "L",
9628 CODE_LABEL_NUMBER (op[5]));
9629 }
9630 if (op[6])
9631 {
9632 targetm.asm_out.internal_label (file, "L",
9633 CODE_LABEL_NUMBER (op[6]));
9634 output_asm_insn (".long\t%2", op);
9635 }
9636 if (op[7])
9637 {
9638 targetm.asm_out.internal_label (file, "L",
9639 CODE_LABEL_NUMBER (op[7]));
9640 output_asm_insn (".long\t%3", op);
9641 }
9642 }
9643 else
9644 {
9645 /* Set up the base pointer if required. */
9646 if (!vcall_offset
9647 || (!DISP_IN_RANGE (delta)
9648 && !CONST_OK_FOR_K (delta)
9649 && !CONST_OK_FOR_Os (delta))
9650 || (!DISP_IN_RANGE (delta)
9651 && !CONST_OK_FOR_K (vcall_offset)
9652 && !CONST_OK_FOR_Os (vcall_offset)))
9653 {
9654 op[5] = gen_label_rtx ();
9655 output_asm_insn ("basr\t%4,0", op);
9656 targetm.asm_out.internal_label (file, "L",
9657 CODE_LABEL_NUMBER (op[5]));
9658 }
9659
9660 /* Add DELTA to this pointer. */
9661 if (delta)
9662 {
9663 if (CONST_OK_FOR_J (delta))
9664 output_asm_insn ("la\t%1,%2(%1)", op);
9665 else if (DISP_IN_RANGE (delta))
9666 output_asm_insn ("lay\t%1,%2(%1)", op);
9667 else if (CONST_OK_FOR_K (delta))
9668 output_asm_insn ("ahi\t%1,%2", op);
9669 else if (CONST_OK_FOR_Os (delta))
9670 output_asm_insn ("afi\t%1,%2", op);
9671 else
9672 {
9673 op[6] = gen_label_rtx ();
9674 output_asm_insn ("a\t%1,%6-%5(%4)", op);
9675 }
9676 }
9677
9678 /* Perform vcall adjustment. */
9679 if (vcall_offset)
9680 {
9681 if (CONST_OK_FOR_J (vcall_offset))
9682 {
9683 output_asm_insn ("l\t%4,0(%1)", op);
9684 output_asm_insn ("a\t%1,%3(%4)", op);
9685 }
9686 else if (DISP_IN_RANGE (vcall_offset))
9687 {
9688 output_asm_insn ("l\t%4,0(%1)", op);
9689 output_asm_insn ("ay\t%1,%3(%4)", op);
9690 }
9691 else if (CONST_OK_FOR_K (vcall_offset))
9692 {
9693 output_asm_insn ("lhi\t%4,%3", op);
9694 output_asm_insn ("a\t%4,0(%1)", op);
9695 output_asm_insn ("a\t%1,0(%4)", op);
9696 }
9697 else if (CONST_OK_FOR_Os (vcall_offset))
9698 {
9699 output_asm_insn ("iilf\t%4,%3", op);
9700 output_asm_insn ("a\t%4,0(%1)", op);
9701 output_asm_insn ("a\t%1,0(%4)", op);
9702 }
9703 else
9704 {
9705 op[7] = gen_label_rtx ();
9706 output_asm_insn ("l\t%4,%7-%5(%4)", op);
9707 output_asm_insn ("a\t%4,0(%1)", op);
9708 output_asm_insn ("a\t%1,0(%4)", op);
9709 }
9710
9711 /* We had to clobber the base pointer register.
9712 Set it up again (with a different base). */
9713 op[5] = gen_label_rtx ();
9714 output_asm_insn ("basr\t%4,0", op);
9715 targetm.asm_out.internal_label (file, "L",
9716 CODE_LABEL_NUMBER (op[5]));
9717 }
9718
9719 /* Jump to target. */
9720 op[8] = gen_label_rtx ();
9721
9722 if (!flag_pic)
9723 output_asm_insn ("l\t%4,%8-%5(%4)", op);
9724 else if (!nonlocal)
9725 output_asm_insn ("a\t%4,%8-%5(%4)", op);
9726 /* We cannot call through .plt, since .plt requires %r12 loaded. */
9727 else if (flag_pic == 1)
9728 {
9729 output_asm_insn ("a\t%4,%8-%5(%4)", op);
9730 output_asm_insn ("l\t%4,%0(%4)", op);
9731 }
9732 else if (flag_pic == 2)
9733 {
9734 op[9] = gen_rtx_REG (Pmode, 0);
9735 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
9736 output_asm_insn ("a\t%4,%8-%5(%4)", op);
9737 output_asm_insn ("ar\t%4,%9", op);
9738 output_asm_insn ("l\t%4,0(%4)", op);
9739 }
9740
9741 output_asm_insn ("br\t%4", op);
9742
9743 /* Output literal pool. */
9744 output_asm_insn (".align\t4", op);
9745
9746 if (nonlocal && flag_pic == 2)
9747 output_asm_insn (".long\t%0", op);
9748 if (nonlocal)
9749 {
9750 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9751 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
9752 }
9753
9754 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
9755 if (!flag_pic)
9756 output_asm_insn (".long\t%0", op);
9757 else
9758 output_asm_insn (".long\t%0-%5", op);
9759
9760 if (op[6])
9761 {
9762 targetm.asm_out.internal_label (file, "L",
9763 CODE_LABEL_NUMBER (op[6]));
9764 output_asm_insn (".long\t%2", op);
9765 }
9766 if (op[7])
9767 {
9768 targetm.asm_out.internal_label (file, "L",
9769 CODE_LABEL_NUMBER (op[7]));
9770 output_asm_insn (".long\t%3", op);
9771 }
9772 }
9773 final_end_function ();
9774 }
9775
9776 static bool
9777 s390_valid_pointer_mode (enum machine_mode mode)
9778 {
9779 return (mode == SImode || (TARGET_64BIT && mode == DImode));
9780 }
9781
9782 /* Checks whether the given CALL_EXPR would use a call-saved
9783 register. This is used to decide whether sibling call
9784 optimization could be performed on the respective function
9785 call. */
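/* For illustration, assuming the standard s390 calling convention in which
   the first integer/pointer arguments are passed in %r2 ... %r6: an
   argument that ends up in %r6 lands in a register that is not in
   call_used_regs, so this function returns true and the sibcall
   optimization is rejected for that call.  */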
9786
9787 static bool
9788 s390_call_saved_register_used (tree call_expr)
9789 {
9790 CUMULATIVE_ARGS cum_v;
9791 cumulative_args_t cum;
9792 tree parameter;
9793 enum machine_mode mode;
9794 tree type;
9795 rtx parm_rtx;
9796 int reg, i;
9797
9798 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
9799 cum = pack_cumulative_args (&cum_v);
9800
9801 for (i = 0; i < call_expr_nargs (call_expr); i++)
9802 {
9803 parameter = CALL_EXPR_ARG (call_expr, i);
9804 gcc_assert (parameter);
9805
9806 /* For an undeclared variable passed as parameter we will get
9807 an ERROR_MARK node here. */
9808 if (TREE_CODE (parameter) == ERROR_MARK)
9809 return true;
9810
9811 type = TREE_TYPE (parameter);
9812 gcc_assert (type);
9813
9814 mode = TYPE_MODE (type);
9815 gcc_assert (mode);
9816
9817 if (pass_by_reference (&cum_v, mode, type, true))
9818 {
9819 mode = Pmode;
9820 type = build_pointer_type (type);
9821 }
9822
9823 parm_rtx = s390_function_arg (cum, mode, type, 0);
9824
9825 s390_function_arg_advance (cum, mode, type, 0);
9826
9827 if (!parm_rtx)
9828 continue;
9829
9830 if (REG_P (parm_rtx))
9831 {
9832 for (reg = 0;
9833 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
9834 reg++)
9835 if (!call_used_regs[reg + REGNO (parm_rtx)])
9836 return true;
9837 }
9838
9839 if (GET_CODE (parm_rtx) == PARALLEL)
9840 {
9841 int i;
9842
9843 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
9844 {
9845 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
9846
9847 gcc_assert (REG_P (r));
9848
9849 for (reg = 0;
9850 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
9851 reg++)
9852 if (!call_used_regs[reg + REGNO (r)])
9853 return true;
9854 }
9855 }
9856
9857 }
9858 return false;
9859 }
9860
9861 /* Return true if the given call expression can be
9862 turned into a sibling call.
9863 DECL holds the declaration of the function to be called whereas
9864 EXP is the call expression itself. */
9865
9866 static bool
9867 s390_function_ok_for_sibcall (tree decl, tree exp)
9868 {
9869 /* The TPF epilogue uses register 1. */
9870 if (TARGET_TPF_PROFILING)
9871 return false;
9872
9873 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
9874 which would have to be restored before the sibcall. */
9875 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
9876 return false;
9877
9878 /* Register 6 on s390 is available as an argument register but is
9879 unfortunately call-saved (callee-saved). This makes functions needing
9880 this register for arguments not suitable for sibcalls. */
9881 return !s390_call_saved_register_used (exp);
9882 }
9883
9884 /* Return the fixed registers used for condition codes. */
9885
9886 static bool
9887 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9888 {
9889 *p1 = CC_REGNUM;
9890 *p2 = INVALID_REGNUM;
9891
9892 return true;
9893 }
9894
9895 /* This function is used by the call expanders of the machine description.
9896 It emits the call insn itself together with the necessary operations
9897 to adjust the target address and returns the emitted insn.
9898 ADDR_LOCATION is the target address rtx
9899 TLS_CALL the location of the thread-local symbol
9900 RESULT_REG the register where the result of the call should be stored
9901 RETADDR_REG the register where the return address should be stored
9902 If this parameter is NULL_RTX the call is considered
9903 to be a sibling call. */
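/* A minimal usage sketch (hypothetical, not taken from the expanders
   themselves): a normal call passes the return-address register while a
   sibling call passes NULL_RTX, e.g.

     s390_emit_call (addr, NULL_RTX, result_reg,
                     gen_rtx_REG (Pmode, RETURN_REGNUM));
     s390_emit_call (addr, NULL_RTX, result_reg, NULL_RTX);

   where the second form is treated as a sibling call as described above.  */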
9904
9905 rtx
9906 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
9907 rtx retaddr_reg)
9908 {
9909 bool plt_call = false;
9910 rtx insn;
9911 rtx call;
9912 rtx clobber;
9913 rtvec vec;
9914
9915 /* Direct function calls need special treatment. */
9916 if (GET_CODE (addr_location) == SYMBOL_REF)
9917 {
9918 /* When calling a global routine in PIC mode, we must
9919 replace the symbol itself with the PLT stub. */
9920 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
9921 {
9922 if (retaddr_reg != NULL_RTX)
9923 {
9924 addr_location = gen_rtx_UNSPEC (Pmode,
9925 gen_rtvec (1, addr_location),
9926 UNSPEC_PLT);
9927 addr_location = gen_rtx_CONST (Pmode, addr_location);
9928 plt_call = true;
9929 }
9930 else
9931 /* For -fpic code the PLT entries might use r12 which is
9932 call-saved. Therefore we cannot do a sibcall when
9933 calling directly using a symbol ref. When reaching
9934 this point we decided (in s390_function_ok_for_sibcall)
9935 to do a sibcall for a function pointer but one of the
9936 optimizers was able to get rid of the function pointer
9937 by propagating the symbol ref into the call. This
9938 optimization is illegal for S/390 so we turn the direct
9939 call into an indirect call again. */
9940 addr_location = force_reg (Pmode, addr_location);
9941 }
9942
9943 /* Unless we can use the bras(l) insn, force the
9944 routine address into a register. */
9945 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
9946 {
9947 if (flag_pic)
9948 addr_location = legitimize_pic_address (addr_location, 0);
9949 else
9950 addr_location = force_reg (Pmode, addr_location);
9951 }
9952 }
9953
9954 /* If it is already an indirect call or the code above moved the
9955 SYMBOL_REF to somewhere else make sure the address can be found in
9956 register 1. */
9957 if (retaddr_reg == NULL_RTX
9958 && GET_CODE (addr_location) != SYMBOL_REF
9959 && !plt_call)
9960 {
9961 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
9962 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
9963 }
9964
9965 addr_location = gen_rtx_MEM (QImode, addr_location);
9966 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
9967
9968 if (result_reg != NULL_RTX)
9969 call = gen_rtx_SET (VOIDmode, result_reg, call);
9970
9971 if (retaddr_reg != NULL_RTX)
9972 {
9973 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
9974
9975 if (tls_call != NULL_RTX)
9976 vec = gen_rtvec (3, call, clobber,
9977 gen_rtx_USE (VOIDmode, tls_call));
9978 else
9979 vec = gen_rtvec (2, call, clobber);
9980
9981 call = gen_rtx_PARALLEL (VOIDmode, vec);
9982 }
9983
9984 insn = emit_call_insn (call);
9985
9986 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
9987 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
9988 {
9989 /* s390_function_ok_for_sibcall should
9990 have denied sibcalls in this case. */
9991 gcc_assert (retaddr_reg != NULL_RTX);
9992 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
9993 }
9994 return insn;
9995 }
9996
9997 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
9998
9999 static void
10000 s390_conditional_register_usage (void)
10001 {
10002 int i;
10003
10004 if (flag_pic)
10005 {
10006 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10007 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10008 }
10009 if (TARGET_CPU_ZARCH)
10010 {
10011 fixed_regs[BASE_REGNUM] = 0;
10012 call_used_regs[BASE_REGNUM] = 0;
10013 fixed_regs[RETURN_REGNUM] = 0;
10014 call_used_regs[RETURN_REGNUM] = 0;
10015 }
10016 if (TARGET_64BIT)
10017 {
10018 for (i = 24; i < 32; i++)
10019 call_used_regs[i] = call_really_used_regs[i] = 0;
10020 }
10021 else
10022 {
10023 for (i = 18; i < 20; i++)
10024 call_used_regs[i] = call_really_used_regs[i] = 0;
10025 }
10026
10027 if (TARGET_SOFT_FLOAT)
10028 {
10029 for (i = 16; i < 32; i++)
10030 call_used_regs[i] = fixed_regs[i] = 1;
10031 }
10032 }
10033
10034 /* Corresponding function to eh_return expander. */
10035
10036 static GTY(()) rtx s390_tpf_eh_return_symbol;
10037 void
10038 s390_emit_tpf_eh_return (rtx target)
10039 {
10040 rtx insn, reg;
10041
10042 if (!s390_tpf_eh_return_symbol)
10043 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
10044
10045 reg = gen_rtx_REG (Pmode, 2);
10046
10047 emit_move_insn (reg, target);
10048 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
10049 gen_rtx_REG (Pmode, RETURN_REGNUM));
10050 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
10051
10052 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
10053 }
10054
10055 /* Rework the prologue/epilogue to avoid saving/restoring
10056 registers unnecessarily. */
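/* For example (purely illustrative): a store-multiple emitted by the
   generic prologue because the literal pool base register still looked
   necessary can be replaced below by a narrower save_gprs sequence
   covering only cfun_frame_layout.first_save_gpr ...
   cfun_frame_layout.last_save_gpr, or removed entirely once the final
   frame layout shows that no such register needs saving.  */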
10057
10058 static void
10059 s390_optimize_prologue (void)
10060 {
10061 rtx insn, new_insn, next_insn;
10062
10063 /* Do a final recompute of the frame-related data. */
10064
10065 s390_update_frame_layout ();
10066
10067 /* If all special registers are in fact used, there's nothing we
10068 can do, so no point in walking the insn list. */
10069
10070 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
10071 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
10072 && (TARGET_CPU_ZARCH
10073 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
10074 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
10075 return;
10076
10077 /* Search for prologue/epilogue insns and replace them. */
10078
10079 for (insn = get_insns (); insn; insn = next_insn)
10080 {
10081 int first, last, off;
10082 rtx set, base, offset;
10083
10084 next_insn = NEXT_INSN (insn);
10085
10086 if (GET_CODE (insn) != INSN)
10087 continue;
10088
10089 if (GET_CODE (PATTERN (insn)) == PARALLEL
10090 && store_multiple_operation (PATTERN (insn), VOIDmode))
10091 {
10092 set = XVECEXP (PATTERN (insn), 0, 0);
10093 first = REGNO (SET_SRC (set));
10094 last = first + XVECLEN (PATTERN (insn), 0) - 1;
10095 offset = const0_rtx;
10096 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10097 off = INTVAL (offset);
10098
10099 if (GET_CODE (base) != REG || off < 0)
10100 continue;
10101 if (cfun_frame_layout.first_save_gpr != -1
10102 && (cfun_frame_layout.first_save_gpr < first
10103 || cfun_frame_layout.last_save_gpr > last))
10104 continue;
10105 if (REGNO (base) != STACK_POINTER_REGNUM
10106 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10107 continue;
10108 if (first > BASE_REGNUM || last < BASE_REGNUM)
10109 continue;
10110
10111 if (cfun_frame_layout.first_save_gpr != -1)
10112 {
10113 new_insn = save_gprs (base,
10114 off + (cfun_frame_layout.first_save_gpr
10115 - first) * UNITS_PER_LONG,
10116 cfun_frame_layout.first_save_gpr,
10117 cfun_frame_layout.last_save_gpr);
10118 new_insn = emit_insn_before (new_insn, insn);
10119 INSN_ADDRESSES_NEW (new_insn, -1);
10120 }
10121
10122 remove_insn (insn);
10123 continue;
10124 }
10125
10126 if (cfun_frame_layout.first_save_gpr == -1
10127 && GET_CODE (PATTERN (insn)) == SET
10128 && GET_CODE (SET_SRC (PATTERN (insn))) == REG
10129 && (REGNO (SET_SRC (PATTERN (insn))) == BASE_REGNUM
10130 || (!TARGET_CPU_ZARCH
10131 && REGNO (SET_SRC (PATTERN (insn))) == RETURN_REGNUM))
10132 && GET_CODE (SET_DEST (PATTERN (insn))) == MEM)
10133 {
10134 set = PATTERN (insn);
10135 first = REGNO (SET_SRC (set));
10136 offset = const0_rtx;
10137 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10138 off = INTVAL (offset);
10139
10140 if (GET_CODE (base) != REG || off < 0)
10141 continue;
10142 if (REGNO (base) != STACK_POINTER_REGNUM
10143 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10144 continue;
10145
10146 remove_insn (insn);
10147 continue;
10148 }
10149
10150 if (GET_CODE (PATTERN (insn)) == PARALLEL
10151 && load_multiple_operation (PATTERN (insn), VOIDmode))
10152 {
10153 set = XVECEXP (PATTERN (insn), 0, 0);
10154 first = REGNO (SET_DEST (set));
10155 last = first + XVECLEN (PATTERN (insn), 0) - 1;
10156 offset = const0_rtx;
10157 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
10158 off = INTVAL (offset);
10159
10160 if (GET_CODE (base) != REG || off < 0)
10161 continue;
10162 if (cfun_frame_layout.first_restore_gpr != -1
10163 && (cfun_frame_layout.first_restore_gpr < first
10164 || cfun_frame_layout.last_restore_gpr > last))
10165 continue;
10166 if (REGNO (base) != STACK_POINTER_REGNUM
10167 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10168 continue;
10169 if (first > BASE_REGNUM || last < BASE_REGNUM)
10170 continue;
10171
10172 if (cfun_frame_layout.first_restore_gpr != -1)
10173 {
10174 new_insn = restore_gprs (base,
10175 off + (cfun_frame_layout.first_restore_gpr
10176 - first) * UNITS_PER_LONG,
10177 cfun_frame_layout.first_restore_gpr,
10178 cfun_frame_layout.last_restore_gpr);
10179 new_insn = emit_insn_before (new_insn, insn);
10180 INSN_ADDRESSES_NEW (new_insn, -1);
10181 }
10182
10183 remove_insn (insn);
10184 continue;
10185 }
10186
10187 if (cfun_frame_layout.first_restore_gpr == -1
10188 && GET_CODE (PATTERN (insn)) == SET
10189 && GET_CODE (SET_DEST (PATTERN (insn))) == REG
10190 && (REGNO (SET_DEST (PATTERN (insn))) == BASE_REGNUM
10191 || (!TARGET_CPU_ZARCH
10192 && REGNO (SET_DEST (PATTERN (insn))) == RETURN_REGNUM))
10193 && GET_CODE (SET_SRC (PATTERN (insn))) == MEM)
10194 {
10195 set = PATTERN (insn);
10196 first = REGNO (SET_DEST (set));
10197 offset = const0_rtx;
10198 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
10199 off = INTVAL (offset);
10200
10201 if (GET_CODE (base) != REG || off < 0)
10202 continue;
10203 if (REGNO (base) != STACK_POINTER_REGNUM
10204 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10205 continue;
10206
10207 remove_insn (insn);
10208 continue;
10209 }
10210 }
10211 }
10212
10213 /* On z10 and later the dynamic branch predictor must see the
10214 backward jump within a certain window. If not, it falls back to
10215 the static prediction. This function rearranges the loop backward
10216 branch in a way which makes the static prediction always correct.
10217 The function returns true if it added an instruction. */
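/* Illustration of the transformation done below (mnemonics only sketched,
   not literal output of this function):

     before:                         after:
       Lloop:                          Lloop:
         ...                             ...
         jCC   Lloop   (backward)        jNCC  Lnew    (short forward)
                                         j     Lloop   (backward)
                                       Lnew:

   The far backward branch becomes unconditional and hence always
   correctly predicted, while the remaining conditional branch only jumps
   forward over it.  */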
10218 static bool
10219 s390_fix_long_loop_prediction (rtx insn)
10220 {
10221 rtx set = single_set (insn);
10222 rtx code_label, label_ref, new_label;
10223 rtx uncond_jump;
10224 rtx cur_insn;
10225 rtx tmp;
10226 int distance;
10227
10228 /* This will exclude branch on count and branch on index patterns
10229 since these are correctly statically predicted. */
10230 if (!set
10231 || SET_DEST (set) != pc_rtx
10232 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
10233 return false;
10234
10235 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
10236 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
10237
10238 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
10239
10240 code_label = XEXP (label_ref, 0);
10241
10242 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
10243 || INSN_ADDRESSES (INSN_UID (insn)) == -1
10244 || (INSN_ADDRESSES (INSN_UID (insn))
10245 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
10246 return false;
10247
10248 for (distance = 0, cur_insn = PREV_INSN (insn);
10249 distance < PREDICT_DISTANCE - 6;
10250 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
10251 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
10252 return false;
10253
10254 new_label = gen_label_rtx ();
10255 uncond_jump = emit_jump_insn_after (
10256 gen_rtx_SET (VOIDmode, pc_rtx,
10257 gen_rtx_LABEL_REF (VOIDmode, code_label)),
10258 insn);
10259 emit_label_after (new_label, uncond_jump);
10260
10261 tmp = XEXP (SET_SRC (set), 1);
10262 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
10263 XEXP (SET_SRC (set), 2) = tmp;
10264 INSN_CODE (insn) = -1;
10265
10266 XEXP (label_ref, 0) = new_label;
10267 JUMP_LABEL (insn) = new_label;
10268 JUMP_LABEL (uncond_jump) = code_label;
10269
10270 return true;
10271 }
10272
10273 /* Returns 1 if INSN reads the value of REG for purposes not related
10274 to addressing of memory, and 0 otherwise. */
10275 static int
10276 s390_non_addr_reg_read_p (rtx reg, rtx insn)
10277 {
10278 return reg_referenced_p (reg, PATTERN (insn))
10279 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
10280 }
10281
10282 /* Starting from INSN find_cond_jump looks downwards in the insn
10283 stream for a single jump insn which is the last user of the
10284 condition code set in INSN. */
10285 static rtx
10286 find_cond_jump (rtx insn)
10287 {
10288 for (; insn; insn = NEXT_INSN (insn))
10289 {
10290 rtx ite, cc;
10291
10292 if (LABEL_P (insn))
10293 break;
10294
10295 if (!JUMP_P (insn))
10296 {
10297 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
10298 break;
10299 continue;
10300 }
10301
10302 /* This will be triggered by a return. */
10303 if (GET_CODE (PATTERN (insn)) != SET)
10304 break;
10305
10306 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
10307 ite = SET_SRC (PATTERN (insn));
10308
10309 if (GET_CODE (ite) != IF_THEN_ELSE)
10310 break;
10311
10312 cc = XEXP (XEXP (ite, 0), 0);
10313 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
10314 break;
10315
10316 if (find_reg_note (insn, REG_DEAD, cc))
10317 return insn;
10318 break;
10319 }
10320
10321 return NULL_RTX;
10322 }
10323
10324 /* Swap the condition in COND and the operands in OP0 and OP1 so that
10325 the semantics does not change. If NULL_RTX is passed as COND the
10326 function tries to find the conditional jump starting with INSN. */
10327 static void
10328 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
10329 {
10330 rtx tmp = *op0;
10331
10332 if (cond == NULL_RTX)
10333 {
10334 rtx jump = find_cond_jump (NEXT_INSN (insn));
10335 jump = jump ? single_set (jump) : NULL_RTX;
10336
10337 if (jump == NULL_RTX)
10338 return;
10339
10340 cond = XEXP (XEXP (jump, 1), 0);
10341 }
10342
10343 *op0 = *op1;
10344 *op1 = tmp;
10345 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
10346 }
10347
10348 /* On z10, instructions of the compare-and-branch family have the
10349 property to access the register occurring as second operand with
10350 its bits complemented. If such a compare is grouped with a second
10351 instruction that accesses the same register non-complemented, and
10352 if that register's value is delivered via a bypass, then the
10353 pipeline recycles, thereby causing significant performance decline.
10354 This function locates such situations and exchanges the two
10355 operands of the compare. The function returns true whenever it
10356 adds an insn. */
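/* Sketch of the rewrite performed below: given (in pseudo RTL)

     (set (reg CC) (compare (reg A) (reg B)))

   grouped with a neighbouring insn that reads (reg B), the operands are
   exchanged and the comparison code swapped accordingly (e.g. LT becomes
   GT), which leaves the semantics unchanged.  If swapping would just
   create a conflict with the other neighbour instead, a NOP is emitted
   after the compare to separate the two instructions.  */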
10357 static bool
10358 s390_z10_optimize_cmp (rtx insn)
10359 {
10360 rtx prev_insn, next_insn;
10361 bool insn_added_p = false;
10362 rtx cond, *op0, *op1;
10363
10364 if (GET_CODE (PATTERN (insn)) == PARALLEL)
10365 {
10366 /* Handle compare and branch and branch on count
10367 instructions. */
10368 rtx pattern = single_set (insn);
10369
10370 if (!pattern
10371 || SET_DEST (pattern) != pc_rtx
10372 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
10373 return false;
10374
10375 cond = XEXP (SET_SRC (pattern), 0);
10376 op0 = &XEXP (cond, 0);
10377 op1 = &XEXP (cond, 1);
10378 }
10379 else if (GET_CODE (PATTERN (insn)) == SET)
10380 {
10381 rtx src, dest;
10382
10383 /* Handle normal compare instructions. */
10384 src = SET_SRC (PATTERN (insn));
10385 dest = SET_DEST (PATTERN (insn));
10386
10387 if (!REG_P (dest)
10388 || !CC_REGNO_P (REGNO (dest))
10389 || GET_CODE (src) != COMPARE)
10390 return false;
10391
10392 /* s390_swap_cmp will try to find the conditional
10393 jump when passing NULL_RTX as condition. */
10394 cond = NULL_RTX;
10395 op0 = &XEXP (src, 0);
10396 op1 = &XEXP (src, 1);
10397 }
10398 else
10399 return false;
10400
10401 if (!REG_P (*op0) || !REG_P (*op1))
10402 return false;
10403
10404 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
10405 return false;
10406
10407 /* Swap the COMPARE arguments and its mask if there is a
10408 conflicting access in the previous insn. */
10409 prev_insn = prev_active_insn (insn);
10410 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
10411 && reg_referenced_p (*op1, PATTERN (prev_insn)))
10412 s390_swap_cmp (cond, op0, op1, insn);
10413
10414 /* Check if there is a conflict with the next insn. If there
10415 was no conflict with the previous insn, then swap the
10416 COMPARE arguments and its mask. If we already swapped
10417 the operands, or if swapping them would cause a conflict
10418 with the previous insn, issue a NOP after the COMPARE in
10419 order to separate the two instructions. */
10420 next_insn = next_active_insn (insn);
10421 if (next_insn != NULL_RTX && INSN_P (next_insn)
10422 && s390_non_addr_reg_read_p (*op1, next_insn))
10423 {
10424 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
10425 && s390_non_addr_reg_read_p (*op0, prev_insn))
10426 {
10427 if (REGNO (*op1) == 0)
10428 emit_insn_after (gen_nop1 (), insn);
10429 else
10430 emit_insn_after (gen_nop (), insn);
10431 insn_added_p = true;
10432 }
10433 else
10434 s390_swap_cmp (cond, op0, op1, insn);
10435 }
10436 return insn_added_p;
10437 }
10438
10439 /* Perform machine-dependent processing. */
10440
10441 static void
10442 s390_reorg (void)
10443 {
10444 bool pool_overflow = false;
10445
10446 /* Make sure all splits have been performed; splits after
10447 machine_dependent_reorg might confuse insn length counts. */
10448 split_all_insns_noflow ();
10449
10450 /* Install the main literal pool and the associated base
10451 register load insns.
10452
10453 In addition, there are two problematic situations we need
10454 to correct:
10455
10456 - the literal pool might be > 4096 bytes in size, so that
10457 some of its elements cannot be directly accessed
10458
10459 - a branch target might be > 64K away from the branch, so that
10460 it is not possible to use a PC-relative instruction.
10461
10462 To fix those, we split the single literal pool into multiple
10463 pool chunks, reloading the pool base register at various
10464 points throughout the function to ensure it always points to
10465 the pool chunk the following code expects, and / or replace
10466 PC-relative branches by absolute branches.
10467
10468 However, the two problems are interdependent: splitting the
10469 literal pool can move a branch further away from its target,
10470 causing the 64K limit to overflow, and on the other hand,
10471 replacing a PC-relative branch by an absolute branch means
10472 we need to put the branch target address into the literal
10473 pool, possibly causing it to overflow.
10474
10475 So, we loop trying to fix up both problems until we manage
10476 to satisfy both conditions at the same time. Note that the
10477 loop is guaranteed to terminate as every pass of the loop
10478 strictly decreases the total number of PC-relative branches
10479 in the function. (This is not completely true as there
10480 might be branch-over-pool insns introduced by chunkify_start.
10481 Those never need to be split however.) */
10482
10483 for (;;)
10484 {
10485 struct constant_pool *pool = NULL;
10486
10487 /* Collect the literal pool. */
10488 if (!pool_overflow)
10489 {
10490 pool = s390_mainpool_start ();
10491 if (!pool)
10492 pool_overflow = true;
10493 }
10494
10495 /* If literal pool overflowed, start to chunkify it. */
10496 if (pool_overflow)
10497 pool = s390_chunkify_start ();
10498
10499 /* Split out-of-range branches. If this has created new
10500 literal pool entries, cancel current chunk list and
10501 recompute it. zSeries machines have large branch
10502 instructions, so we never need to split a branch. */
10503 if (!TARGET_CPU_ZARCH && s390_split_branches ())
10504 {
10505 if (pool_overflow)
10506 s390_chunkify_cancel (pool);
10507 else
10508 s390_mainpool_cancel (pool);
10509
10510 continue;
10511 }
10512
10513 /* If we made it up to here, both conditions are satisfied.
10514 Finish up literal pool related changes. */
10515 if (pool_overflow)
10516 s390_chunkify_finish (pool);
10517 else
10518 s390_mainpool_finish (pool);
10519
10520 /* We're done splitting branches. */
10521 cfun->machine->split_branches_pending_p = false;
10522 break;
10523 }
10524
10525 /* Generate out-of-pool execute target insns. */
10526 if (TARGET_CPU_ZARCH)
10527 {
10528 rtx insn, label, target;
10529
10530 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10531 {
10532 label = s390_execute_label (insn);
10533 if (!label)
10534 continue;
10535
10536 gcc_assert (label != const0_rtx);
10537
10538 target = emit_label (XEXP (label, 0));
10539 INSN_ADDRESSES_NEW (target, -1);
10540
10541 target = emit_insn (s390_execute_target (insn));
10542 INSN_ADDRESSES_NEW (target, -1);
10543 }
10544 }
10545
10546 /* Try to optimize prologue and epilogue further. */
10547 s390_optimize_prologue ();
10548
10549 /* Walk over the insns and do some >=z10 specific changes. */
10550 if (s390_tune == PROCESSOR_2097_Z10
10551 || s390_tune == PROCESSOR_2817_Z196
10552 || s390_tune == PROCESSOR_2827_ZEC12)
10553 {
10554 rtx insn;
10555 bool insn_added_p = false;
10556
10557 /* The insn lengths and addresses have to be up to date for the
10558 following manipulations. */
10559 shorten_branches (get_insns ());
10560
10561 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10562 {
10563 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10564 continue;
10565
10566 if (JUMP_P (insn))
10567 insn_added_p |= s390_fix_long_loop_prediction (insn);
10568
10569 if ((GET_CODE (PATTERN (insn)) == PARALLEL
10570 || GET_CODE (PATTERN (insn)) == SET)
10571 && s390_tune == PROCESSOR_2097_Z10)
10572 insn_added_p |= s390_z10_optimize_cmp (insn);
10573 }
10574
10575 /* Adjust branches if we added new instructions. */
10576 if (insn_added_p)
10577 shorten_branches (get_insns ());
10578 }
10579 }
10580
10581 /* Return true if INSN is a fp load insn writing register REGNO. */
10582 static inline bool
10583 s390_fpload_toreg (rtx insn, unsigned int regno)
10584 {
10585 rtx set;
10586 enum attr_type flag = s390_safe_attr_type (insn);
10587
10588 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
10589 return false;
10590
10591 set = single_set (insn);
10592
10593 if (set == NULL_RTX)
10594 return false;
10595
10596 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
10597 return false;
10598
10599 if (REGNO (SET_DEST (set)) != regno)
10600 return false;
10601
10602 return true;
10603 }
10604
10605 /* This value describes the distance to be avoided between an
10606 arithmetic fp instruction and an fp load writing the same register.
10607 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
10608 fine, but the exact value has to be avoided. Otherwise the FP
10609 pipeline will throw an exception causing a major penalty. */
10610 #define Z10_EARLYLOAD_DISTANCE 7
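/* Example of the situation being avoided (purely illustrative): an FP
   arithmetic instruction writing %f0 followed, at a distance of exactly
   Z10_EARLYLOAD_DISTANCE active instructions, by an FP load into %f0 hits
   the penalty described above; one instruction closer or further apart is
   fine.  */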
10611
10612 /* Rearrange the ready list in order to avoid the situation described
10613 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
10614 moved to the very end of the ready list. */
10615 static void
10616 s390_z10_prevent_earlyload_conflicts (rtx *ready, int *nready_p)
10617 {
10618 unsigned int regno;
10619 int nready = *nready_p;
10620 rtx tmp;
10621 int i;
10622 rtx insn;
10623 rtx set;
10624 enum attr_type flag;
10625 int distance;
10626
10627 /* Skip DISTANCE - 1 active insns. */
10628 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
10629 distance > 0 && insn != NULL_RTX;
10630 distance--, insn = prev_active_insn (insn))
10631 if (CALL_P (insn) || JUMP_P (insn))
10632 return;
10633
10634 if (insn == NULL_RTX)
10635 return;
10636
10637 set = single_set (insn);
10638
10639 if (set == NULL_RTX || !REG_P (SET_DEST (set))
10640 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
10641 return;
10642
10643 flag = s390_safe_attr_type (insn);
10644
10645 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
10646 return;
10647
10648 regno = REGNO (SET_DEST (set));
10649 i = nready - 1;
10650
10651 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
10652 i--;
10653
10654 if (!i)
10655 return;
10656
10657 tmp = ready[i];
10658 memmove (&ready[1], &ready[0], sizeof (rtx) * i);
10659 ready[0] = tmp;
10660 }
10661
10662
10663 /* The s390_sched_state variable tracks the state of the current or
10664 the last instruction group.
10665
10666 0,1,2 number of instructions scheduled in the current group
10667 3 the last group is complete - normal insns
10668 4 the last group was a cracked/expanded insn */
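/* As an illustration (derived from s390_sched_variable_issue below):
   issuing three insns with none of the ooo_* attributes set walks the
   state through 0 -> 1 -> 2 -> 3 (S390_OOO_SCHED_STATE_NORMAL); a cracked
   or expanded insn moves it to 4 (S390_OOO_SCHED_STATE_CRACKED); an
   endgroup or groupalone insn likewise closes the current group, i.e.
   state 3.  */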
10669
10670 static int s390_sched_state;
10671
10672 #define S390_OOO_SCHED_STATE_NORMAL 3
10673 #define S390_OOO_SCHED_STATE_CRACKED 4
10674
10675 #define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1
10676 #define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2
10677 #define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4
10678 #define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
10679
10680 static unsigned int
10681 s390_get_sched_attrmask (rtx insn)
10682 {
10683 unsigned int mask = 0;
10684
10685 if (get_attr_ooo_cracked (insn))
10686 mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
10687 if (get_attr_ooo_expanded (insn))
10688 mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
10689 if (get_attr_ooo_endgroup (insn))
10690 mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
10691 if (get_attr_ooo_groupalone (insn))
10692 mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
10693 return mask;
10694 }
10695
10696 /* Return the scheduling score for INSN. The higher the score the
10697 better. The score is calculated from the OOO scheduling attributes
10698 of INSN and the scheduling state s390_sched_state. */
10699 static int
10700 s390_sched_score (rtx insn)
10701 {
10702 unsigned int mask = s390_get_sched_attrmask (insn);
10703 int score = 0;
10704
10705 switch (s390_sched_state)
10706 {
10707 case 0:
10708 /* Try to put insns into the first slot which would otherwise
10709 break a group. */
10710 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
10711 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
10712 score += 5;
10713 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
10714 score += 10;
10715 case 1:
10716 /* Prefer not cracked insns while trying to put together a
10717 group. */
10718 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
10719 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
10720 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
10721 score += 10;
10722 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
10723 score += 5;
10724 break;
10725 case 2:
10726 /* Prefer not cracked insns while trying to put together a
10727 group. */
10728 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
10729 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
10730 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
10731 score += 10;
10732 /* Prefer endgroup insns in the last slot. */
10733 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
10734 score += 10;
10735 break;
10736 case S390_OOO_SCHED_STATE_NORMAL:
10737 /* Prefer not cracked insns if the last was not cracked. */
10738 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
10739 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
10740 score += 5;
10741 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
10742 score += 10;
10743 break;
10744 case S390_OOO_SCHED_STATE_CRACKED:
10745 /* Try to keep cracked insns together to prevent them from
10746 interrupting groups. */
10747 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
10748 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
10749 score += 5;
10750 break;
10751 }
10752 return score;
10753 }
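/* A small worked example of the scoring above (illustrative only): with
   s390_sched_state == 2, an insn whose only ooo attribute is ooo_endgroup
   scores 10 + 10 = 20, whereas a cracked insn scores 0, so the endgroup
   insn is preferred for the last slot of the current group.  */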
10754
10755 /* This function is called via hook TARGET_SCHED_REORDER before
10756 issuing one insn from list READY which contains *NREADYP entries.
10757 For target z10 it reorders load instructions to avoid early load
10758 conflicts in the floating-point pipeline. */
10759 static int
10760 s390_sched_reorder (FILE *file, int verbose,
10761 rtx *ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
10762 {
10763 if (s390_tune == PROCESSOR_2097_Z10)
10764 if (reload_completed && *nreadyp > 1)
10765 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
10766
10767 if (s390_tune == PROCESSOR_2827_ZEC12
10768 && reload_completed
10769 && *nreadyp > 1)
10770 {
10771 int i;
10772 int last_index = *nreadyp - 1;
10773 int max_index = -1;
10774 int max_score = -1;
10775 rtx tmp;
10776
10777 /* Just move the insn with the highest score to the top (the
10778 end) of the list. A full sort is not needed since a conflict
10779 in the hazard recognition cannot happen. So the top insn in
10780 the ready list will always be taken. */
10781 for (i = last_index; i >= 0; i--)
10782 {
10783 int score;
10784
10785 if (recog_memoized (ready[i]) < 0)
10786 continue;
10787
10788 score = s390_sched_score (ready[i]);
10789 if (score > max_score)
10790 {
10791 max_score = score;
10792 max_index = i;
10793 }
10794 }
10795
10796 if (max_index != -1)
10797 {
10798 if (max_index != last_index)
10799 {
10800 tmp = ready[max_index];
10801 ready[max_index] = ready[last_index];
10802 ready[last_index] = tmp;
10803
10804 if (verbose > 5)
10805 fprintf (file,
10806 "move insn %d to the top of list\n",
10807 INSN_UID (ready[last_index]));
10808 }
10809 else if (verbose > 5)
10810 fprintf (file,
10811 "best insn %d already on top\n",
10812 INSN_UID (ready[last_index]));
10813 }
10814
10815 if (verbose > 5)
10816 {
10817 fprintf (file, "ready list ooo attributes - sched state: %d\n",
10818 s390_sched_state);
10819
10820 for (i = last_index; i >= 0; i--)
10821 {
10822 if (recog_memoized (ready[i]) < 0)
10823 continue;
10824 fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
10825 s390_sched_score (ready[i]));
10826 #define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
10827 PRINT_OOO_ATTR (ooo_cracked);
10828 PRINT_OOO_ATTR (ooo_expanded);
10829 PRINT_OOO_ATTR (ooo_endgroup);
10830 PRINT_OOO_ATTR (ooo_groupalone);
10831 #undef PRINT_OOO_ATTR
10832 fprintf (file, "\n");
10833 }
10834 }
10835 }
10836
10837 return s390_issue_rate ();
10838 }
10839
10840
10841 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
10842 the scheduler has issued INSN. It stores the last issued insn into
10843 last_scheduled_insn in order to make it available for
10844 s390_sched_reorder. */
10845 static int
10846 s390_sched_variable_issue (FILE *file, int verbose, rtx insn, int more)
10847 {
10848 last_scheduled_insn = insn;
10849
10850 if (s390_tune == PROCESSOR_2827_ZEC12
10851 && reload_completed
10852 && recog_memoized (insn) >= 0)
10853 {
10854 unsigned int mask = s390_get_sched_attrmask (insn);
10855
10856 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
10857 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
10858 s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
10859 else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
10860 || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
10861 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
10862 else
10863 {
10864 /* Only normal insns are left (mask == 0). */
10865 switch (s390_sched_state)
10866 {
10867 case 0:
10868 case 1:
10869 case 2:
10870 case S390_OOO_SCHED_STATE_NORMAL:
10871 if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
10872 s390_sched_state = 1;
10873 else
10874 s390_sched_state++;
10875
10876 break;
10877 case S390_OOO_SCHED_STATE_CRACKED:
10878 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
10879 break;
10880 }
10881 }
10882 if (verbose > 5)
10883 {
10884 fprintf (file, "insn %d: ", INSN_UID (insn));
10885 #define PRINT_OOO_ATTR(ATTR) \
10886 fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
10887 PRINT_OOO_ATTR (ooo_cracked);
10888 PRINT_OOO_ATTR (ooo_expanded);
10889 PRINT_OOO_ATTR (ooo_endgroup);
10890 PRINT_OOO_ATTR (ooo_groupalone);
10891 #undef PRINT_OOO_ATTR
10892 fprintf (file, "\n");
10893 fprintf (file, "sched state: %d\n", s390_sched_state);
10894 }
10895 }
10896
10897 if (GET_CODE (PATTERN (insn)) != USE
10898 && GET_CODE (PATTERN (insn)) != CLOBBER)
10899 return more - 1;
10900 else
10901 return more;
10902 }
10903
10904 static void
10905 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
10906 int verbose ATTRIBUTE_UNUSED,
10907 int max_ready ATTRIBUTE_UNUSED)
10908 {
10909 last_scheduled_insn = NULL_RTX;
10910 s390_sched_state = 0;
10911 }
10912
10913 /* This function checks the whole of insn X for memory references. The
10914 function always returns zero because the framework it is called
10915 from would stop recursively analyzing the insn upon a return value
10916 other than zero. The real result of this function is updating
10917 counter variable MEM_COUNT. */
10918 static int
10919 check_dpu (rtx *x, unsigned *mem_count)
10920 {
10921 if (*x != NULL_RTX && MEM_P (*x))
10922 (*mem_count)++;
10923 return 0;
10924 }
10925
10926 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
10927 the number of times struct loop *loop should be unrolled when tuning for
10928 CPUs with a built-in stride prefetcher.
10929 The loop is analyzed for memory accesses by calling check_dpu for
10930 each rtx of the loop. Depending on the loop depth and the number of
10931 memory accesses, a new count <= nunroll is returned to improve the
10932 behaviour of the hardware prefetch unit. */
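/* Worked example (illustrative): a loop of depth 1 containing 4 memory
   references is limited below to MIN (nunroll, 28 / 4), i.e. at most 7
   unrolled copies; the same body nested one level deeper would be limited
   to MIN (nunroll, 22 / 4) = 5.  */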
10933 static unsigned
10934 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
10935 {
10936 basic_block *bbs;
10937 rtx insn;
10938 unsigned i;
10939 unsigned mem_count = 0;
10940
10941 if (s390_tune != PROCESSOR_2097_Z10
10942 && s390_tune != PROCESSOR_2817_Z196
10943 && s390_tune != PROCESSOR_2827_ZEC12)
10944 return nunroll;
10945
10946 /* Count the number of memory references within the loop body. */
10947 bbs = get_loop_body (loop);
10948 for (i = 0; i < loop->num_nodes; i++)
10949 {
10950 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
10951 if (INSN_P (insn) && INSN_CODE (insn) != -1)
10952 for_each_rtx (&insn, (rtx_function) check_dpu, &mem_count);
10953 }
10954 free (bbs);
10955
10956 /* Prevent division by zero; nunroll needs no adjustment in this case. */
10957 if (mem_count == 0)
10958 return nunroll;
10959
10960 switch (loop_depth(loop))
10961 {
10962 case 1:
10963 return MIN (nunroll, 28 / mem_count);
10964 case 2:
10965 return MIN (nunroll, 22 / mem_count);
10966 default:
10967 return MIN (nunroll, 16 / mem_count);
10968 }
10969 }
10970
10971 /* Initialize GCC target structure. */
10972
10973 #undef TARGET_ASM_ALIGNED_HI_OP
10974 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
10975 #undef TARGET_ASM_ALIGNED_DI_OP
10976 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
10977 #undef TARGET_ASM_INTEGER
10978 #define TARGET_ASM_INTEGER s390_assemble_integer
10979
10980 #undef TARGET_ASM_OPEN_PAREN
10981 #define TARGET_ASM_OPEN_PAREN ""
10982
10983 #undef TARGET_ASM_CLOSE_PAREN
10984 #define TARGET_ASM_CLOSE_PAREN ""
10985
10986 #undef TARGET_OPTION_OVERRIDE
10987 #define TARGET_OPTION_OVERRIDE s390_option_override
10988
10989 #undef TARGET_ENCODE_SECTION_INFO
10990 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
10991
10992 #undef TARGET_SCALAR_MODE_SUPPORTED_P
10993 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
10994
10995 #ifdef HAVE_AS_TLS
10996 #undef TARGET_HAVE_TLS
10997 #define TARGET_HAVE_TLS true
10998 #endif
10999 #undef TARGET_CANNOT_FORCE_CONST_MEM
11000 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
11001
11002 #undef TARGET_DELEGITIMIZE_ADDRESS
11003 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
11004
11005 #undef TARGET_LEGITIMIZE_ADDRESS
11006 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
11007
11008 #undef TARGET_RETURN_IN_MEMORY
11009 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
11010
11011 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
11012 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
11013
11014 #undef TARGET_ASM_OUTPUT_MI_THUNK
11015 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
11016 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
11017 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
11018
11019 #undef TARGET_SCHED_ADJUST_PRIORITY
11020 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
11021 #undef TARGET_SCHED_ISSUE_RATE
11022 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
11023 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
11024 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
11025
11026 #undef TARGET_SCHED_VARIABLE_ISSUE
11027 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
11028 #undef TARGET_SCHED_REORDER
11029 #define TARGET_SCHED_REORDER s390_sched_reorder
11030 #undef TARGET_SCHED_INIT
11031 #define TARGET_SCHED_INIT s390_sched_init
11032
11033 #undef TARGET_CANNOT_COPY_INSN_P
11034 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
11035 #undef TARGET_RTX_COSTS
11036 #define TARGET_RTX_COSTS s390_rtx_costs
11037 #undef TARGET_ADDRESS_COST
11038 #define TARGET_ADDRESS_COST s390_address_cost
11039 #undef TARGET_REGISTER_MOVE_COST
11040 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
11041 #undef TARGET_MEMORY_MOVE_COST
11042 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
11043
11044 #undef TARGET_MACHINE_DEPENDENT_REORG
11045 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
11046
11047 #undef TARGET_VALID_POINTER_MODE
11048 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
11049
11050 #undef TARGET_BUILD_BUILTIN_VA_LIST
11051 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
11052 #undef TARGET_EXPAND_BUILTIN_VA_START
11053 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
11054 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
11055 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
11056
11057 #undef TARGET_PROMOTE_FUNCTION_MODE
11058 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
11059 #undef TARGET_PASS_BY_REFERENCE
11060 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
11061
11062 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
11063 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
11064 #undef TARGET_FUNCTION_ARG
11065 #define TARGET_FUNCTION_ARG s390_function_arg
11066 #undef TARGET_FUNCTION_ARG_ADVANCE
11067 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
11068 #undef TARGET_FUNCTION_VALUE
11069 #define TARGET_FUNCTION_VALUE s390_function_value
11070 #undef TARGET_LIBCALL_VALUE
11071 #define TARGET_LIBCALL_VALUE s390_libcall_value
11072
11073 #undef TARGET_FIXED_CONDITION_CODE_REGS
11074 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
11075
11076 #undef TARGET_CC_MODES_COMPATIBLE
11077 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
11078
11079 #undef TARGET_INVALID_WITHIN_DOLOOP
11080 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
11081
11082 #ifdef HAVE_AS_TLS
11083 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
11084 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
11085 #endif
11086
11087 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11088 #undef TARGET_MANGLE_TYPE
11089 #define TARGET_MANGLE_TYPE s390_mangle_type
11090 #endif
11091
11092 #undef TARGET_SCALAR_MODE_SUPPORTED_P
11093 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
11094
11095 #undef TARGET_PREFERRED_RELOAD_CLASS
11096 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
11097
11098 #undef TARGET_SECONDARY_RELOAD
11099 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
11100
11101 #undef TARGET_LIBGCC_CMP_RETURN_MODE
11102 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
11103
11104 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
11105 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
11106
11107 #undef TARGET_LEGITIMATE_ADDRESS_P
11108 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
11109
11110 #undef TARGET_LEGITIMATE_CONSTANT_P
11111 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
11112
11113 #undef TARGET_CAN_ELIMINATE
11114 #define TARGET_CAN_ELIMINATE s390_can_eliminate
11115
11116 #undef TARGET_CONDITIONAL_REGISTER_USAGE
11117 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
11118
11119 #undef TARGET_LOOP_UNROLL_ADJUST
11120 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
11121
11122 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11123 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
11124 #undef TARGET_TRAMPOLINE_INIT
11125 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
11126
11127 #undef TARGET_UNWIND_WORD_MODE
11128 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
11129
11130 #undef TARGET_CANONICALIZE_COMPARISON
11131 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
11132
11133 struct gcc_target targetm = TARGET_INITIALIZER;
11134
11135 #include "gt-s390.h"