1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2015 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "alias.h"
29 #include "symtab.h"
30 #include "tree.h"
31 #include "fold-const.h"
32 #include "print-tree.h"
33 #include "stringpool.h"
34 #include "stor-layout.h"
35 #include "varasm.h"
36 #include "calls.h"
37 #include "tm_p.h"
38 #include "regs.h"
39 #include "hard-reg-set.h"
40 #include "insn-config.h"
41 #include "conditions.h"
42 #include "output.h"
43 #include "insn-attr.h"
44 #include "flags.h"
45 #include "except.h"
46 #include "function.h"
47 #include "recog.h"
48 #include "expmed.h"
49 #include "dojump.h"
50 #include "explow.h"
51 #include "emit-rtl.h"
52 #include "stmt.h"
53 #include "expr.h"
54 #include "reload.h"
55 #include "diagnostic-core.h"
56 #include "predict.h"
57 #include "dominance.h"
58 #include "cfg.h"
59 #include "cfgrtl.h"
60 #include "cfganal.h"
61 #include "lcm.h"
62 #include "cfgbuild.h"
63 #include "cfgcleanup.h"
64 #include "basic-block.h"
65 #include "target.h"
66 #include "debug.h"
67 #include "langhooks.h"
68 #include "insn-codes.h"
69 #include "optabs.h"
70 #include "tree-ssa-alias.h"
71 #include "internal-fn.h"
72 #include "gimple-fold.h"
73 #include "tree-eh.h"
74 #include "gimple-expr.h"
75 #include "gimple.h"
76 #include "gimplify.h"
77 #include "df.h"
78 #include "params.h"
79 #include "cfgloop.h"
80 #include "opts.h"
81 #include "tree-pass.h"
82 #include "context.h"
83 #include "builtins.h"
84 #include "rtl-iter.h"
85 #include "intl.h"
86 #include "plugin-api.h"
87 #include "ipa-ref.h"
88 #include "cgraph.h"
89
90 #include "target-def.h"
91
92 /* Define the specific costs for a given cpu. */
93
94 struct processor_costs
95 {
96 /* multiplication */
97 const int m; /* cost of an M instruction. */
98 const int mghi; /* cost of an MGHI instruction. */
99 const int mh; /* cost of an MH instruction. */
100 const int mhi; /* cost of an MHI instruction. */
101 const int ml; /* cost of an ML instruction. */
102 const int mr; /* cost of an MR instruction. */
103 const int ms; /* cost of an MS instruction. */
104 const int msg; /* cost of an MSG instruction. */
105 const int msgf; /* cost of an MSGF instruction. */
106 const int msgfr; /* cost of an MSGFR instruction. */
107 const int msgr; /* cost of an MSGR instruction. */
108 const int msr; /* cost of an MSR instruction. */
109 const int mult_df; /* cost of multiplication in DFmode. */
110 const int mxbr;
111 /* square root */
112 const int sqxbr; /* cost of square root in TFmode. */
113 const int sqdbr; /* cost of square root in DFmode. */
114 const int sqebr; /* cost of square root in SFmode. */
115 /* multiply and add */
116 const int madbr; /* cost of multiply and add in DFmode. */
117 const int maebr; /* cost of multiply and add in SFmode. */
118 /* division */
119 const int dxbr;
120 const int ddbr;
121 const int debr;
122 const int dlgr;
123 const int dlr;
124 const int dr;
125 const int dsgfr;
126 const int dsgr;
127 };
128
129 const struct processor_costs *s390_cost;
130
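/* A note on the units, for orientation: the entries below are built with
   COSTS_N_INSNS, GCC's helper that scales an instruction count into the
   units used by the RTX cost hooks (COSTS_N_INSNS (N) expands to N * 4).
   So, e.g., COSTS_N_INSNS (5) for "M" in z900_cost models the M multiply
   as roughly five times the cost of a simple instruction.  */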
131 static const
132 struct processor_costs z900_cost =
133 {
134 COSTS_N_INSNS (5), /* M */
135 COSTS_N_INSNS (10), /* MGHI */
136 COSTS_N_INSNS (5), /* MH */
137 COSTS_N_INSNS (4), /* MHI */
138 COSTS_N_INSNS (5), /* ML */
139 COSTS_N_INSNS (5), /* MR */
140 COSTS_N_INSNS (4), /* MS */
141 COSTS_N_INSNS (15), /* MSG */
142 COSTS_N_INSNS (7), /* MSGF */
143 COSTS_N_INSNS (7), /* MSGFR */
144 COSTS_N_INSNS (10), /* MSGR */
145 COSTS_N_INSNS (4), /* MSR */
146 COSTS_N_INSNS (7), /* multiplication in DFmode */
147 COSTS_N_INSNS (13), /* MXBR */
148 COSTS_N_INSNS (136), /* SQXBR */
149 COSTS_N_INSNS (44), /* SQDBR */
150 COSTS_N_INSNS (35), /* SQEBR */
151 COSTS_N_INSNS (18), /* MADBR */
152 COSTS_N_INSNS (13), /* MAEBR */
153 COSTS_N_INSNS (134), /* DXBR */
154 COSTS_N_INSNS (30), /* DDBR */
155 COSTS_N_INSNS (27), /* DEBR */
156 COSTS_N_INSNS (220), /* DLGR */
157 COSTS_N_INSNS (34), /* DLR */
158 COSTS_N_INSNS (34), /* DR */
159 COSTS_N_INSNS (32), /* DSGFR */
160 COSTS_N_INSNS (32), /* DSGR */
161 };
162
163 static const
164 struct processor_costs z990_cost =
165 {
166 COSTS_N_INSNS (4), /* M */
167 COSTS_N_INSNS (2), /* MGHI */
168 COSTS_N_INSNS (2), /* MH */
169 COSTS_N_INSNS (2), /* MHI */
170 COSTS_N_INSNS (4), /* ML */
171 COSTS_N_INSNS (4), /* MR */
172 COSTS_N_INSNS (5), /* MS */
173 COSTS_N_INSNS (6), /* MSG */
174 COSTS_N_INSNS (4), /* MSGF */
175 COSTS_N_INSNS (4), /* MSGFR */
176 COSTS_N_INSNS (4), /* MSGR */
177 COSTS_N_INSNS (4), /* MSR */
178 COSTS_N_INSNS (1), /* multiplication in DFmode */
179 COSTS_N_INSNS (28), /* MXBR */
180 COSTS_N_INSNS (130), /* SQXBR */
181 COSTS_N_INSNS (66), /* SQDBR */
182 COSTS_N_INSNS (38), /* SQEBR */
183 COSTS_N_INSNS (1), /* MADBR */
184 COSTS_N_INSNS (1), /* MAEBR */
185 COSTS_N_INSNS (60), /* DXBR */
186 COSTS_N_INSNS (40), /* DDBR */
187 COSTS_N_INSNS (26), /* DEBR */
188 COSTS_N_INSNS (176), /* DLGR */
189 COSTS_N_INSNS (31), /* DLR */
190 COSTS_N_INSNS (31), /* DR */
191 COSTS_N_INSNS (31), /* DSGFR */
192 COSTS_N_INSNS (31), /* DSGR */
193 };
194
195 static const
196 struct processor_costs z9_109_cost =
197 {
198 COSTS_N_INSNS (4), /* M */
199 COSTS_N_INSNS (2), /* MGHI */
200 COSTS_N_INSNS (2), /* MH */
201 COSTS_N_INSNS (2), /* MHI */
202 COSTS_N_INSNS (4), /* ML */
203 COSTS_N_INSNS (4), /* MR */
204 COSTS_N_INSNS (5), /* MS */
205 COSTS_N_INSNS (6), /* MSG */
206 COSTS_N_INSNS (4), /* MSGF */
207 COSTS_N_INSNS (4), /* MSGFR */
208 COSTS_N_INSNS (4), /* MSGR */
209 COSTS_N_INSNS (4), /* MSR */
210 COSTS_N_INSNS (1), /* multiplication in DFmode */
211 COSTS_N_INSNS (28), /* MXBR */
212 COSTS_N_INSNS (130), /* SQXBR */
213 COSTS_N_INSNS (66), /* SQDBR */
214 COSTS_N_INSNS (38), /* SQEBR */
215 COSTS_N_INSNS (1), /* MADBR */
216 COSTS_N_INSNS (1), /* MAEBR */
217 COSTS_N_INSNS (60), /* DXBR */
218 COSTS_N_INSNS (40), /* DDBR */
219 COSTS_N_INSNS (26), /* DEBR */
220 COSTS_N_INSNS (30), /* DLGR */
221 COSTS_N_INSNS (23), /* DLR */
222 COSTS_N_INSNS (23), /* DR */
223 COSTS_N_INSNS (24), /* DSGFR */
224 COSTS_N_INSNS (24), /* DSGR */
225 };
226
227 static const
228 struct processor_costs z10_cost =
229 {
230 COSTS_N_INSNS (10), /* M */
231 COSTS_N_INSNS (10), /* MGHI */
232 COSTS_N_INSNS (10), /* MH */
233 COSTS_N_INSNS (10), /* MHI */
234 COSTS_N_INSNS (10), /* ML */
235 COSTS_N_INSNS (10), /* MR */
236 COSTS_N_INSNS (10), /* MS */
237 COSTS_N_INSNS (10), /* MSG */
238 COSTS_N_INSNS (10), /* MSGF */
239 COSTS_N_INSNS (10), /* MSGFR */
240 COSTS_N_INSNS (10), /* MSGR */
241 COSTS_N_INSNS (10), /* MSR */
242 COSTS_N_INSNS (1) , /* multiplication in DFmode */
243 COSTS_N_INSNS (50), /* MXBR */
244 COSTS_N_INSNS (120), /* SQXBR */
245 COSTS_N_INSNS (52), /* SQDBR */
246 COSTS_N_INSNS (38), /* SQEBR */
247 COSTS_N_INSNS (1), /* MADBR */
248 COSTS_N_INSNS (1), /* MAEBR */
249 COSTS_N_INSNS (111), /* DXBR */
250 COSTS_N_INSNS (39), /* DDBR */
251 COSTS_N_INSNS (32), /* DEBR */
252 COSTS_N_INSNS (160), /* DLGR */
253 COSTS_N_INSNS (71), /* DLR */
254 COSTS_N_INSNS (71), /* DR */
255 COSTS_N_INSNS (71), /* DSGFR */
256 COSTS_N_INSNS (71), /* DSGR */
257 };
258
259 static const
260 struct processor_costs z196_cost =
261 {
262 COSTS_N_INSNS (7), /* M */
263 COSTS_N_INSNS (5), /* MGHI */
264 COSTS_N_INSNS (5), /* MH */
265 COSTS_N_INSNS (5), /* MHI */
266 COSTS_N_INSNS (7), /* ML */
267 COSTS_N_INSNS (7), /* MR */
268 COSTS_N_INSNS (6), /* MS */
269 COSTS_N_INSNS (8), /* MSG */
270 COSTS_N_INSNS (6), /* MSGF */
271 COSTS_N_INSNS (6), /* MSGFR */
272 COSTS_N_INSNS (8), /* MSGR */
273 COSTS_N_INSNS (6), /* MSR */
274 COSTS_N_INSNS (1) , /* multiplication in DFmode */
275 COSTS_N_INSNS (40), /* MXBR B+40 */
276 COSTS_N_INSNS (100), /* SQXBR B+100 */
277 COSTS_N_INSNS (42), /* SQDBR B+42 */
278 COSTS_N_INSNS (28), /* SQEBR B+28 */
279 COSTS_N_INSNS (1), /* MADBR B */
280 COSTS_N_INSNS (1), /* MAEBR B */
281 COSTS_N_INSNS (101), /* DXBR B+101 */
282 COSTS_N_INSNS (29), /* DDBR */
283 COSTS_N_INSNS (22), /* DEBR */
284 COSTS_N_INSNS (160), /* DLGR cracked */
285 COSTS_N_INSNS (160), /* DLR cracked */
286 COSTS_N_INSNS (160), /* DR expanded */
287 COSTS_N_INSNS (160), /* DSGFR cracked */
288 COSTS_N_INSNS (160), /* DSGR cracked */
289 };
290
291 static const
292 struct processor_costs zEC12_cost =
293 {
294 COSTS_N_INSNS (7), /* M */
295 COSTS_N_INSNS (5), /* MGHI */
296 COSTS_N_INSNS (5), /* MH */
297 COSTS_N_INSNS (5), /* MHI */
298 COSTS_N_INSNS (7), /* ML */
299 COSTS_N_INSNS (7), /* MR */
300 COSTS_N_INSNS (6), /* MS */
301 COSTS_N_INSNS (8), /* MSG */
302 COSTS_N_INSNS (6), /* MSGF */
303 COSTS_N_INSNS (6), /* MSGFR */
304 COSTS_N_INSNS (8), /* MSGR */
305 COSTS_N_INSNS (6), /* MSR */
306 COSTS_N_INSNS (1) , /* multiplication in DFmode */
307 COSTS_N_INSNS (40), /* MXBR B+40 */
308 COSTS_N_INSNS (100), /* SQXBR B+100 */
309 COSTS_N_INSNS (42), /* SQDBR B+42 */
310 COSTS_N_INSNS (28), /* SQEBR B+28 */
311 COSTS_N_INSNS (1), /* MADBR B */
312 COSTS_N_INSNS (1), /* MAEBR B */
313 COSTS_N_INSNS (131), /* DXBR B+131 */
314 COSTS_N_INSNS (29), /* DDBR */
315 COSTS_N_INSNS (22), /* DEBR */
316 COSTS_N_INSNS (160), /* DLGR cracked */
317 COSTS_N_INSNS (160), /* DLR cracked */
318 COSTS_N_INSNS (160), /* DR expanded */
319 COSTS_N_INSNS (160), /* DSGFR cracked */
320 COSTS_N_INSNS (160), /* DSGR cracked */
321 };
322
323 extern int reload_completed;
324
325 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
326 static rtx_insn *last_scheduled_insn;
327
 328 /* Structure used to hold the components of an S/390 memory
329 address. A legitimate address on S/390 is of the general
330 form
331 base + index + displacement
332 where any of the components is optional.
333
334 base and index are registers of the class ADDR_REGS,
335 displacement is an unsigned 12-bit immediate constant. */
336
337 struct s390_address
338 {
339 rtx base;
340 rtx indx;
341 rtx disp;
342 bool pointer;
343 bool literal_pool;
344 };
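/* As an illustrative sketch: for an assembler operand written in the usual
   D(X,B) form, e.g. 160(%r1,%r13), the decomposition above would be
   disp = (const_int 160), indx = %r1, base = %r13; the pointer and
   literal_pool flags record additional known properties of the address.  */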
345
346 /* The following structure is embedded in the machine
347 specific part of struct function. */
348
349 struct GTY (()) s390_frame_layout
350 {
351 /* Offset within stack frame. */
352 HOST_WIDE_INT gprs_offset;
353 HOST_WIDE_INT f0_offset;
354 HOST_WIDE_INT f4_offset;
355 HOST_WIDE_INT f8_offset;
356 HOST_WIDE_INT backchain_offset;
357
 358   /* Numbers of the first and last GPRs for which slots in the
 359      register save area are reserved.  */
360 int first_save_gpr_slot;
361 int last_save_gpr_slot;
362
363 /* Location (FP register number) where GPRs (r0-r15) should
364 be saved to.
365 0 - does not need to be saved at all
366 -1 - stack slot */
367 signed char gpr_save_slots[16];
368
369 /* Number of first and last gpr to be saved, restored. */
370 int first_save_gpr;
371 int first_restore_gpr;
372 int last_save_gpr;
373 int last_restore_gpr;
374
375 /* Bits standing for floating point registers. Set, if the
376 respective register has to be saved. Starting with reg 16 (f0)
377 at the rightmost bit.
378 Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
379 fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
380 reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
381 unsigned int fpr_bitmap;
382
383 /* Number of floating point registers f8-f15 which must be saved. */
384 int high_fprs;
385
386 /* Set if return address needs to be saved.
387 This flag is set by s390_return_addr_rtx if it could not use
388 the initial value of r14 and therefore depends on r14 saved
389 to the stack. */
390 bool save_return_addr_p;
391
392 /* Size of stack frame. */
393 HOST_WIDE_INT frame_size;
394 };
395
396 /* Define the structure for the machine field in struct function. */
397
398 struct GTY(()) machine_function
399 {
400 struct s390_frame_layout frame_layout;
401
402 /* Literal pool base register. */
403 rtx base_reg;
404
405 /* True if we may need to perform branch splitting. */
406 bool split_branches_pending_p;
407
408 bool has_landing_pad_p;
409
410 /* True if the current function may contain a tbegin clobbering
411 FPRs. */
412 bool tbegin_p;
413 };
414
 415 /* A few accessor macros for struct cfun->machine->s390_frame_layout.  */
416
417 #define cfun_frame_layout (cfun->machine->frame_layout)
418 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
419 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
420 ? cfun_frame_layout.fpr_bitmap & 0x0f \
421 : cfun_frame_layout.fpr_bitmap & 0x03))
422 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
423 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
424 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
425 (1 << (REGNO - FPR0_REGNUM)))
426 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
427 (1 << (REGNO - FPR0_REGNUM))))
428 #define cfun_gpr_save_slot(REGNO) \
429 cfun->machine->frame_layout.gpr_save_slots[REGNO]
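/* Usage sketch: cfun_set_fpr_save (FPR0_REGNUM + 2) sets bit 2 of
   fpr_bitmap and cfun_fpr_save_p (FPR0_REGNUM + 2) subsequently returns
   true; cfun_gprs_save_area_size is just the number of reserved GPR save
   slots multiplied by UNITS_PER_LONG.  */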
430
431 /* Number of GPRs and FPRs used for argument passing. */
432 #define GP_ARG_NUM_REG 5
433 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
434 #define VEC_ARG_NUM_REG 8
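/* As a reminder of the ABI behind these numbers: integer arguments go
   into r2-r6, floating point arguments into f0, f2, f4, f6 (64 bit) or
   f0, f2 (31 bit), and, with the vector ABI, vector arguments into
   v24-v31.  */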
435
436 /* A couple of shortcuts. */
437 #define CONST_OK_FOR_J(x) \
438 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
439 #define CONST_OK_FOR_K(x) \
440 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
441 #define CONST_OK_FOR_Os(x) \
442 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
443 #define CONST_OK_FOR_Op(x) \
444 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
445 #define CONST_OK_FOR_On(x) \
446 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
447
448 #define REGNO_PAIR_OK(REGNO, MODE) \
449 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
450
451 /* That's the read ahead of the dynamic branch prediction unit in
452 bytes on a z10 (or higher) CPU. */
453 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
454
455
456 /* Indicate which ABI has been used for passing vector args.
457 0 - no vector type arguments have been passed where the ABI is relevant
458 1 - the old ABI has been used
459 2 - a vector type argument has been passed either in a vector register
460 or on the stack by value */
461 static int s390_vector_abi = 0;
462
463 /* Set the vector ABI marker if TYPE is subject to the vector ABI
464 switch. The vector ABI affects only vector data types. There are
465 two aspects of the vector ABI relevant here:
466
467 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
468 ABI and natural alignment with the old.
469
 470    2. vectors <= 16 bytes are passed in VRs or by value on the stack
 471    with the new ABI, but by reference on the stack with the old.
472
473 If ARG_P is true TYPE is used for a function argument or return
474 value. The ABI marker then is set for all vector data types. If
475 ARG_P is false only type 1 vectors are being checked. */
476
477 static void
478 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
479 {
480 static hash_set<const_tree> visited_types_hash;
481
482 if (s390_vector_abi)
483 return;
484
485 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
486 return;
487
488 if (visited_types_hash.contains (type))
489 return;
490
491 visited_types_hash.add (type);
492
493 if (VECTOR_TYPE_P (type))
494 {
495 int type_size = int_size_in_bytes (type);
496
 497       /* Outside of arguments only the alignment changes, and this
 498 	 only happens for vector types >= 16 bytes.  */
499 if (!arg_p && type_size < 16)
500 return;
501
 502       /* As arguments, vector types > 16 bytes are passed as before (GCC
 503 	 never enforced the bigger alignment for arguments that was
 504 	 required by the old vector ABI).  However, the type might still be
 505 	 ABI relevant due to the changed alignment if it is a struct
 506 	 member.  */
507 if (arg_p && type_size > 16 && !in_struct_p)
508 return;
509
510 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
511 }
512 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
513 {
514 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
515 natural alignment there will never be ABI dependent padding
516 in an array type. That's why we do not set in_struct_p to
517 true here. */
518 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
519 }
520 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
521 {
522 tree arg_chain;
523
524 /* Check the return type. */
525 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
526
527 for (arg_chain = TYPE_ARG_TYPES (type);
528 arg_chain;
529 arg_chain = TREE_CHAIN (arg_chain))
530 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
531 }
532 else if (RECORD_OR_UNION_TYPE_P (type))
533 {
534 tree field;
535
536 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
537 {
538 if (TREE_CODE (field) != FIELD_DECL)
539 continue;
540
541 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
542 }
543 }
544 }
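/* A minimal example of what trips the marker: a 16-byte vector type such
   as

     typedef double v2df __attribute__ ((vector_size (16)));

   passed as a function argument or return value (or reached as a struct
   member) sets s390_vector_abi to 2 when TARGET_VX_ABI is in effect and
   to 1 otherwise; purely scalar code never flips it.  */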
545
546
547 /* System z builtins. */
548
549 #include "s390-builtins.h"
550
551 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
552 {
553 #undef B_DEF
554 #undef OB_DEF
555 #undef OB_DEF_VAR
556 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
557 #define OB_DEF(...)
558 #define OB_DEF_VAR(...)
559 #include "s390-builtins.def"
560 0
561 };
562
563 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
564 {
565 #undef B_DEF
566 #undef OB_DEF
567 #undef OB_DEF_VAR
568 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
569 #define OB_DEF(...)
570 #define OB_DEF_VAR(...)
571 #include "s390-builtins.def"
572 0
573 };
574
575 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
576 {
577 #undef B_DEF
578 #undef OB_DEF
579 #undef OB_DEF_VAR
580 #define B_DEF(...)
581 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
582 #define OB_DEF_VAR(...)
583 #include "s390-builtins.def"
584 0
585 };
586
587 const unsigned int
588 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
589 {
590 #undef B_DEF
591 #undef OB_DEF
592 #undef OB_DEF_VAR
593 #define B_DEF(...)
594 #define OB_DEF(...)
595 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
596 #include "s390-builtins.def"
597 0
598 };
599
600 tree s390_builtin_types[BT_MAX];
601 tree s390_builtin_fn_types[BT_FN_MAX];
602 tree s390_builtin_decls[S390_BUILTIN_MAX +
603 S390_OVERLOADED_BUILTIN_MAX +
604 S390_OVERLOADED_BUILTIN_VAR_MAX];
605
606 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
607 #undef B_DEF
608 #undef OB_DEF
609 #undef OB_DEF_VAR
610 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
611 #define OB_DEF(...)
612 #define OB_DEF_VAR(...)
613
614 #include "s390-builtins.def"
615 CODE_FOR_nothing
616 };
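/* How the tables above fit together, using a hypothetical entry

     B_DEF (foo, foo_pattern, 0, B_VX, O_U4, BT_FN_INT_INT)

   as an example: it contributes B_VX to bflags_builtin[], O_U4 to
   opflags_builtin[], CODE_FOR_foo_pattern to code_for_builtin[], and
   s390_init_builtins below registers __builtin_foo with the
   BT_FN_INT_INT function type, provided B_VX is set in bflags_mask.  */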
617
618 static void
619 s390_init_builtins (void)
620 {
621 /* These definitions are being used in s390-builtins.def. */
622 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
623 NULL, NULL);
624 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
625 tree c_uint64_type_node;
626 unsigned int bflags_mask = (BFLAGS_MASK_INIT);
627
628 bflags_mask |= (TARGET_VX) ? B_VX : 0;
629 bflags_mask |= (TARGET_HTM) ? B_HTM : 0;
630
 631   /* The uint64_type_node from tree.c is not compatible with the C99
 632      uint64_t data type.  What we want is c_uint64_type_node from
 633      c-common.c.  But since backend code is not supposed to interface
 634      with the frontend, we recreate it here.  */
635 if (TARGET_64BIT)
636 c_uint64_type_node = long_unsigned_type_node;
637 else
638 c_uint64_type_node = long_long_unsigned_type_node;
639
640 #undef DEF_TYPE
641 #define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P) \
642 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
643 s390_builtin_types[INDEX] = (!CONST_P) ? \
644 (NODE) : build_type_variant ((NODE), 1, 0);
645
646 #undef DEF_POINTER_TYPE
647 #define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE) \
648 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
649 s390_builtin_types[INDEX] = \
650 build_pointer_type (s390_builtin_types[INDEX_BASE]);
651
652 #undef DEF_DISTINCT_TYPE
653 #define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE) \
654 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
655 s390_builtin_types[INDEX] = \
656 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
657
658 #undef DEF_VECTOR_TYPE
659 #define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
660 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
661 s390_builtin_types[INDEX] = \
662 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
663
664 #undef DEF_OPAQUE_VECTOR_TYPE
665 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
666 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
667 s390_builtin_types[INDEX] = \
668 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
669
670 #undef DEF_FN_TYPE
671 #define DEF_FN_TYPE(INDEX, BFLAGS, args...) \
672 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
673 s390_builtin_fn_types[INDEX] = \
674 build_function_type_list (args, NULL_TREE);
675 #undef DEF_OV_TYPE
676 #define DEF_OV_TYPE(...)
677 #include "s390-builtin-types.def"
678
679 #undef B_DEF
680 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
681 if (((BFLAGS) & ~bflags_mask) == 0) \
682 s390_builtin_decls[S390_BUILTIN_##NAME] = \
683 add_builtin_function ("__builtin_" #NAME, \
684 s390_builtin_fn_types[FNTYPE], \
685 S390_BUILTIN_##NAME, \
686 BUILT_IN_MD, \
687 NULL, \
688 ATTRS);
689 #undef OB_DEF
690 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
691 if (((BFLAGS) & ~bflags_mask) == 0) \
692 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
693 add_builtin_function ("__builtin_" #NAME, \
694 s390_builtin_fn_types[FNTYPE], \
695 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
696 BUILT_IN_MD, \
697 NULL, \
698 0);
699 #undef OB_DEF_VAR
700 #define OB_DEF_VAR(...)
701 #include "s390-builtins.def"
702
703 }
704
705 /* Return true if ARG is appropriate as argument number ARGNUM of
 706    builtin DECL.  The operand flags from s390-builtins.def have to
 707    be passed as OP_FLAGS.  */
708 bool
709 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
710 {
711 if (O_UIMM_P (op_flags))
712 {
713 int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
714 int bitwidth = bitwidths[op_flags - O_U1];
715
716 if (!tree_fits_uhwi_p (arg)
717 || tree_to_uhwi (arg) > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)
718 {
719 error("constant argument %d for builtin %qF is out of range (0.."
720 HOST_WIDE_INT_PRINT_UNSIGNED ")",
721 argnum, decl,
722 ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1);
723 return false;
724 }
725 }
726
727 if (O_SIMM_P (op_flags))
728 {
729 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
730 int bitwidth = bitwidths[op_flags - O_S2];
731
732 if (!tree_fits_shwi_p (arg)
733 || tree_to_shwi (arg) < -((HOST_WIDE_INT)1 << (bitwidth - 1))
734 || tree_to_shwi (arg) > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))
735 {
736 error("constant argument %d for builtin %qF is out of range ("
737 HOST_WIDE_INT_PRINT_DEC ".."
738 HOST_WIDE_INT_PRINT_DEC ")",
739 argnum, decl,
740 -(HOST_WIDE_INT)1 << (bitwidth - 1),
741 ((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1);
742 return false;
743 }
744 }
745 return true;
746 }
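/* For example (assuming the usual O_Un/O_Sn flag numbering): an operand
   tagged O_U4 selects bitwidth 4 from the table above, so constants
   0..15 are accepted and 16 is rejected with the range error; an O_S8
   operand accepts -128..127.  */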
747
748 /* Expand an expression EXP that calls a built-in function,
749 with result going to TARGET if that's convenient
750 (and in mode MODE if that's convenient).
751 SUBTARGET may be used as the target for computing one of EXP's operands.
752 IGNORE is nonzero if the value is to be ignored. */
753
754 static rtx
755 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
756 machine_mode mode ATTRIBUTE_UNUSED,
757 int ignore ATTRIBUTE_UNUSED)
758 {
759 #define MAX_ARGS 5
760
761 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
762 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
763 enum insn_code icode;
764 rtx op[MAX_ARGS], pat;
765 int arity;
766 bool nonvoid;
767 tree arg;
768 call_expr_arg_iterator iter;
769 unsigned int all_op_flags = opflags_for_builtin (fcode);
770 machine_mode last_vec_mode = VOIDmode;
771
772 if (TARGET_DEBUG_ARG)
773 {
774 fprintf (stderr,
775 "s390_expand_builtin, code = %4d, %s\n",
776 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
777 }
778
779 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
780 && fcode < S390_ALL_BUILTIN_MAX)
781 {
782 gcc_unreachable ();
783 }
784 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
785 {
786 icode = code_for_builtin[fcode];
787 /* Set a flag in the machine specific cfun part in order to support
788 saving/restoring of FPRs. */
789 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
790 cfun->machine->tbegin_p = true;
791 }
792 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
793 {
794 error ("Unresolved overloaded builtin");
795 return const0_rtx;
796 }
797 else
798 internal_error ("bad builtin fcode");
799
800 if (icode == 0)
801 internal_error ("bad builtin icode");
802
803 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
804
805 if (nonvoid)
806 {
807 machine_mode tmode = insn_data[icode].operand[0].mode;
808 if (!target
809 || GET_MODE (target) != tmode
810 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
811 target = gen_reg_rtx (tmode);
812
813 /* There are builtins (e.g. vec_promote) with no vector
814 arguments but an element selector. So we have to also look
815 at the vector return type when emitting the modulo
816 operation. */
817 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
818 last_vec_mode = insn_data[icode].operand[0].mode;
819 }
820
821 arity = 0;
822 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
823 {
824 const struct insn_operand_data *insn_op;
825 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
826
827 all_op_flags = all_op_flags >> O_SHIFT;
828
829 if (arg == error_mark_node)
830 return NULL_RTX;
831 if (arity >= MAX_ARGS)
832 return NULL_RTX;
833
834 if (O_IMM_P (op_flags)
835 && TREE_CODE (arg) != INTEGER_CST)
836 {
837 error ("constant value required for builtin %qF argument %d",
838 fndecl, arity + 1);
839 return const0_rtx;
840 }
841
842 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
843 return const0_rtx;
844
845 insn_op = &insn_data[icode].operand[arity + nonvoid];
846 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
847
 848       /* Wrap the expanded RTX for pointer types into a MEM expr with
 849 	 the proper mode.  This allows us to use e.g. (match_operand
 850 	 "memory_operand" ...) in the insn patterns instead of (mem
 851 	 (match_operand "address_operand" ...)).  This is helpful for
 852 	 patterns which do not just accept MEMs.  */
853 if (POINTER_TYPE_P (TREE_TYPE (arg))
854 && insn_op->predicate != address_operand)
855 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
856
 857       /* Expand the modulo operation required on element selectors.  */
858 if (op_flags == O_ELEM)
859 {
860 gcc_assert (last_vec_mode != VOIDmode);
861 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
862 op[arity],
863 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
864 NULL_RTX, 1, OPTAB_DIRECT);
865 }
866
867 /* Record the vector mode used for an element selector. This assumes:
868 1. There is no builtin with two different vector modes and an element selector
869 2. The element selector comes after the vector type it is referring to.
 870 	 This is currently true for all the builtins, but FIXME: we
 871 	 should check for this explicitly.  */
872 if (VECTOR_MODE_P (insn_op->mode))
873 last_vec_mode = insn_op->mode;
874
875 if (insn_op->predicate (op[arity], insn_op->mode))
876 {
877 arity++;
878 continue;
879 }
880
881 if (MEM_P (op[arity])
882 && insn_op->predicate == memory_operand
883 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
884 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
885 {
886 op[arity] = replace_equiv_address (op[arity],
887 copy_to_mode_reg (Pmode,
888 XEXP (op[arity], 0)));
889 }
890 else if (GET_MODE (op[arity]) == insn_op->mode
891 || GET_MODE (op[arity]) == VOIDmode
892 || (insn_op->predicate == address_operand
893 && GET_MODE (op[arity]) == Pmode))
894 {
895 /* An address_operand usually has VOIDmode in the expander
896 so we cannot use this. */
897 machine_mode target_mode =
898 (insn_op->predicate == address_operand
899 ? Pmode : insn_op->mode);
900 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
901 }
902
903 if (!insn_op->predicate (op[arity], insn_op->mode))
904 {
905 error ("Invalid argument %d for builtin %qF", arity + 1, fndecl);
906 return const0_rtx;
907 }
908 arity++;
909 }
910
911 if (last_vec_mode != VOIDmode && !TARGET_VX)
912 {
913 error ("Vector type builtin %qF is not supported without -mvx "
914 "(default with -march=z13).",
915 fndecl);
916 return const0_rtx;
917 }
918
919 switch (arity)
920 {
921 case 0:
922 pat = GEN_FCN (icode) (target);
923 break;
924 case 1:
925 if (nonvoid)
926 pat = GEN_FCN (icode) (target, op[0]);
927 else
928 pat = GEN_FCN (icode) (op[0]);
929 break;
930 case 2:
931 if (nonvoid)
932 pat = GEN_FCN (icode) (target, op[0], op[1]);
933 else
934 pat = GEN_FCN (icode) (op[0], op[1]);
935 break;
936 case 3:
937 if (nonvoid)
938 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
939 else
940 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
941 break;
942 case 4:
943 if (nonvoid)
944 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
945 else
946 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
947 break;
948 case 5:
949 if (nonvoid)
950 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
951 else
952 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
953 break;
954 case 6:
955 if (nonvoid)
956 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
957 else
958 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
959 break;
960 default:
961 gcc_unreachable ();
962 }
963 if (!pat)
964 return NULL_RTX;
965 emit_insn (pat);
966
967 if (nonvoid)
968 return target;
969 else
970 return const0_rtx;
971 }
972
973
974 static const int s390_hotpatch_hw_max = 1000000;
975 static int s390_hotpatch_hw_before_label = 0;
976 static int s390_hotpatch_hw_after_label = 0;
977
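/* A typical use of the attribute checked below, with both arguments given
   in halfwords:

     void foo (void) __attribute__ ((hotpatch (1, 2)));

   requests one halfword of hotpatching space before and two after the
   function label, each bounded by s390_hotpatch_hw_max.  */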
978 /* Check whether the hotpatch attribute is applied to a function and, if it has
979 an argument, the argument is valid. */
980
981 static tree
982 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
983 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
984 {
985 tree expr;
986 tree expr2;
987 int err;
988
989 if (TREE_CODE (*node) != FUNCTION_DECL)
990 {
991 warning (OPT_Wattributes, "%qE attribute only applies to functions",
992 name);
993 *no_add_attrs = true;
994 }
995 if (args != NULL && TREE_CHAIN (args) != NULL)
996 {
997 expr = TREE_VALUE (args);
998 expr2 = TREE_VALUE (TREE_CHAIN (args));
999 }
1000 if (args == NULL || TREE_CHAIN (args) == NULL)
1001 err = 1;
1002 else if (TREE_CODE (expr) != INTEGER_CST
1003 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1004 || wi::gtu_p (expr, s390_hotpatch_hw_max))
1005 err = 1;
1006 else if (TREE_CODE (expr2) != INTEGER_CST
1007 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1008 || wi::gtu_p (expr2, s390_hotpatch_hw_max))
1009 err = 1;
1010 else
1011 err = 0;
1012 if (err)
1013 {
1014 error ("requested %qE attribute is not a comma separated pair of"
1015 " non-negative integer constants or too large (max. %d)", name,
1016 s390_hotpatch_hw_max);
1017 *no_add_attrs = true;
1018 }
1019
1020 return NULL_TREE;
1021 }
1022
1023 /* Expand the s390_vector_bool type attribute. */
1024
1025 static tree
1026 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1027 tree args ATTRIBUTE_UNUSED,
1028 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1029 {
1030 tree type = *node, result = NULL_TREE;
1031 machine_mode mode;
1032
1033 while (POINTER_TYPE_P (type)
1034 || TREE_CODE (type) == FUNCTION_TYPE
1035 || TREE_CODE (type) == METHOD_TYPE
1036 || TREE_CODE (type) == ARRAY_TYPE)
1037 type = TREE_TYPE (type);
1038
1039 mode = TYPE_MODE (type);
1040 switch (mode)
1041 {
1042 case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
1043 case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
1044 case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
 1045     case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI]; break;
1046 default: break;
1047 }
1048
1049 *no_add_attrs = true; /* No need to hang on to the attribute. */
1050
1051 if (result)
1052 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1053
1054 return NULL_TREE;
1055 }
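/* Sketch of the effect (the attribute is normally used indirectly via the
   __vector __bool types from vecintrin.h): when applied to a type whose
   mode is, say, SImode or V4SImode, the handler above rewrites the type
   to the corresponding boolean vector type, BT_BV4SI in that case.  */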
1056
1057 static const struct attribute_spec s390_attribute_table[] = {
1058 { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
1059 { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
1060 /* End element. */
1061 { NULL, 0, 0, false, false, false, NULL, false }
1062 };
1063
1064 /* Return the alignment for LABEL. We default to the -falign-labels
1065 value except for the literal pool base label. */
1066 int
1067 s390_label_align (rtx label)
1068 {
1069 rtx_insn *prev_insn = prev_active_insn (label);
1070 rtx set, src;
1071
1072 if (prev_insn == NULL_RTX)
1073 goto old;
1074
1075 set = single_set (prev_insn);
1076
1077 if (set == NULL_RTX)
1078 goto old;
1079
1080 src = SET_SRC (set);
1081
1082 /* Don't align literal pool base labels. */
1083 if (GET_CODE (src) == UNSPEC
1084 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1085 return 0;
1086
1087 old:
1088 return align_labels_log;
1089 }
1090
1091 static machine_mode
1092 s390_libgcc_cmp_return_mode (void)
1093 {
1094 return TARGET_64BIT ? DImode : SImode;
1095 }
1096
1097 static machine_mode
1098 s390_libgcc_shift_count_mode (void)
1099 {
1100 return TARGET_64BIT ? DImode : SImode;
1101 }
1102
1103 static machine_mode
1104 s390_unwind_word_mode (void)
1105 {
1106 return TARGET_64BIT ? DImode : SImode;
1107 }
1108
1109 /* Return true if the back end supports mode MODE. */
1110 static bool
1111 s390_scalar_mode_supported_p (machine_mode mode)
1112 {
 1113   /* In contrast to the default implementation, reject TImode constants on
 1114      31-bit TARGET_ZARCH for ABI compliance.  */
1115 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1116 return false;
1117
1118 if (DECIMAL_FLOAT_MODE_P (mode))
1119 return default_decimal_float_supported_p ();
1120
1121 return default_scalar_mode_supported_p (mode);
1122 }
1123
1124 /* Return true if the back end supports vector mode MODE. */
1125 static bool
1126 s390_vector_mode_supported_p (machine_mode mode)
1127 {
1128 machine_mode inner;
1129
1130 if (!VECTOR_MODE_P (mode)
1131 || !TARGET_VX
1132 || GET_MODE_SIZE (mode) > 16)
1133 return false;
1134
1135 inner = GET_MODE_INNER (mode);
1136
1137 switch (inner)
1138 {
1139 case QImode:
1140 case HImode:
1141 case SImode:
1142 case DImode:
1143 case TImode:
1144 case SFmode:
1145 case DFmode:
1146 case TFmode:
1147 return true;
1148 default:
1149 return false;
1150 }
1151 }
1152
1153 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1154
1155 void
1156 s390_set_has_landing_pad_p (bool value)
1157 {
1158 cfun->machine->has_landing_pad_p = value;
1159 }
1160
1161 /* If two condition code modes are compatible, return a condition code
1162 mode which is compatible with both. Otherwise, return
1163 VOIDmode. */
1164
1165 static machine_mode
1166 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1167 {
1168 if (m1 == m2)
1169 return m1;
1170
1171 switch (m1)
1172 {
1173 case CCZmode:
1174 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1175 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1176 return m2;
1177 return VOIDmode;
1178
1179 case CCSmode:
1180 case CCUmode:
1181 case CCTmode:
1182 case CCSRmode:
1183 case CCURmode:
1184 case CCZ1mode:
1185 if (m2 == CCZmode)
1186 return m1;
1187
1188 return VOIDmode;
1189
1190 default:
1191 return VOIDmode;
1192 }
1193 return VOIDmode;
1194 }
1195
1196 /* Return true if SET either doesn't set the CC register, or else
1197 the source and destination have matching CC modes and that
1198 CC mode is at least as constrained as REQ_MODE. */
1199
1200 static bool
1201 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1202 {
1203 machine_mode set_mode;
1204
1205 gcc_assert (GET_CODE (set) == SET);
1206
1207 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1208 return 1;
1209
1210 set_mode = GET_MODE (SET_DEST (set));
1211 switch (set_mode)
1212 {
1213 case CCSmode:
1214 case CCSRmode:
1215 case CCUmode:
1216 case CCURmode:
1217 case CCLmode:
1218 case CCL1mode:
1219 case CCL2mode:
1220 case CCL3mode:
1221 case CCT1mode:
1222 case CCT2mode:
1223 case CCT3mode:
1224 case CCVEQmode:
1225 case CCVHmode:
1226 case CCVHUmode:
1227 case CCVFHmode:
1228 case CCVFHEmode:
1229 if (req_mode != set_mode)
1230 return 0;
1231 break;
1232
1233 case CCZmode:
1234 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1235 && req_mode != CCSRmode && req_mode != CCURmode)
1236 return 0;
1237 break;
1238
1239 case CCAPmode:
1240 case CCANmode:
1241 if (req_mode != CCAmode)
1242 return 0;
1243 break;
1244
1245 default:
1246 gcc_unreachable ();
1247 }
1248
1249 return (GET_MODE (SET_SRC (set)) == set_mode);
1250 }
1251
1252 /* Return true if every SET in INSN that sets the CC register
1253 has source and destination with matching CC modes and that
1254 CC mode is at least as constrained as REQ_MODE.
1255 If REQ_MODE is VOIDmode, always return false. */
1256
1257 bool
1258 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1259 {
1260 int i;
1261
1262 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1263 if (req_mode == VOIDmode)
1264 return false;
1265
1266 if (GET_CODE (PATTERN (insn)) == SET)
1267 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1268
1269 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1270 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1271 {
1272 rtx set = XVECEXP (PATTERN (insn), 0, i);
1273 if (GET_CODE (set) == SET)
1274 if (!s390_match_ccmode_set (set, req_mode))
1275 return false;
1276 }
1277
1278 return true;
1279 }
1280
1281 /* If a test-under-mask instruction can be used to implement
1282 (compare (and ... OP1) OP2), return the CC mode required
1283 to do that. Otherwise, return VOIDmode.
1284 MIXED is true if the instruction can distinguish between
 1285    CC1 and CC2 for mixed selected bits (TMxx); it is false
 1286    if the instruction cannot (TM).  */
1287
1288 machine_mode
1289 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1290 {
1291 int bit0, bit1;
1292
1293 /* ??? Fixme: should work on CONST_DOUBLE as well. */
1294 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1295 return VOIDmode;
1296
1297 /* Selected bits all zero: CC0.
1298 e.g.: int a; if ((a & (16 + 128)) == 0) */
1299 if (INTVAL (op2) == 0)
1300 return CCTmode;
1301
1302 /* Selected bits all one: CC3.
1303 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1304 if (INTVAL (op2) == INTVAL (op1))
1305 return CCT3mode;
1306
1307 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1308 int a;
1309 if ((a & (16 + 128)) == 16) -> CCT1
1310 if ((a & (16 + 128)) == 128) -> CCT2 */
1311 if (mixed)
1312 {
1313 bit1 = exact_log2 (INTVAL (op2));
1314 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1315 if (bit0 != -1 && bit1 != -1)
1316 return bit0 > bit1 ? CCT1mode : CCT2mode;
1317 }
1318
1319 return VOIDmode;
1320 }
1321
1322 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1323 OP0 and OP1 of a COMPARE, return the mode to be used for the
1324 comparison. */
1325
1326 machine_mode
1327 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1328 {
1329 if (TARGET_VX
1330 && register_operand (op0, DFmode)
1331 && register_operand (op1, DFmode))
1332 {
1333 /* LT, LE, UNGT, UNGE require swapping OP0 and OP1. Either
1334 s390_emit_compare or s390_canonicalize_comparison will take
1335 care of it. */
1336 switch (code)
1337 {
1338 case EQ:
1339 case NE:
1340 return CCVEQmode;
1341 case GT:
1342 case UNLE:
1343 return CCVFHmode;
1344 case GE:
1345 case UNLT:
1346 return CCVFHEmode;
1347 default:
1348 ;
1349 }
1350 }
1351
1352 switch (code)
1353 {
1354 case EQ:
1355 case NE:
1356 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1357 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1358 return CCAPmode;
1359 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1360 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1361 return CCAPmode;
1362 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1363 || GET_CODE (op1) == NEG)
1364 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1365 return CCLmode;
1366
1367 if (GET_CODE (op0) == AND)
1368 {
1369 /* Check whether we can potentially do it via TM. */
1370 machine_mode ccmode;
1371 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1372 if (ccmode != VOIDmode)
1373 {
1374 /* Relax CCTmode to CCZmode to allow fall-back to AND
1375 if that turns out to be beneficial. */
1376 return ccmode == CCTmode ? CCZmode : ccmode;
1377 }
1378 }
1379
1380 if (register_operand (op0, HImode)
1381 && GET_CODE (op1) == CONST_INT
1382 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1383 return CCT3mode;
1384 if (register_operand (op0, QImode)
1385 && GET_CODE (op1) == CONST_INT
1386 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1387 return CCT3mode;
1388
1389 return CCZmode;
1390
1391 case LE:
1392 case LT:
1393 case GE:
1394 case GT:
 1395       /* The only overflow condition of NEG and ABS happens when the
 1396 	 operand is INT_MIN; the result then stays negative, i.e. we get
 1397 	 an overflow from a (mathematically) positive value to a negative one.
 1398 	 Using CCAP mode the resulting cc can be used for comparisons.  */
1399 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1400 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1401 return CCAPmode;
1402
1403 /* If constants are involved in an add instruction it is possible to use
1404 the resulting cc for comparisons with zero. Knowing the sign of the
1405 constant the overflow behavior gets predictable. e.g.:
1406 int a, b; if ((b = a + c) > 0)
1407 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1408 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1409 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1410 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1411 /* Avoid INT32_MIN on 32 bit. */
1412 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1413 {
1414 if (INTVAL (XEXP((op0), 1)) < 0)
1415 return CCANmode;
1416 else
1417 return CCAPmode;
1418 }
1419 /* Fall through. */
1420 case UNORDERED:
1421 case ORDERED:
1422 case UNEQ:
1423 case UNLE:
1424 case UNLT:
1425 case UNGE:
1426 case UNGT:
1427 case LTGT:
1428 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1429 && GET_CODE (op1) != CONST_INT)
1430 return CCSRmode;
1431 return CCSmode;
1432
1433 case LTU:
1434 case GEU:
1435 if (GET_CODE (op0) == PLUS
1436 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1437 return CCL1mode;
1438
1439 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1440 && GET_CODE (op1) != CONST_INT)
1441 return CCURmode;
1442 return CCUmode;
1443
1444 case LEU:
1445 case GTU:
1446 if (GET_CODE (op0) == MINUS
1447 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1448 return CCL2mode;
1449
1450 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1451 && GET_CODE (op1) != CONST_INT)
1452 return CCURmode;
1453 return CCUmode;
1454
1455 default:
1456 gcc_unreachable ();
1457 }
1458 }
1459
1460 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1461 that we can implement more efficiently. */
1462
1463 static void
1464 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1465 bool op0_preserve_value)
1466 {
1467 if (op0_preserve_value)
1468 return;
1469
1470 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1471 if ((*code == EQ || *code == NE)
1472 && *op1 == const0_rtx
1473 && GET_CODE (*op0) == ZERO_EXTRACT
1474 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1475 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1476 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1477 {
1478 rtx inner = XEXP (*op0, 0);
1479 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1480 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1481 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1482
1483 if (len > 0 && len < modesize
1484 && pos >= 0 && pos + len <= modesize
1485 && modesize <= HOST_BITS_PER_WIDE_INT)
1486 {
1487 unsigned HOST_WIDE_INT block;
1488 block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
1489 block <<= modesize - pos - len;
1490
1491 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1492 gen_int_mode (block, GET_MODE (inner)));
1493 }
1494 }
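/* For instance, with a 32-bit INNER, LEN == 2 and POS == 4, the block
   computed above is ((1 << 2) - 1) << (32 - 4 - 2) == 0x0c000000, so
   (zero_extract x 2 4) == 0 has been rewritten into
   (and x 0x0c000000) == 0, which the TM patterns can match.  */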
1495
1496 /* Narrow AND of memory against immediate to enable TM. */
1497 if ((*code == EQ || *code == NE)
1498 && *op1 == const0_rtx
1499 && GET_CODE (*op0) == AND
1500 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1501 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1502 {
1503 rtx inner = XEXP (*op0, 0);
1504 rtx mask = XEXP (*op0, 1);
1505
1506 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1507 if (GET_CODE (inner) == SUBREG
1508 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1509 && (GET_MODE_SIZE (GET_MODE (inner))
1510 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1511 && ((INTVAL (mask)
1512 & GET_MODE_MASK (GET_MODE (inner))
1513 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1514 == 0))
1515 inner = SUBREG_REG (inner);
1516
1517 /* Do not change volatile MEMs. */
1518 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1519 {
1520 int part = s390_single_part (XEXP (*op0, 1),
1521 GET_MODE (inner), QImode, 0);
1522 if (part >= 0)
1523 {
1524 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1525 inner = adjust_address_nv (inner, QImode, part);
1526 *op0 = gen_rtx_AND (QImode, inner, mask);
1527 }
1528 }
1529 }
1530
1531 /* Narrow comparisons against 0xffff to HImode if possible. */
1532 if ((*code == EQ || *code == NE)
1533 && GET_CODE (*op1) == CONST_INT
1534 && INTVAL (*op1) == 0xffff
1535 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1536 && (nonzero_bits (*op0, GET_MODE (*op0))
1537 & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
1538 {
1539 *op0 = gen_lowpart (HImode, *op0);
1540 *op1 = constm1_rtx;
1541 }
1542
1543 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1544 if (GET_CODE (*op0) == UNSPEC
1545 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1546 && XVECLEN (*op0, 0) == 1
1547 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1548 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1549 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1550 && *op1 == const0_rtx)
1551 {
1552 enum rtx_code new_code = UNKNOWN;
1553 switch (*code)
1554 {
1555 case EQ: new_code = EQ; break;
1556 case NE: new_code = NE; break;
1557 case LT: new_code = GTU; break;
1558 case GT: new_code = LTU; break;
1559 case LE: new_code = GEU; break;
1560 case GE: new_code = LEU; break;
1561 default: break;
1562 }
1563
1564 if (new_code != UNKNOWN)
1565 {
1566 *op0 = XVECEXP (*op0, 0, 0);
1567 *code = new_code;
1568 }
1569 }
1570
1571 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1572 if (GET_CODE (*op0) == UNSPEC
1573 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1574 && XVECLEN (*op0, 0) == 1
1575 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1576 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1577 && CONST_INT_P (*op1))
1578 {
1579 enum rtx_code new_code = UNKNOWN;
1580 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1581 {
1582 case CCZmode:
1583 case CCRAWmode:
1584 switch (*code)
1585 {
1586 case EQ: new_code = EQ; break;
1587 case NE: new_code = NE; break;
1588 default: break;
1589 }
1590 break;
1591 default: break;
1592 }
1593
1594 if (new_code != UNKNOWN)
1595 {
1596 /* For CCRAWmode put the required cc mask into the second
1597 operand. */
1598 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1599 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1600 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1601 *op0 = XVECEXP (*op0, 0, 0);
1602 *code = new_code;
1603 }
1604 }
1605
1606 /* Simplify cascaded EQ, NE with const0_rtx. */
1607 if ((*code == NE || *code == EQ)
1608 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1609 && GET_MODE (*op0) == SImode
1610 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1611 && REG_P (XEXP (*op0, 0))
1612 && XEXP (*op0, 1) == const0_rtx
1613 && *op1 == const0_rtx)
1614 {
1615 if ((*code == EQ && GET_CODE (*op0) == NE)
1616 || (*code == NE && GET_CODE (*op0) == EQ))
1617 *code = EQ;
1618 else
1619 *code = NE;
1620 *op0 = XEXP (*op0, 0);
1621 }
1622
1623 /* Prefer register over memory as first operand. */
1624 if (MEM_P (*op0) && REG_P (*op1))
1625 {
1626 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1627 *code = (int)swap_condition ((enum rtx_code)*code);
1628 }
1629
1630 /* Using the scalar variants of vector instructions for 64 bit FP
1631 comparisons might require swapping the operands. */
1632 if (TARGET_VX
1633 && register_operand (*op0, DFmode)
1634 && register_operand (*op1, DFmode)
1635 && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
1636 {
1637 rtx tmp;
1638
1639 switch (*code)
1640 {
1641 case LT: *code = GT; break;
1642 case LE: *code = GE; break;
1643 case UNGT: *code = UNLE; break;
1644 case UNGE: *code = UNLT; break;
1645 default: ;
1646 }
1647 tmp = *op0; *op0 = *op1; *op1 = tmp;
1648 }
1649 }
1650
1651 /* Helper function for s390_emit_compare. If possible emit a 64 bit
1652 FP compare using the single element variant of vector instructions.
1653 Replace CODE with the comparison code to be used in the CC reg
1654 compare and return the condition code register RTX in CC. */
1655
1656 static bool
1657 s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
1658 rtx *cc)
1659 {
1660 machine_mode cmp_mode;
1661 bool swap_p = false;
1662
1663 switch (*code)
1664 {
1665 case EQ: cmp_mode = CCVEQmode; break;
1666 case NE: cmp_mode = CCVEQmode; break;
1667 case GT: cmp_mode = CCVFHmode; break;
1668 case GE: cmp_mode = CCVFHEmode; break;
1669 case UNLE: cmp_mode = CCVFHmode; break;
1670 case UNLT: cmp_mode = CCVFHEmode; break;
1671 case LT: cmp_mode = CCVFHmode; *code = GT; swap_p = true; break;
1672 case LE: cmp_mode = CCVFHEmode; *code = GE; swap_p = true; break;
1673 case UNGE: cmp_mode = CCVFHmode; *code = UNLE; swap_p = true; break;
1674 case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
1675 default: return false;
1676 }
1677
1678 if (swap_p)
1679 {
1680 rtx tmp = cmp2;
1681 cmp2 = cmp1;
1682 cmp1 = tmp;
1683 }
1684 *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
1685 emit_insn (gen_rtx_PARALLEL (VOIDmode,
1686 gen_rtvec (2,
1687 gen_rtx_SET (*cc,
1688 gen_rtx_COMPARE (cmp_mode, cmp1,
1689 cmp2)),
1690 gen_rtx_CLOBBER (VOIDmode,
1691 gen_rtx_SCRATCH (V2DImode)))));
1692 return true;
1693 }
1694
1695
1696 /* Emit a compare instruction suitable to implement the comparison
1697 OP0 CODE OP1. Return the correct condition RTL to be placed in
1698 the IF_THEN_ELSE of the conditional branch testing the result. */
1699
1700 rtx
1701 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1702 {
1703 machine_mode mode = s390_select_ccmode (code, op0, op1);
1704 rtx cc;
1705
1706 if (TARGET_VX
1707 && register_operand (op0, DFmode)
1708 && register_operand (op1, DFmode)
1709 && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
1710 {
1711 /* Work has been done by s390_expand_vec_compare_scalar already. */
1712 }
1713 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1714 {
1715 /* Do not output a redundant compare instruction if a
1716 compare_and_swap pattern already computed the result and the
1717 machine modes are compatible. */
1718 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1719 == GET_MODE (op0));
1720 cc = op0;
1721 }
1722 else
1723 {
1724 cc = gen_rtx_REG (mode, CC_REGNUM);
1725 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1726 }
1727
1728 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1729 }
1730
1731 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1732 matches CMP.
1733 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1734 conditional branch testing the result. */
1735
1736 static rtx
1737 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1738 rtx cmp, rtx new_rtx)
1739 {
1740 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
1741 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
1742 const0_rtx);
1743 }
1744
1745 /* Emit a jump instruction to TARGET and return it. If COND is
1746 NULL_RTX, emit an unconditional jump, else a conditional jump under
1747 condition COND. */
1748
1749 rtx_insn *
1750 s390_emit_jump (rtx target, rtx cond)
1751 {
1752 rtx insn;
1753
1754 target = gen_rtx_LABEL_REF (VOIDmode, target);
1755 if (cond)
1756 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1757
1758 insn = gen_rtx_SET (pc_rtx, target);
1759 return emit_jump_insn (insn);
1760 }
1761
1762 /* Return branch condition mask to implement a branch
1763 specified by CODE. Return -1 for invalid comparisons. */
1764
1765 int
1766 s390_branch_condition_mask (rtx code)
1767 {
1768 const int CC0 = 1 << 3;
1769 const int CC1 = 1 << 2;
1770 const int CC2 = 1 << 1;
1771 const int CC3 = 1 << 0;
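  /* These map directly onto the 4-bit condition mask of BRC/BRCL: e.g. an
     EQ test of a CCZmode CC value yields CC0 == 8, i.e. "brc 8,label"
     (jump on equal), while NE yields CC1 | CC2 | CC3 == 7.  */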
1772
1773 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1774 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1775 gcc_assert (XEXP (code, 1) == const0_rtx
1776 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1777 && CONST_INT_P (XEXP (code, 1))));
1778
1779
1780 switch (GET_MODE (XEXP (code, 0)))
1781 {
1782 case CCZmode:
1783 case CCZ1mode:
1784 switch (GET_CODE (code))
1785 {
1786 case EQ: return CC0;
1787 case NE: return CC1 | CC2 | CC3;
1788 default: return -1;
1789 }
1790 break;
1791
1792 case CCT1mode:
1793 switch (GET_CODE (code))
1794 {
1795 case EQ: return CC1;
1796 case NE: return CC0 | CC2 | CC3;
1797 default: return -1;
1798 }
1799 break;
1800
1801 case CCT2mode:
1802 switch (GET_CODE (code))
1803 {
1804 case EQ: return CC2;
1805 case NE: return CC0 | CC1 | CC3;
1806 default: return -1;
1807 }
1808 break;
1809
1810 case CCT3mode:
1811 switch (GET_CODE (code))
1812 {
1813 case EQ: return CC3;
1814 case NE: return CC0 | CC1 | CC2;
1815 default: return -1;
1816 }
1817 break;
1818
1819 case CCLmode:
1820 switch (GET_CODE (code))
1821 {
1822 case EQ: return CC0 | CC2;
1823 case NE: return CC1 | CC3;
1824 default: return -1;
1825 }
1826 break;
1827
1828 case CCL1mode:
1829 switch (GET_CODE (code))
1830 {
1831 case LTU: return CC2 | CC3; /* carry */
1832 case GEU: return CC0 | CC1; /* no carry */
1833 default: return -1;
1834 }
1835 break;
1836
1837 case CCL2mode:
1838 switch (GET_CODE (code))
1839 {
1840 case GTU: return CC0 | CC1; /* borrow */
1841 case LEU: return CC2 | CC3; /* no borrow */
1842 default: return -1;
1843 }
1844 break;
1845
1846 case CCL3mode:
1847 switch (GET_CODE (code))
1848 {
1849 case EQ: return CC0 | CC2;
1850 case NE: return CC1 | CC3;
1851 case LTU: return CC1;
1852 case GTU: return CC3;
1853 case LEU: return CC1 | CC2;
1854 case GEU: return CC2 | CC3;
1855 default: return -1;
1856 }
1857
1858 case CCUmode:
1859 switch (GET_CODE (code))
1860 {
1861 case EQ: return CC0;
1862 case NE: return CC1 | CC2 | CC3;
1863 case LTU: return CC1;
1864 case GTU: return CC2;
1865 case LEU: return CC0 | CC1;
1866 case GEU: return CC0 | CC2;
1867 default: return -1;
1868 }
1869 break;
1870
1871 case CCURmode:
1872 switch (GET_CODE (code))
1873 {
1874 case EQ: return CC0;
1875 case NE: return CC2 | CC1 | CC3;
1876 case LTU: return CC2;
1877 case GTU: return CC1;
1878 case LEU: return CC0 | CC2;
1879 case GEU: return CC0 | CC1;
1880 default: return -1;
1881 }
1882 break;
1883
1884 case CCAPmode:
1885 switch (GET_CODE (code))
1886 {
1887 case EQ: return CC0;
1888 case NE: return CC1 | CC2 | CC3;
1889 case LT: return CC1 | CC3;
1890 case GT: return CC2;
1891 case LE: return CC0 | CC1 | CC3;
1892 case GE: return CC0 | CC2;
1893 default: return -1;
1894 }
1895 break;
1896
1897 case CCANmode:
1898 switch (GET_CODE (code))
1899 {
1900 case EQ: return CC0;
1901 case NE: return CC1 | CC2 | CC3;
1902 case LT: return CC1;
1903 case GT: return CC2 | CC3;
1904 case LE: return CC0 | CC1;
1905 case GE: return CC0 | CC2 | CC3;
1906 default: return -1;
1907 }
1908 break;
1909
1910 case CCSmode:
1911 switch (GET_CODE (code))
1912 {
1913 case EQ: return CC0;
1914 case NE: return CC1 | CC2 | CC3;
1915 case LT: return CC1;
1916 case GT: return CC2;
1917 case LE: return CC0 | CC1;
1918 case GE: return CC0 | CC2;
1919 case UNORDERED: return CC3;
1920 case ORDERED: return CC0 | CC1 | CC2;
1921 case UNEQ: return CC0 | CC3;
1922 case UNLT: return CC1 | CC3;
1923 case UNGT: return CC2 | CC3;
1924 case UNLE: return CC0 | CC1 | CC3;
1925 case UNGE: return CC0 | CC2 | CC3;
1926 case LTGT: return CC1 | CC2;
1927 default: return -1;
1928 }
1929 break;
1930
1931 case CCSRmode:
1932 switch (GET_CODE (code))
1933 {
1934 case EQ: return CC0;
1935 case NE: return CC2 | CC1 | CC3;
1936 case LT: return CC2;
1937 case GT: return CC1;
1938 case LE: return CC0 | CC2;
1939 case GE: return CC0 | CC1;
1940 case UNORDERED: return CC3;
1941 case ORDERED: return CC0 | CC2 | CC1;
1942 case UNEQ: return CC0 | CC3;
1943 case UNLT: return CC2 | CC3;
1944 case UNGT: return CC1 | CC3;
1945 case UNLE: return CC0 | CC2 | CC3;
1946 case UNGE: return CC0 | CC1 | CC3;
1947 case LTGT: return CC2 | CC1;
1948 default: return -1;
1949 }
1950 break;
1951
1952 /* Vector comparison modes. */
1953
1954 case CCVEQmode:
1955 switch (GET_CODE (code))
1956 {
1957 case EQ: return CC0;
1958 case NE: return CC3;
1959 default: return -1;
1960 }
1961
1962 case CCVEQANYmode:
1963 switch (GET_CODE (code))
1964 {
1965 case EQ: return CC0 | CC1;
1966 case NE: return CC3 | CC1;
1967 default: return -1;
1968 }
1969
1970 /* Integer vector compare modes. */
1971
1972 case CCVHmode:
1973 switch (GET_CODE (code))
1974 {
1975 case GT: return CC0;
1976 case LE: return CC3;
1977 default: return -1;
1978 }
1979
1980 case CCVHANYmode:
1981 switch (GET_CODE (code))
1982 {
1983 case GT: return CC0 | CC1;
1984 case LE: return CC3 | CC1;
1985 default: return -1;
1986 }
1987
1988 case CCVHUmode:
1989 switch (GET_CODE (code))
1990 {
1991 case GTU: return CC0;
1992 case LEU: return CC3;
1993 default: return -1;
1994 }
1995
1996 case CCVHUANYmode:
1997 switch (GET_CODE (code))
1998 {
1999 case GTU: return CC0 | CC1;
2000 case LEU: return CC3 | CC1;
2001 default: return -1;
2002 }
2003
2004 /* FP vector compare modes. */
2005
2006 case CCVFHmode:
2007 switch (GET_CODE (code))
2008 {
2009 case GT: return CC0;
2010 case UNLE: return CC3;
2011 default: return -1;
2012 }
2013
2014 case CCVFHANYmode:
2015 switch (GET_CODE (code))
2016 {
2017 case GT: return CC0 | CC1;
2018 case UNLE: return CC3 | CC1;
2019 default: return -1;
2020 }
2021
2022 case CCVFHEmode:
2023 switch (GET_CODE (code))
2024 {
2025 case GE: return CC0;
2026 case UNLT: return CC3;
2027 default: return -1;
2028 }
2029
2030 case CCVFHEANYmode:
2031 switch (GET_CODE (code))
2032 {
2033 case GE: return CC0 | CC1;
2034 case UNLT: return CC3 | CC1;
2035 default: return -1;
2036 }
2037
2038
2039 case CCRAWmode:
2040 switch (GET_CODE (code))
2041 {
2042 case EQ:
2043 return INTVAL (XEXP (code, 1));
2044 case NE:
2045 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2046 default:
2047 gcc_unreachable ();
2048 }
2049
2050 default:
2051 return -1;
2052 }
2053 }
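
/* Editor's note (illustrative example, not part of the original
   sources): the returned value is the 4-bit condition-code mask as
   used in the M1 field of BRC/BRCL, with CC0 mapped to the most
   significant bit (0x8) and CC3 to the least significant bit (0x1).
   For instance

     s390_branch_condition_mask (gen_rtx_GT (VOIDmode,
                                             gen_rtx_REG (CCSmode, CC_REGNUM),
                                             const0_rtx))

   yields CC2 == 0x2: the branch is taken exactly when the signed
   comparison set condition code 2 ("first operand high").  */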
2054
2055
2056 /* Return branch condition mask to implement a compare and branch
2057 specified by CODE. Return -1 for invalid comparisons. */
2058
2059 int
2060 s390_compare_and_branch_condition_mask (rtx code)
2061 {
2062 const int CC0 = 1 << 3;
2063 const int CC1 = 1 << 2;
2064 const int CC2 = 1 << 1;
2065
2066 switch (GET_CODE (code))
2067 {
2068 case EQ:
2069 return CC0;
2070 case NE:
2071 return CC1 | CC2;
2072 case LT:
2073 case LTU:
2074 return CC1;
2075 case GT:
2076 case GTU:
2077 return CC2;
2078 case LE:
2079 case LEU:
2080 return CC0 | CC1;
2081 case GE:
2082 case GEU:
2083 return CC0 | CC2;
2084 default:
2085 gcc_unreachable ();
2086 }
2087 return -1;
2088 }
2089
2090 /* If INV is false, return assembler mnemonic string to implement
2091 a branch specified by CODE. If INV is true, return mnemonic
2092 for the corresponding inverted branch. */
2093
2094 static const char *
2095 s390_branch_condition_mnemonic (rtx code, int inv)
2096 {
2097 int mask;
2098
2099 static const char *const mnemonic[16] =
2100 {
2101 NULL, "o", "h", "nle",
2102 "l", "nhe", "lh", "ne",
2103 "e", "nlh", "he", "nl",
2104 "le", "nh", "no", NULL
2105 };
2106
2107 if (GET_CODE (XEXP (code, 0)) == REG
2108 && REGNO (XEXP (code, 0)) == CC_REGNUM
2109 && (XEXP (code, 1) == const0_rtx
2110 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2111 && CONST_INT_P (XEXP (code, 1)))))
2112 mask = s390_branch_condition_mask (code);
2113 else
2114 mask = s390_compare_and_branch_condition_mask (code);
2115
2116 gcc_assert (mask >= 0);
2117
2118 if (inv)
2119 mask ^= 15;
2120
2121 gcc_assert (mask >= 1 && mask <= 14);
2122
2123 return mnemonic[mask];
2124 }
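
/* Editor's note (illustrative example, not part of the original
   sources): for (eq (reg:CCS CC_REGNUM) (const_int 0)) the mask is
   CC0 == 8 and the mnemonic is "e" (branch on equal); with INV set
   the mask becomes 8 ^ 15 == 7 and the mnemonic is "ne".  Masks 0
   and 15 (branch never / branch always) are rejected by the
   assertion above.  */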
2125
2126 /* Return the part of OP which has a value different from DEF.
2127 The size of the part is determined by MODE.
2128 Use this function only if you already know that OP really
2129 contains such a part. */
2130
2131 unsigned HOST_WIDE_INT
2132 s390_extract_part (rtx op, machine_mode mode, int def)
2133 {
2134 unsigned HOST_WIDE_INT value = 0;
2135 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2136 int part_bits = GET_MODE_BITSIZE (mode);
2137 unsigned HOST_WIDE_INT part_mask
2138 = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
2139 int i;
2140
2141 for (i = 0; i < max_parts; i++)
2142 {
2143 if (i == 0)
2144 value = (unsigned HOST_WIDE_INT) INTVAL (op);
2145 else
2146 value >>= part_bits;
2147
2148 if ((value & part_mask) != (def & part_mask))
2149 return value & part_mask;
2150 }
2151
2152 gcc_unreachable ();
2153 }
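
/* Editor's note (illustrative example, not part of the original
   sources): parts are scanned starting from the least significant
   one.  With DEF == 0,

     s390_extract_part (GEN_INT (0x12340000), HImode, 0)

   skips the all-zero low halfword and returns 0x1234, the first
   HImode part that differs from DEF.  */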
2154
2155 /* If OP is an integer constant of mode MODE with exactly one
2156 part of mode PART_MODE unequal to DEF, return the number of that
2157 part. Otherwise, return -1. */
2158
2159 int
2160 s390_single_part (rtx op,
2161 machine_mode mode,
2162 machine_mode part_mode,
2163 int def)
2164 {
2165 unsigned HOST_WIDE_INT value = 0;
2166 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2167 unsigned HOST_WIDE_INT part_mask
2168 = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
2169 int i, part = -1;
2170
2171 if (GET_CODE (op) != CONST_INT)
2172 return -1;
2173
2174 for (i = 0; i < n_parts; i++)
2175 {
2176 if (i == 0)
2177 value = (unsigned HOST_WIDE_INT) INTVAL (op);
2178 else
2179 value >>= GET_MODE_BITSIZE (part_mode);
2180
2181 if ((value & part_mask) != (def & part_mask))
2182 {
2183 if (part != -1)
2184 return -1;
2185 else
2186 part = i;
2187 }
2188 }
2189 return part == -1 ? -1 : n_parts - 1 - part;
2190 }
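
/* Editor's note (illustrative example, not part of the original
   sources): the returned part number counts from the most
   significant part.  With DEF == 0,

     s390_single_part (GEN_INT (0x12340000), DImode, HImode, 0)

   returns 2, since the DImode value consists of the halfwords
   0x0000, 0x0000, 0x1234, 0x0000 from most to least significant and
   only halfword number 2 differs from DEF.  A value such as
   0x12340001 yields -1 because two parts differ.  */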
2191
2192 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2193 bits and no other bits are set in IN. POS and LENGTH can be used
2194 to obtain the start position and the length of the bitfield.
2195
2196 POS gives the position of the first bit of the bitfield counting
2197 from the lowest order bit starting with zero. In order to use this
2198 value for S/390 instructions it has to be converted to "bits big
2199 endian" style. */
2200
2201 bool
2202 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
2203 int *pos, int *length)
2204 {
2205 int tmp_pos = 0;
2206 int tmp_length = 0;
2207 int i;
2208 unsigned HOST_WIDE_INT mask = 1ULL;
2209 bool contiguous = false;
2210
2211 for (i = 0; i < size; mask <<= 1, i++)
2212 {
2213 if (contiguous)
2214 {
2215 if (mask & in)
2216 tmp_length++;
2217 else
2218 break;
2219 }
2220 else
2221 {
2222 if (mask & in)
2223 {
2224 contiguous = true;
2225 tmp_length++;
2226 }
2227 else
2228 tmp_pos++;
2229 }
2230 }
2231
2232 if (!tmp_length)
2233 return false;
2234
2235 /* Calculate a mask for all bits beyond the contiguous bits. */
2236 mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
2237
2238 if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT)
2239 mask &= (HOST_WIDE_INT_1U << size) - 1;
2240
2241 if (mask & in)
2242 return false;
2243
2244 if (tmp_length + tmp_pos - 1 > size)
2245 return false;
2246
2247 if (length)
2248 *length = tmp_length;
2249
2250 if (pos)
2251 *pos = tmp_pos;
2252
2253 return true;
2254 }
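
/* Editor's note (illustrative example, not part of the original
   sources):

     int pos, len;
     s390_contiguous_bitmask_p (0x00ff0000, 32, &pos, &len);

   returns true with POS == 16 and LEN == 8: the lowest set bit is
   bit 16 and the run of ones is eight bits long, with no other bits
   set.  A value like 0x00ff00ff returns false because its set bits
   are not contiguous.  */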
2255
2256 /* Return true if OP contains the same contiguous bitfield in *all*
2257 its elements. START and END can be used to obtain the start and
2258 end position of the bitfield.
2259
2260 START/END give the position of the first/last bit of the bitfield
2261 counting from the lowest order bit starting with zero. In order to
2262 use these values for S/390 instructions they have to be converted to
2263 "bits big endian" style. */
2264
2265 bool
2266 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2267 {
2268 unsigned HOST_WIDE_INT mask;
2269 int length, size;
2270
2271 if (!VECTOR_MODE_P (GET_MODE (op))
2272 || GET_CODE (op) != CONST_VECTOR
2273 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2274 return false;
2275
2276 if (GET_MODE_NUNITS (GET_MODE (op)) > 1)
2277 {
2278 int i;
2279
2280 for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i)
2281 if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0)))
2282 return false;
2283 }
2284
2285 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2286 mask = UINTVAL (XVECEXP (op, 0, 0));
2287 if (s390_contiguous_bitmask_p (mask, size, start,
2288 end != NULL ? &length : NULL))
2289 {
2290 if (end != NULL)
2291 *end = *start + length - 1;
2292 return true;
2293 }
2294 /* 0xff00000f style immediates can be covered by swapping start and
2295 end indices in vgm. */
2296 if (s390_contiguous_bitmask_p (~mask, size, start,
2297 end != NULL ? &length : NULL))
2298 {
2299 if (end != NULL)
2300 *end = *start - 1;
2301 if (start != NULL)
2302 *start = *start + length;
2303 return true;
2304 }
2305 return false;
2306 }
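
/* Editor's note (illustrative example, not part of the original
   sources): for a V4SImode constant whose elements are all
   0x00ff0000 the function returns true with *START == 16 and
   *END == 23.  For elements holding the wrap-around mask 0xff00000f
   the complement 0x00fffff0 is contiguous (position 4, length 20),
   so the function returns *START == 24 and *END == 3, the
   swapped-index form accepted by vgm.  */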
2307
2308 /* Return true if OP consists only of byte chunks that are either 0 or
2309 0xff. If MASK is != NULL a byte mask is generated which is
2310 appropriate for the vector generate byte mask instruction. */
2311
2312 bool
2313 s390_bytemask_vector_p (rtx op, unsigned *mask)
2314 {
2315 int i;
2316 unsigned tmp_mask = 0;
2317 int nunit, unit_size;
2318
2319 if (!VECTOR_MODE_P (GET_MODE (op))
2320 || GET_CODE (op) != CONST_VECTOR
2321 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2322 return false;
2323
2324 nunit = GET_MODE_NUNITS (GET_MODE (op));
2325 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2326
2327 for (i = 0; i < nunit; i++)
2328 {
2329 unsigned HOST_WIDE_INT c;
2330 int j;
2331
2332 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2333 return false;
2334
2335 c = UINTVAL (XVECEXP (op, 0, i));
2336 for (j = 0; j < unit_size; j++)
2337 {
2338 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2339 return false;
2340 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2341 c = c >> BITS_PER_UNIT;
2342 }
2343 }
2344
2345 if (mask != NULL)
2346 *mask = tmp_mask;
2347
2348 return true;
2349 }
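
/* Editor's note (illustrative example, not part of the original
   sources): for a V4SImode constant vector { 0, 0, 0, 0xffffffff }
   the function returns true and sets *MASK to 0x000f; there is one
   mask bit per byte, and the bytes of element 0 occupy the most
   significant mask bits.  A vector containing the element 0x00ff00fe
   is rejected because the byte 0xfe is neither 0 nor 0xff.  */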
2350
2351 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2352 equivalent to a shift followed by the AND. In particular, CONTIG
2353 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2354 for ROTL indicate a rotate to the right. */
2355
2356 bool
2357 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2358 {
2359 int pos, len;
2360 bool ok;
2361
2362 ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
2363 gcc_assert (ok);
2364
2365 return ((rotl >= 0 && rotl <= pos)
2366 || (rotl < 0 && -rotl <= bitsize - len - pos));
2367 }
2368
2369 /* Check whether we can (and want to) split a double-word
2370 move in mode MODE from SRC to DST into two single-word
2371 moves, moving the subword FIRST_SUBWORD first. */
2372
2373 bool
2374 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2375 {
2376 /* Floating point and vector registers cannot be split. */
2377 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2378 return false;
2379
2380 /* We don't need to split if operands are directly accessible. */
2381 if (s_operand (src, mode) || s_operand (dst, mode))
2382 return false;
2383
2384 /* Non-offsettable memory references cannot be split. */
2385 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2386 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2387 return false;
2388
2389 /* Moving the first subword must not clobber a register
2390 needed to move the second subword. */
2391 if (register_operand (dst, mode))
2392 {
2393 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2394 if (reg_overlap_mentioned_p (subreg, src))
2395 return false;
2396 }
2397
2398 return true;
2399 }
2400
2401 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2402 and [MEM2, MEM2 + SIZE] do overlap, and false
2403 otherwise. */
2404
2405 bool
2406 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2407 {
2408 rtx addr1, addr2, addr_delta;
2409 HOST_WIDE_INT delta;
2410
2411 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2412 return true;
2413
2414 if (size == 0)
2415 return false;
2416
2417 addr1 = XEXP (mem1, 0);
2418 addr2 = XEXP (mem2, 0);
2419
2420 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2421
2422 /* This overlapping check is used by peepholes merging memory block operations.
2423 Overlapping operations would otherwise be recognized by the S/390 hardware
2424 and would fall back to a slower implementation. Allowing overlapping
2425 operations would lead to slow code but not to wrong code. Therefore we are
2426 somewhat optimistic if we cannot prove that the memory blocks are
2427 overlapping.
2428 That's why we return false here although this may accept operations on
2429 overlapping memory areas. */
2430 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2431 return false;
2432
2433 delta = INTVAL (addr_delta);
2434
2435 if (delta == 0
2436 || (delta > 0 && delta < size)
2437 || (delta < 0 && -delta < size))
2438 return true;
2439
2440 return false;
2441 }
2442
2443 /* Check whether the address of memory reference MEM2 equals exactly
2444 the address of memory reference MEM1 plus DELTA. Return true if
2445 we can prove this to be the case, false otherwise. */
2446
2447 bool
2448 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2449 {
2450 rtx addr1, addr2, addr_delta;
2451
2452 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2453 return false;
2454
2455 addr1 = XEXP (mem1, 0);
2456 addr2 = XEXP (mem2, 0);
2457
2458 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2459 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2460 return false;
2461
2462 return true;
2463 }
2464
2465 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2466
2467 void
2468 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2469 rtx *operands)
2470 {
2471 machine_mode wmode = mode;
2472 rtx dst = operands[0];
2473 rtx src1 = operands[1];
2474 rtx src2 = operands[2];
2475 rtx op, clob, tem;
2476
2477 /* If we cannot handle the operation directly, use a temp register. */
2478 if (!s390_logical_operator_ok_p (operands))
2479 dst = gen_reg_rtx (mode);
2480
2481 /* QImode and HImode patterns make sense only if we have a destination
2482 in memory. Otherwise perform the operation in SImode. */
2483 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2484 wmode = SImode;
2485
2486 /* Widen operands if required. */
2487 if (mode != wmode)
2488 {
2489 if (GET_CODE (dst) == SUBREG
2490 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2491 dst = tem;
2492 else if (REG_P (dst))
2493 dst = gen_rtx_SUBREG (wmode, dst, 0);
2494 else
2495 dst = gen_reg_rtx (wmode);
2496
2497 if (GET_CODE (src1) == SUBREG
2498 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2499 src1 = tem;
2500 else if (GET_MODE (src1) != VOIDmode)
2501 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2502
2503 if (GET_CODE (src2) == SUBREG
2504 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2505 src2 = tem;
2506 else if (GET_MODE (src2) != VOIDmode)
2507 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2508 }
2509
2510 /* Emit the instruction. */
2511 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2512 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2513 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2514
2515 /* Fix up the destination if needed. */
2516 if (dst != operands[0])
2517 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2518 }
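
/* Editor's note (illustrative sketch, not part of the original
   sources; the variable names are hypothetical):

     rtx ops[3] = { qi_dst_reg, qi_src1, qi_src2 };
     s390_expand_logical_operator (XOR, QImode, ops);

   Since the destination is a register rather than memory, the
   operation is widened to SImode and performed on SImode subregs of
   the operands, and the emitted insn is a PARALLEL of the SET and a
   clobber of the condition-code register.  */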
2519
2520 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2521
2522 bool
2523 s390_logical_operator_ok_p (rtx *operands)
2524 {
2525 /* If the destination operand is in memory, it needs to coincide
2526 with one of the source operands. After reload, it has to be
2527 the first source operand. */
2528 if (GET_CODE (operands[0]) == MEM)
2529 return rtx_equal_p (operands[0], operands[1])
2530 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2531
2532 return true;
2533 }
2534
2535 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2536 operand IMMOP to switch from SS to SI type instructions. */
2537
2538 void
2539 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2540 {
2541 int def = code == AND ? -1 : 0;
2542 HOST_WIDE_INT mask;
2543 int part;
2544
2545 gcc_assert (GET_CODE (*memop) == MEM);
2546 gcc_assert (!MEM_VOLATILE_P (*memop));
2547
2548 mask = s390_extract_part (*immop, QImode, def);
2549 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2550 gcc_assert (part >= 0);
2551
2552 *memop = adjust_address (*memop, QImode, part);
2553 *immop = gen_int_mode (mask, QImode);
2554 }
2555
2556
2557 /* How to allocate a 'struct machine_function'. */
2558
2559 static struct machine_function *
2560 s390_init_machine_status (void)
2561 {
2562 return ggc_cleared_alloc<machine_function> ();
2563 }
2564
2565 /* Map for smallest class containing reg regno. */
2566
2567 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2568 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2569 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2570 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2571 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2572 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2573 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2574 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2575 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2576 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2577 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2578 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2579 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2580 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2581 VEC_REGS, VEC_REGS /* 52 */
2582 };
2583
2584 /* Return attribute type of insn. */
2585
2586 static enum attr_type
2587 s390_safe_attr_type (rtx_insn *insn)
2588 {
2589 if (recog_memoized (insn) >= 0)
2590 return get_attr_type (insn);
2591 else
2592 return TYPE_NONE;
2593 }
2594
2595 /* Return true if DISP is a valid short displacement. */
2596
2597 static bool
2598 s390_short_displacement (rtx disp)
2599 {
2600 /* No displacement is OK. */
2601 if (!disp)
2602 return true;
2603
2604 /* Without the long displacement facility we don't need to
2605 distinguish between long and short displacements. */
2606 if (!TARGET_LONG_DISPLACEMENT)
2607 return true;
2608
2609 /* Integer displacement in range. */
2610 if (GET_CODE (disp) == CONST_INT)
2611 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2612
2613 /* GOT offset is not OK, the GOT can be large. */
2614 if (GET_CODE (disp) == CONST
2615 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2616 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2617 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2618 return false;
2619
2620 /* All other symbolic constants are literal pool references,
2621 which are OK as the literal pool must be small. */
2622 if (GET_CODE (disp) == CONST)
2623 return true;
2624
2625 return false;
2626 }
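
/* Editor's note (illustrative example, not part of the original
   sources): a short displacement is an unsigned 12-bit value, so
   (const_int 4095) is accepted while (const_int 4096) and
   (const_int -1) are not; the latter two need the long-displacement
   (20-bit signed) instruction formats.  */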
2627
2628 /* Decompose a RTL expression ADDR for a memory address into
2629 its components, returned in OUT.
2630
2631 Returns false if ADDR is not a valid memory address, true
2632 otherwise. If OUT is NULL, don't return the components,
2633 but check for validity only.
2634
2635 Note: Only addresses in canonical form are recognized.
2636 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2637 canonical form so that they will be recognized. */
2638
2639 static int
2640 s390_decompose_address (rtx addr, struct s390_address *out)
2641 {
2642 HOST_WIDE_INT offset = 0;
2643 rtx base = NULL_RTX;
2644 rtx indx = NULL_RTX;
2645 rtx disp = NULL_RTX;
2646 rtx orig_disp;
2647 bool pointer = false;
2648 bool base_ptr = false;
2649 bool indx_ptr = false;
2650 bool literal_pool = false;
2651
2652 /* We may need to substitute the literal pool base register into the address
2653 below. However, at this point we do not know which register is going to
2654 be used as base, so we substitute the arg pointer register. This is going
2655 to be treated as holding a pointer below -- it shouldn't be used for any
2656 other purpose. */
2657 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2658
2659 /* Decompose address into base + index + displacement. */
2660
2661 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2662 base = addr;
2663
2664 else if (GET_CODE (addr) == PLUS)
2665 {
2666 rtx op0 = XEXP (addr, 0);
2667 rtx op1 = XEXP (addr, 1);
2668 enum rtx_code code0 = GET_CODE (op0);
2669 enum rtx_code code1 = GET_CODE (op1);
2670
2671 if (code0 == REG || code0 == UNSPEC)
2672 {
2673 if (code1 == REG || code1 == UNSPEC)
2674 {
2675 indx = op0; /* index + base */
2676 base = op1;
2677 }
2678
2679 else
2680 {
2681 base = op0; /* base + displacement */
2682 disp = op1;
2683 }
2684 }
2685
2686 else if (code0 == PLUS)
2687 {
2688 indx = XEXP (op0, 0); /* index + base + disp */
2689 base = XEXP (op0, 1);
2690 disp = op1;
2691 }
2692
2693 else
2694 {
2695 return false;
2696 }
2697 }
2698
2699 else
2700 disp = addr; /* displacement */
2701
2702 /* Extract integer part of displacement. */
2703 orig_disp = disp;
2704 if (disp)
2705 {
2706 if (GET_CODE (disp) == CONST_INT)
2707 {
2708 offset = INTVAL (disp);
2709 disp = NULL_RTX;
2710 }
2711 else if (GET_CODE (disp) == CONST
2712 && GET_CODE (XEXP (disp, 0)) == PLUS
2713 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2714 {
2715 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2716 disp = XEXP (XEXP (disp, 0), 0);
2717 }
2718 }
2719
2720 /* Strip off CONST here to avoid special case tests later. */
2721 if (disp && GET_CODE (disp) == CONST)
2722 disp = XEXP (disp, 0);
2723
2724 /* We can convert literal pool addresses to
2725 displacements by basing them off the base register. */
2726 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2727 {
2728 /* Either base or index must be free to hold the base register. */
2729 if (!base)
2730 base = fake_pool_base, literal_pool = true;
2731 else if (!indx)
2732 indx = fake_pool_base, literal_pool = true;
2733 else
2734 return false;
2735
2736 /* Mark up the displacement. */
2737 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2738 UNSPEC_LTREL_OFFSET);
2739 }
2740
2741 /* Validate base register. */
2742 if (base)
2743 {
2744 if (GET_CODE (base) == UNSPEC)
2745 switch (XINT (base, 1))
2746 {
2747 case UNSPEC_LTREF:
2748 if (!disp)
2749 disp = gen_rtx_UNSPEC (Pmode,
2750 gen_rtvec (1, XVECEXP (base, 0, 0)),
2751 UNSPEC_LTREL_OFFSET);
2752 else
2753 return false;
2754
2755 base = XVECEXP (base, 0, 1);
2756 break;
2757
2758 case UNSPEC_LTREL_BASE:
2759 if (XVECLEN (base, 0) == 1)
2760 base = fake_pool_base, literal_pool = true;
2761 else
2762 base = XVECEXP (base, 0, 1);
2763 break;
2764
2765 default:
2766 return false;
2767 }
2768
2769 if (!REG_P (base)
2770 || (GET_MODE (base) != SImode
2771 && GET_MODE (base) != Pmode))
2772 return false;
2773
2774 if (REGNO (base) == STACK_POINTER_REGNUM
2775 || REGNO (base) == FRAME_POINTER_REGNUM
2776 || ((reload_completed || reload_in_progress)
2777 && frame_pointer_needed
2778 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2779 || REGNO (base) == ARG_POINTER_REGNUM
2780 || (flag_pic
2781 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2782 pointer = base_ptr = true;
2783
2784 if ((reload_completed || reload_in_progress)
2785 && base == cfun->machine->base_reg)
2786 pointer = base_ptr = literal_pool = true;
2787 }
2788
2789 /* Validate index register. */
2790 if (indx)
2791 {
2792 if (GET_CODE (indx) == UNSPEC)
2793 switch (XINT (indx, 1))
2794 {
2795 case UNSPEC_LTREF:
2796 if (!disp)
2797 disp = gen_rtx_UNSPEC (Pmode,
2798 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2799 UNSPEC_LTREL_OFFSET);
2800 else
2801 return false;
2802
2803 indx = XVECEXP (indx, 0, 1);
2804 break;
2805
2806 case UNSPEC_LTREL_BASE:
2807 if (XVECLEN (indx, 0) == 1)
2808 indx = fake_pool_base, literal_pool = true;
2809 else
2810 indx = XVECEXP (indx, 0, 1);
2811 break;
2812
2813 default:
2814 return false;
2815 }
2816
2817 if (!REG_P (indx)
2818 || (GET_MODE (indx) != SImode
2819 && GET_MODE (indx) != Pmode))
2820 return false;
2821
2822 if (REGNO (indx) == STACK_POINTER_REGNUM
2823 || REGNO (indx) == FRAME_POINTER_REGNUM
2824 || ((reload_completed || reload_in_progress)
2825 && frame_pointer_needed
2826 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2827 || REGNO (indx) == ARG_POINTER_REGNUM
2828 || (flag_pic
2829 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2830 pointer = indx_ptr = true;
2831
2832 if ((reload_completed || reload_in_progress)
2833 && indx == cfun->machine->base_reg)
2834 pointer = indx_ptr = literal_pool = true;
2835 }
2836
2837 /* Prefer to use pointer as base, not index. */
2838 if (base && indx && !base_ptr
2839 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2840 {
2841 rtx tmp = base;
2842 base = indx;
2843 indx = tmp;
2844 }
2845
2846 /* Validate displacement. */
2847 if (!disp)
2848 {
2849 /* If virtual registers are involved, the displacement will change later
2850 anyway as the virtual registers get eliminated. This could make a
2851 valid displacement invalid, but it is more likely to make an invalid
2852 displacement valid, because we sometimes access the register save area
2853 via negative offsets to one of those registers.
2854 Thus we don't check the displacement for validity here. If after
2855 elimination the displacement turns out to be invalid after all,
2856 this is fixed up by reload in any case. */
2857 /* LRA always keeps displacements up to date, and we need to know
2858 that the displacement is valid throughout LRA, not only at the
2859 final elimination. */
2860 if (lra_in_progress
2861 || (base != arg_pointer_rtx
2862 && indx != arg_pointer_rtx
2863 && base != return_address_pointer_rtx
2864 && indx != return_address_pointer_rtx
2865 && base != frame_pointer_rtx
2866 && indx != frame_pointer_rtx
2867 && base != virtual_stack_vars_rtx
2868 && indx != virtual_stack_vars_rtx))
2869 if (!DISP_IN_RANGE (offset))
2870 return false;
2871 }
2872 else
2873 {
2874 /* All the special cases are pointers. */
2875 pointer = true;
2876
2877 /* In the small-PIC case, the linker converts @GOT
2878 and @GOTNTPOFF offsets to possible displacements. */
2879 if (GET_CODE (disp) == UNSPEC
2880 && (XINT (disp, 1) == UNSPEC_GOT
2881 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2882 && flag_pic == 1)
2883 {
2884 ;
2885 }
2886
2887 /* Accept pool label offsets. */
2888 else if (GET_CODE (disp) == UNSPEC
2889 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2890 ;
2891
2892 /* Accept literal pool references. */
2893 else if (GET_CODE (disp) == UNSPEC
2894 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2895 {
2896 /* In case CSE pulled a non literal pool reference out of
2897 the pool we have to reject the address. This is
2898 especially important when loading the GOT pointer on non
2899 zarch CPUs. In this case the literal pool contains an lt
2900 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2901 will most likely exceed the displacement. */
2902 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2903 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2904 return false;
2905
2906 orig_disp = gen_rtx_CONST (Pmode, disp);
2907 if (offset)
2908 {
2909 /* If we have an offset, make sure it does not
2910 exceed the size of the constant pool entry. */
2911 rtx sym = XVECEXP (disp, 0, 0);
2912 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2913 return false;
2914
2915 orig_disp = plus_constant (Pmode, orig_disp, offset);
2916 }
2917 }
2918
2919 else
2920 return false;
2921 }
2922
2923 if (!base && !indx)
2924 pointer = true;
2925
2926 if (out)
2927 {
2928 out->base = base;
2929 out->indx = indx;
2930 out->disp = orig_disp;
2931 out->pointer = pointer;
2932 out->literal_pool = literal_pool;
2933 }
2934
2935 return true;
2936 }
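
/* Editor's note (illustrative example, not part of the original
   sources; the register numbers are arbitrary): for the canonical
   address

     (plus (plus (reg %r1) (reg %r2)) (const_int 100))

   the components are indx = %r1, base = %r2 and offset 100, while

     (plus (reg %r2) (const_int 8))

   yields base = %r2, indx = NULL_RTX and offset 8.  Literal pool
   references are rewritten to use a (fake) base register together
   with an UNSPEC_LTREL_OFFSET displacement.  */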
2937
2938 /* Decompose a RTL expression OP for a shift count into its components,
2939 and return the base register in BASE and the offset in OFFSET.
2940
2941 Return true if OP is a valid shift count, false if not. */
2942
2943 bool
2944 s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
2945 {
2946 HOST_WIDE_INT off = 0;
2947
2948 /* We can have an integer constant, an address register,
2949 or a sum of the two. */
2950 if (GET_CODE (op) == CONST_INT)
2951 {
2952 off = INTVAL (op);
2953 op = NULL_RTX;
2954 }
2955 if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
2956 {
2957 off = INTVAL (XEXP (op, 1));
2958 op = XEXP (op, 0);
2959 }
2960 while (op && GET_CODE (op) == SUBREG)
2961 op = SUBREG_REG (op);
2962
2963 if (op && GET_CODE (op) != REG)
2964 return false;
2965
2966 if (offset)
2967 *offset = off;
2968 if (base)
2969 *base = op;
2970
2971 return true;
2972 }
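
/* Editor's note (illustrative example, not part of the original
   sources; the register number is arbitrary): for
   (plus (reg %r3) (const_int 7)) the function returns true with
   *BASE = %r3 and *OFFSET = 7; a plain (const_int 12) yields
   *BASE = NULL_RTX and *OFFSET = 12.  SUBREGs around the register
   are stripped; anything else is rejected.  */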
2973
2974
2975 /* Return true if OP is a valid address without index. */
2976
2977 bool
2978 s390_legitimate_address_without_index_p (rtx op)
2979 {
2980 struct s390_address addr;
2981
2982 if (!s390_decompose_address (XEXP (op, 0), &addr))
2983 return false;
2984 if (addr.indx)
2985 return false;
2986
2987 return true;
2988 }
2989
2990
2991 /* Return TRUE if ADDR is an operand valid for a load/store relative
2992 instruction. Be aware that the alignment of the operand needs to
2993 be checked separately.
2994 Valid addresses are single references or a sum of a reference and a
2995 constant integer. Return these parts in SYMREF and ADDEND. You can
2996 pass NULL in SYMREF and/or ADDEND if you are not interested in these
2997 values. Literal pool references are *not* considered symbol
2998 references. */
2999
3000 static bool
3001 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3002 {
3003 HOST_WIDE_INT tmpaddend = 0;
3004
3005 if (GET_CODE (addr) == CONST)
3006 addr = XEXP (addr, 0);
3007
3008 if (GET_CODE (addr) == PLUS)
3009 {
3010 if (!CONST_INT_P (XEXP (addr, 1)))
3011 return false;
3012
3013 tmpaddend = INTVAL (XEXP (addr, 1));
3014 addr = XEXP (addr, 0);
3015 }
3016
3017 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3018 || (GET_CODE (addr) == UNSPEC
3019 && (XINT (addr, 1) == UNSPEC_GOTENT
3020 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3021 {
3022 if (symref)
3023 *symref = addr;
3024 if (addend)
3025 *addend = tmpaddend;
3026
3027 return true;
3028 }
3029 return false;
3030 }
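
/* Editor's note (illustrative example, not part of the original
   sources): for (const (plus (symbol_ref "foo") (const_int 16)))
   the function returns true with *SYMREF = (symbol_ref "foo") and
   *ADDEND = 16.  A SYMBOL_REF that points into the literal pool is
   rejected, as stated above.  */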
3031
3032 /* Return true if the address in OP is valid for constraint letter C
3033 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
3034 pool MEMs should be accepted. Only the Q, R, S, T constraint
3035 letters are allowed for C. */
3036
3037 static int
3038 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3039 {
3040 struct s390_address addr;
3041 bool decomposed = false;
3042
3043 /* This check makes sure that no symbolic addresses (except literal
3044 pool references) are accepted by the R or T constraints. */
3045 if (s390_loadrelative_operand_p (op, NULL, NULL))
3046 return 0;
3047
3048 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3049 if (!lit_pool_ok)
3050 {
3051 if (!s390_decompose_address (op, &addr))
3052 return 0;
3053 if (addr.literal_pool)
3054 return 0;
3055 decomposed = true;
3056 }
3057
3058 switch (c)
3059 {
3060 case 'Q': /* no index short displacement */
3061 if (!decomposed && !s390_decompose_address (op, &addr))
3062 return 0;
3063 if (addr.indx)
3064 return 0;
3065 if (!s390_short_displacement (addr.disp))
3066 return 0;
3067 break;
3068
3069 case 'R': /* with index short displacement */
3070 if (TARGET_LONG_DISPLACEMENT)
3071 {
3072 if (!decomposed && !s390_decompose_address (op, &addr))
3073 return 0;
3074 if (!s390_short_displacement (addr.disp))
3075 return 0;
3076 }
3077 /* Any invalid address here will be fixed up by reload,
3078 so accept it for the most generic constraint. */
3079 break;
3080
3081 case 'S': /* no index long displacement */
3082 if (!TARGET_LONG_DISPLACEMENT)
3083 return 0;
3084 if (!decomposed && !s390_decompose_address (op, &addr))
3085 return 0;
3086 if (addr.indx)
3087 return 0;
3088 if (s390_short_displacement (addr.disp))
3089 return 0;
3090 break;
3091
3092 case 'T': /* with index long displacement */
3093 if (!TARGET_LONG_DISPLACEMENT)
3094 return 0;
3095 /* Any invalid address here will be fixed up by reload,
3096 so accept it for the most generic constraint. */
3097 if ((decomposed || s390_decompose_address (op, &addr))
3098 && s390_short_displacement (addr.disp))
3099 return 0;
3100 break;
3101 default:
3102 return 0;
3103 }
3104 return 1;
3105 }
3106
3107
3108 /* Evaluates constraint strings described by the regular expression
3109 ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
3110 the constraint given in STR, and 0 otherwise. */
3111
3112 int
3113 s390_mem_constraint (const char *str, rtx op)
3114 {
3115 char c = str[0];
3116
3117 switch (c)
3118 {
3119 case 'A':
3120 /* Check for offsettable variants of memory constraints. */
3121 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3122 return 0;
3123 if ((reload_completed || reload_in_progress)
3124 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3125 return 0;
3126 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3127 case 'B':
3128 /* Check for non-literal-pool variants of memory constraints. */
3129 if (!MEM_P (op))
3130 return 0;
3131 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3132 case 'Q':
3133 case 'R':
3134 case 'S':
3135 case 'T':
3136 if (GET_CODE (op) != MEM)
3137 return 0;
3138 return s390_check_qrst_address (c, XEXP (op, 0), true);
3139 case 'U':
3140 return (s390_check_qrst_address ('Q', op, true)
3141 || s390_check_qrst_address ('R', op, true));
3142 case 'W':
3143 return (s390_check_qrst_address ('S', op, true)
3144 || s390_check_qrst_address ('T', op, true));
3145 case 'Y':
3146 /* Simply check for the basic form of a shift count. Reload will
3147 take care of making sure we have a proper base register. */
3148 if (!s390_decompose_shift_count (op, NULL, NULL))
3149 return 0;
3150 break;
3151 case 'Z':
3152 return s390_check_qrst_address (str[1], op, true);
3153 default:
3154 return 0;
3155 }
3156 return 1;
3157 }
3158
3159
3160 /* Evaluates constraint strings starting with letter O. Input
3161 parameter C is the letter following the "O" in the constraint
3162 string. Returns 1 if VALUE meets the respective constraint and 0
3163 otherwise. */
3164
3165 int
3166 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3167 {
3168 if (!TARGET_EXTIMM)
3169 return 0;
3170
3171 switch (c)
3172 {
3173 case 's':
3174 return trunc_int_for_mode (value, SImode) == value;
3175
3176 case 'p':
3177 return value == 0
3178 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3179
3180 case 'n':
3181 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3182
3183 default:
3184 gcc_unreachable ();
3185 }
3186 }
3187
3188
3189 /* Evaluates constraint strings starting with letter N. Parameter STR
3190 contains the letters following letter "N" in the constraint string.
3191 Returns true if VALUE matches the constraint. */
3192
3193 int
3194 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3195 {
3196 machine_mode mode, part_mode;
3197 int def;
3198 int part, part_goal;
3199
3200
3201 if (str[0] == 'x')
3202 part_goal = -1;
3203 else
3204 part_goal = str[0] - '0';
3205
3206 switch (str[1])
3207 {
3208 case 'Q':
3209 part_mode = QImode;
3210 break;
3211 case 'H':
3212 part_mode = HImode;
3213 break;
3214 case 'S':
3215 part_mode = SImode;
3216 break;
3217 default:
3218 return 0;
3219 }
3220
3221 switch (str[2])
3222 {
3223 case 'H':
3224 mode = HImode;
3225 break;
3226 case 'S':
3227 mode = SImode;
3228 break;
3229 case 'D':
3230 mode = DImode;
3231 break;
3232 default:
3233 return 0;
3234 }
3235
3236 switch (str[3])
3237 {
3238 case '0':
3239 def = 0;
3240 break;
3241 case 'F':
3242 def = -1;
3243 break;
3244 default:
3245 return 0;
3246 }
3247
3248 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3249 return 0;
3250
3251 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3252 if (part < 0)
3253 return 0;
3254 if (part_goal != -1 && part_goal != part)
3255 return 0;
3256
3257 return 1;
3258 }
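
/* Editor's note (illustrative example, not part of the original
   sources): a constraint such as "N0HD0" asks for a DImode value in
   which exactly one HImode part differs from 0x0000 and that part is
   part number 0 (the most significant halfword).  The value
   0x1234000000000000 matches, while 0x0000123400000000 (part 1) and
   0x1234000000005678 (two non-zero parts) do not.  */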
3259
3260
3261 /* Returns true if the input parameter VALUE is a float zero. */
3262
3263 int
3264 s390_float_const_zero_p (rtx value)
3265 {
3266 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3267 && value == CONST0_RTX (GET_MODE (value)));
3268 }
3269
3270 /* Implement TARGET_REGISTER_MOVE_COST. */
3271
3272 static int
3273 s390_register_move_cost (machine_mode mode,
3274 reg_class_t from, reg_class_t to)
3275 {
3276 /* On s390, copy between fprs and gprs is expensive. */
3277
3278 /* It becomes somewhat faster having ldgr/lgdr. */
3279 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3280 {
3281 /* ldgr is single cycle. */
3282 if (reg_classes_intersect_p (from, GENERAL_REGS)
3283 && reg_classes_intersect_p (to, FP_REGS))
3284 return 1;
3285 /* lgdr needs 3 cycles. */
3286 if (reg_classes_intersect_p (to, GENERAL_REGS)
3287 && reg_classes_intersect_p (from, FP_REGS))
3288 return 3;
3289 }
3290
3291 /* Otherwise copying is done via memory. */
3292 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3293 && reg_classes_intersect_p (to, FP_REGS))
3294 || (reg_classes_intersect_p (from, FP_REGS)
3295 && reg_classes_intersect_p (to, GENERAL_REGS)))
3296 return 10;
3297
3298 return 1;
3299 }
3300
3301 /* Implement TARGET_MEMORY_MOVE_COST. */
3302
3303 static int
3304 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3305 reg_class_t rclass ATTRIBUTE_UNUSED,
3306 bool in ATTRIBUTE_UNUSED)
3307 {
3308 return 2;
3309 }
3310
3311 /* Compute a (partial) cost for rtx X. Return true if the complete
3312 cost has been computed, and false if subexpressions should be
3313 scanned. In either case, *TOTAL contains the cost result.
3314 CODE contains GET_CODE (x), OUTER_CODE contains the code
3315 of the superexpression of x. */
3316
3317 static bool
3318 s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3319 int *total, bool speed ATTRIBUTE_UNUSED)
3320 {
3321 switch (code)
3322 {
3323 case CONST:
3324 case CONST_INT:
3325 case LABEL_REF:
3326 case SYMBOL_REF:
3327 case CONST_DOUBLE:
3328 case MEM:
3329 *total = 0;
3330 return true;
3331
3332 case ASHIFT:
3333 case ASHIFTRT:
3334 case LSHIFTRT:
3335 case ROTATE:
3336 case ROTATERT:
3337 case AND:
3338 case IOR:
3339 case XOR:
3340 case NEG:
3341 case NOT:
3342 *total = COSTS_N_INSNS (1);
3343 return false;
3344
3345 case PLUS:
3346 case MINUS:
3347 *total = COSTS_N_INSNS (1);
3348 return false;
3349
3350 case MULT:
3351 switch (GET_MODE (x))
3352 {
3353 case SImode:
3354 {
3355 rtx left = XEXP (x, 0);
3356 rtx right = XEXP (x, 1);
3357 if (GET_CODE (right) == CONST_INT
3358 && CONST_OK_FOR_K (INTVAL (right)))
3359 *total = s390_cost->mhi;
3360 else if (GET_CODE (left) == SIGN_EXTEND)
3361 *total = s390_cost->mh;
3362 else
3363 *total = s390_cost->ms; /* msr, ms, msy */
3364 break;
3365 }
3366 case DImode:
3367 {
3368 rtx left = XEXP (x, 0);
3369 rtx right = XEXP (x, 1);
3370 if (TARGET_ZARCH)
3371 {
3372 if (GET_CODE (right) == CONST_INT
3373 && CONST_OK_FOR_K (INTVAL (right)))
3374 *total = s390_cost->mghi;
3375 else if (GET_CODE (left) == SIGN_EXTEND)
3376 *total = s390_cost->msgf;
3377 else
3378 *total = s390_cost->msg; /* msgr, msg */
3379 }
3380 else /* TARGET_31BIT */
3381 {
3382 if (GET_CODE (left) == SIGN_EXTEND
3383 && GET_CODE (right) == SIGN_EXTEND)
3384 /* mulsidi case: mr, m */
3385 *total = s390_cost->m;
3386 else if (GET_CODE (left) == ZERO_EXTEND
3387 && GET_CODE (right) == ZERO_EXTEND
3388 && TARGET_CPU_ZARCH)
3389 /* umulsidi case: ml, mlr */
3390 *total = s390_cost->ml;
3391 else
3392 /* Complex calculation is required. */
3393 *total = COSTS_N_INSNS (40);
3394 }
3395 break;
3396 }
3397 case SFmode:
3398 case DFmode:
3399 *total = s390_cost->mult_df;
3400 break;
3401 case TFmode:
3402 *total = s390_cost->mxbr;
3403 break;
3404 default:
3405 return false;
3406 }
3407 return false;
3408
3409 case FMA:
3410 switch (GET_MODE (x))
3411 {
3412 case DFmode:
3413 *total = s390_cost->madbr;
3414 break;
3415 case SFmode:
3416 *total = s390_cost->maebr;
3417 break;
3418 default:
3419 return false;
3420 }
3421 /* Negate in the third argument is free: FMSUB. */
3422 if (GET_CODE (XEXP (x, 2)) == NEG)
3423 {
3424 *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
3425 + rtx_cost (XEXP (x, 1), FMA, 1, speed)
3426 + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
3427 return true;
3428 }
3429 return false;
3430
3431 case UDIV:
3432 case UMOD:
3433 if (GET_MODE (x) == TImode) /* 128 bit division */
3434 *total = s390_cost->dlgr;
3435 else if (GET_MODE (x) == DImode)
3436 {
3437 rtx right = XEXP (x, 1);
3438 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3439 *total = s390_cost->dlr;
3440 else /* 64 by 64 bit division */
3441 *total = s390_cost->dlgr;
3442 }
3443 else if (GET_MODE (x) == SImode) /* 32 bit division */
3444 *total = s390_cost->dlr;
3445 return false;
3446
3447 case DIV:
3448 case MOD:
3449 if (GET_MODE (x) == DImode)
3450 {
3451 rtx right = XEXP (x, 1);
3452 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3453 if (TARGET_ZARCH)
3454 *total = s390_cost->dsgfr;
3455 else
3456 *total = s390_cost->dr;
3457 else /* 64 by 64 bit division */
3458 *total = s390_cost->dsgr;
3459 }
3460 else if (GET_MODE (x) == SImode) /* 32 bit division */
3461 *total = s390_cost->dlr;
3462 else if (GET_MODE (x) == SFmode)
3463 {
3464 *total = s390_cost->debr;
3465 }
3466 else if (GET_MODE (x) == DFmode)
3467 {
3468 *total = s390_cost->ddbr;
3469 }
3470 else if (GET_MODE (x) == TFmode)
3471 {
3472 *total = s390_cost->dxbr;
3473 }
3474 return false;
3475
3476 case SQRT:
3477 if (GET_MODE (x) == SFmode)
3478 *total = s390_cost->sqebr;
3479 else if (GET_MODE (x) == DFmode)
3480 *total = s390_cost->sqdbr;
3481 else /* TFmode */
3482 *total = s390_cost->sqxbr;
3483 return false;
3484
3485 case SIGN_EXTEND:
3486 case ZERO_EXTEND:
3487 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3488 || outer_code == PLUS || outer_code == MINUS
3489 || outer_code == COMPARE)
3490 *total = 0;
3491 return false;
3492
3493 case COMPARE:
3494 *total = COSTS_N_INSNS (1);
3495 if (GET_CODE (XEXP (x, 0)) == AND
3496 && GET_CODE (XEXP (x, 1)) == CONST_INT
3497 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3498 {
3499 rtx op0 = XEXP (XEXP (x, 0), 0);
3500 rtx op1 = XEXP (XEXP (x, 0), 1);
3501 rtx op2 = XEXP (x, 1);
3502
3503 if (memory_operand (op0, GET_MODE (op0))
3504 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3505 return true;
3506 if (register_operand (op0, GET_MODE (op0))
3507 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3508 return true;
3509 }
3510 return false;
3511
3512 default:
3513 return false;
3514 }
3515 }
3516
3517 /* Return the cost of an address rtx ADDR. */
3518
3519 static int
3520 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3521 addr_space_t as ATTRIBUTE_UNUSED,
3522 bool speed ATTRIBUTE_UNUSED)
3523 {
3524 struct s390_address ad;
3525 if (!s390_decompose_address (addr, &ad))
3526 return 1000;
3527
3528 return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3529 }
3530
3531 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3532 otherwise return 0. */
3533
3534 int
3535 tls_symbolic_operand (rtx op)
3536 {
3537 if (GET_CODE (op) != SYMBOL_REF)
3538 return 0;
3539 return SYMBOL_REF_TLS_MODEL (op);
3540 }
3541 \f
3542 /* Split DImode access register reference REG (on 64-bit) into its constituent
3543 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3544 gen_highpart cannot be used as they assume all registers are word-sized,
3545 while our access registers have only half that size. */
3546
3547 void
3548 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3549 {
3550 gcc_assert (TARGET_64BIT);
3551 gcc_assert (ACCESS_REG_P (reg));
3552 gcc_assert (GET_MODE (reg) == DImode);
3553 gcc_assert (!(REGNO (reg) & 1));
3554
3555 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3556 *hi = gen_rtx_REG (SImode, REGNO (reg));
3557 }
3558
3559 /* Return true if OP contains a symbol reference. */
3560
3561 bool
3562 symbolic_reference_mentioned_p (rtx op)
3563 {
3564 const char *fmt;
3565 int i;
3566
3567 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3568 return 1;
3569
3570 fmt = GET_RTX_FORMAT (GET_CODE (op));
3571 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3572 {
3573 if (fmt[i] == 'E')
3574 {
3575 int j;
3576
3577 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3578 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3579 return 1;
3580 }
3581
3582 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3583 return 1;
3584 }
3585
3586 return 0;
3587 }
3588
3589 /* Return true if OP contains a reference to a thread-local symbol. */
3590
3591 bool
3592 tls_symbolic_reference_mentioned_p (rtx op)
3593 {
3594 const char *fmt;
3595 int i;
3596
3597 if (GET_CODE (op) == SYMBOL_REF)
3598 return tls_symbolic_operand (op);
3599
3600 fmt = GET_RTX_FORMAT (GET_CODE (op));
3601 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3602 {
3603 if (fmt[i] == 'E')
3604 {
3605 int j;
3606
3607 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3608 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3609 return true;
3610 }
3611
3612 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3613 return true;
3614 }
3615
3616 return false;
3617 }
3618
3619
3620 /* Return true if OP is a legitimate general operand when
3621 generating PIC code. It is given that flag_pic is on
3622 and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
3623
3624 int
3625 legitimate_pic_operand_p (rtx op)
3626 {
3627 /* Accept all non-symbolic constants. */
3628 if (!SYMBOLIC_CONST (op))
3629 return 1;
3630
3631 /* Reject everything else; must be handled
3632 via emit_symbolic_move. */
3633 return 0;
3634 }
3635
3636 /* Returns true if the constant value OP is a legitimate general operand.
3637 It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
3638
3639 static bool
3640 s390_legitimate_constant_p (machine_mode mode, rtx op)
3641 {
3642 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3643 {
3644 if (GET_MODE_SIZE (mode) != 16)
3645 return 0;
3646
3647 if (!const0_operand (op, mode)
3648 && !s390_contiguous_bitmask_vector_p (op, NULL, NULL)
3649 && !s390_bytemask_vector_p (op, NULL))
3650 return 0;
3651 }
3652
3653 /* Accept all non-symbolic constants. */
3654 if (!SYMBOLIC_CONST (op))
3655 return 1;
3656
3657 /* Accept immediate LARL operands. */
3658 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3659 return 1;
3660
3661 /* Thread-local symbols are never legal constants. This is
3662 so that emit_call knows that computing such addresses
3663 might require a function call. */
3664 if (TLS_SYMBOLIC_CONST (op))
3665 return 0;
3666
3667 /* In the PIC case, symbolic constants must *not* be
3668 forced into the literal pool. We accept them here,
3669 so that they will be handled by emit_symbolic_move. */
3670 if (flag_pic)
3671 return 1;
3672
3673 /* All remaining non-PIC symbolic constants are
3674 forced into the literal pool. */
3675 return 0;
3676 }
3677
3678 /* Determine if it's legal to put X into the constant pool. This
3679 is not possible if X contains the address of a symbol that is
3680 not constant (TLS) or not known at final link time (PIC). */
3681
3682 static bool
3683 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3684 {
3685 switch (GET_CODE (x))
3686 {
3687 case CONST_INT:
3688 case CONST_DOUBLE:
3689 case CONST_VECTOR:
3690 /* Accept all non-symbolic constants. */
3691 return false;
3692
3693 case LABEL_REF:
3694 /* Labels are OK iff we are non-PIC. */
3695 return flag_pic != 0;
3696
3697 case SYMBOL_REF:
3698 /* 'Naked' TLS symbol references are never OK,
3699 non-TLS symbols are OK iff we are non-PIC. */
3700 if (tls_symbolic_operand (x))
3701 return true;
3702 else
3703 return flag_pic != 0;
3704
3705 case CONST:
3706 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3707 case PLUS:
3708 case MINUS:
3709 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3710 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3711
3712 case UNSPEC:
3713 switch (XINT (x, 1))
3714 {
3715 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3716 case UNSPEC_LTREL_OFFSET:
3717 case UNSPEC_GOT:
3718 case UNSPEC_GOTOFF:
3719 case UNSPEC_PLTOFF:
3720 case UNSPEC_TLSGD:
3721 case UNSPEC_TLSLDM:
3722 case UNSPEC_NTPOFF:
3723 case UNSPEC_DTPOFF:
3724 case UNSPEC_GOTNTPOFF:
3725 case UNSPEC_INDNTPOFF:
3726 return false;
3727
3728 /* If the literal pool shares the code section, execute template
3729 placeholders may be put into the pool as well. */
3730 case UNSPEC_INSN:
3731 return TARGET_CPU_ZARCH;
3732
3733 default:
3734 return true;
3735 }
3736 break;
3737
3738 default:
3739 gcc_unreachable ();
3740 }
3741 }
3742
3743 /* Returns true if the constant value OP is a legitimate general
3744 operand during and after reload. The difference to
3745 legitimate_constant_p is that this function will not accept
3746 a constant that would need to be forced to the literal pool
3747 before it can be used as operand.
3748 This function accepts all constants which can be loaded directly
3749 into a GPR. */
3750
3751 bool
3752 legitimate_reload_constant_p (rtx op)
3753 {
3754 /* Accept la(y) operands. */
3755 if (GET_CODE (op) == CONST_INT
3756 && DISP_IN_RANGE (INTVAL (op)))
3757 return true;
3758
3759 /* Accept l(g)hi/l(g)fi operands. */
3760 if (GET_CODE (op) == CONST_INT
3761 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3762 return true;
3763
3764 /* Accept lliXX operands. */
3765 if (TARGET_ZARCH
3766 && GET_CODE (op) == CONST_INT
3767 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3768 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3769 return true;
3770
3771 if (TARGET_EXTIMM
3772 && GET_CODE (op) == CONST_INT
3773 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3774 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3775 return true;
3776
3777 /* Accept larl operands. */
3778 if (TARGET_CPU_ZARCH
3779 && larl_operand (op, VOIDmode))
3780 return true;
3781
3782 /* Accept floating-point zero operands that fit into a single GPR. */
3783 if (GET_CODE (op) == CONST_DOUBLE
3784 && s390_float_const_zero_p (op)
3785 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3786 return true;
3787
3788 /* Accept double-word operands that can be split. */
3789 if (GET_CODE (op) == CONST_INT
3790 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
3791 {
3792 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
3793 rtx hi = operand_subword (op, 0, 0, dword_mode);
3794 rtx lo = operand_subword (op, 1, 0, dword_mode);
3795 return legitimate_reload_constant_p (hi)
3796 && legitimate_reload_constant_p (lo);
3797 }
3798
3799 /* Everything else cannot be handled without reload. */
3800 return false;
3801 }
3802
3803 /* Returns true if the constant value OP is a legitimate fp operand
3804 during and after reload.
3805 This function accepts all constants which can be loaded directly
3806 into an FPR. */
3807
3808 static bool
3809 legitimate_reload_fp_constant_p (rtx op)
3810 {
3811 /* Accept floating-point zero operands if the load zero instruction
3812 can be used. Prior to z196 the load fp zero instruction caused a
3813 performance penalty if the result is used as BFP number. */
3814 if (TARGET_Z196
3815 && GET_CODE (op) == CONST_DOUBLE
3816 && s390_float_const_zero_p (op))
3817 return true;
3818
3819 return false;
3820 }
3821
3822 /* Returns true if the constant value OP is a legitimate vector operand
3823 during and after reload.
3824 This function accepts all constants which can be loaded directly
3825 into a VR. */
3826
3827 static bool
3828 legitimate_reload_vector_constant_p (rtx op)
3829 {
3830 /* FIXME: Support constant vectors with all the same 16 bit unsigned
3831 operands. These can be loaded with vrepi. */
3832
3833 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
3834 && (const0_operand (op, GET_MODE (op))
3835 || constm1_operand (op, GET_MODE (op))
3836 || s390_contiguous_bitmask_vector_p (op, NULL, NULL)
3837 || s390_bytemask_vector_p (op, NULL)))
3838 return true;
3839
3840 return false;
3841 }
3842
3843 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
3844 return the class of reg to actually use. */
3845
3846 static reg_class_t
3847 s390_preferred_reload_class (rtx op, reg_class_t rclass)
3848 {
3849 switch (GET_CODE (op))
3850 {
3851 /* Constants we cannot reload into general registers
3852 must be forced into the literal pool. */
3853 case CONST_VECTOR:
3854 case CONST_DOUBLE:
3855 case CONST_INT:
3856 if (reg_class_subset_p (GENERAL_REGS, rclass)
3857 && legitimate_reload_constant_p (op))
3858 return GENERAL_REGS;
3859 else if (reg_class_subset_p (ADDR_REGS, rclass)
3860 && legitimate_reload_constant_p (op))
3861 return ADDR_REGS;
3862 else if (reg_class_subset_p (FP_REGS, rclass)
3863 && legitimate_reload_fp_constant_p (op))
3864 return FP_REGS;
3865 else if (reg_class_subset_p (VEC_REGS, rclass)
3866 && legitimate_reload_vector_constant_p (op))
3867 return VEC_REGS;
3868
3869 return NO_REGS;
3870
3871 /* If a symbolic constant or a PLUS is reloaded,
3872 it is most likely being used as an address, so
3873 prefer ADDR_REGS. If 'class' is not a superset
3874 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
3875 case CONST:
3876 /* Symrefs cannot be pushed into the literal pool with -fPIC
3877 so we *MUST NOT* return NO_REGS for these cases
3878 (s390_cannot_force_const_mem will return true).
3879
3880 On the other hand we MUST return NO_REGS for symrefs with
3881 invalid addend which might have been pushed to the literal
3882 pool (no -fPIC). Usually we would expect them to be
3883 handled via secondary reload but this does not happen if
3884 they are used as literal pool slot replacement in reload
3885 inheritance (see emit_input_reload_insns). */
3886 if (TARGET_CPU_ZARCH
3887 && GET_CODE (XEXP (op, 0)) == PLUS
3888 && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
3889 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3890 {
3891 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
3892 return ADDR_REGS;
3893 else
3894 return NO_REGS;
3895 }
3896 /* fallthrough */
3897 case LABEL_REF:
3898 case SYMBOL_REF:
3899 if (!legitimate_reload_constant_p (op))
3900 return NO_REGS;
3901 /* fallthrough */
3902 case PLUS:
3903 /* load address will be used. */
3904 if (reg_class_subset_p (ADDR_REGS, rclass))
3905 return ADDR_REGS;
3906 else
3907 return NO_REGS;
3908
3909 default:
3910 break;
3911 }
3912
3913 return rclass;
3914 }
3915
3916 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
3917 multiple of ALIGNMENT and the SYMBOL_REF being naturally
3918 aligned. */
3919
3920 bool
3921 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
3922 {
3923 HOST_WIDE_INT addend;
3924 rtx symref;
3925
3926 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3927 return false;
3928
3929 if (addend & (alignment - 1))
3930 return false;
3931
3932 if (GET_CODE (symref) == SYMBOL_REF
3933 && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref))
3934 return true;
3935
3936 if (GET_CODE (symref) == UNSPEC
3937 && alignment <= UNITS_PER_LONG)
3938 return true;
3939
3940 return false;
3941 }
3942
3943 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
3944 operand, SCRATCH is used to load the even part of the address,
3945 and one is then added. */
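/* E.g. for ADDR = sym + 5, taking %r2 for SCRATCH and %r1 for REG
   (register choices illustrative), the emitted sequence is roughly

	larl	%r2, sym+4	; load the even part (addend - 1)
	la	%r1, 1(%r2)	; add 1 without clobbering cc

   For an even addend a single larl into REG suffices. */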
3946
3947 void
3948 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
3949 {
3950 HOST_WIDE_INT addend;
3951 rtx symref;
3952
3953 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3954 gcc_unreachable ();
3955
3956 if (!(addend & 1))
3957 /* Easy case. The addend is even so larl will do fine. */
3958 emit_move_insn (reg, addr);
3959 else
3960 {
3961 /* We can leave the scratch register untouched if the target
3962 register is a valid base register. */
3963 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
3964 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
3965 scratch = reg;
3966
3967 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
3968 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
3969
3970 if (addend != 1)
3971 emit_move_insn (scratch,
3972 gen_rtx_CONST (Pmode,
3973 gen_rtx_PLUS (Pmode, symref,
3974 GEN_INT (addend - 1))));
3975 else
3976 emit_move_insn (scratch, symref);
3977
3978 /* Increment the address using la in order to avoid clobbering cc. */
3979 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
3980 }
3981 }
3982
3983 /* Generate what is necessary to move between REG and MEM using
3984 SCRATCH. The direction is given by TOMEM. */
3985
3986 void
3987 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
3988 {
3989 /* Reload might have pulled a constant out of the literal pool.
3990 Force it back in. */
3991 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
3992 || GET_CODE (mem) == CONST_VECTOR
3993 || GET_CODE (mem) == CONST)
3994 mem = force_const_mem (GET_MODE (reg), mem);
3995
3996 gcc_assert (MEM_P (mem));
3997
3998 /* For a load from memory we can leave the scratch register
3999 untouched if the target register is a valid base register. */
4000 if (!tomem
4001 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4002 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4003 && GET_MODE (reg) == GET_MODE (scratch))
4004 scratch = reg;
4005
4006 /* Load address into scratch register. Since we can't have a
4007 secondary reload for a secondary reload we have to cover the case
4008 where larl would need a secondary reload here as well. */
4009 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4010
4011 /* Now we can use a standard load/store to do the move. */
4012 if (tomem)
4013 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4014 else
4015 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4016 }
4017
4018 /* Inform reload about cases where moving X with a mode MODE to a register in
4019 RCLASS requires an extra scratch or immediate register. Return the class
4020 needed for the immediate register. */
4021
4022 static reg_class_t
4023 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4024 machine_mode mode, secondary_reload_info *sri)
4025 {
4026 enum reg_class rclass = (enum reg_class) rclass_i;
4027
4028 /* Intermediate register needed. */
4029 if (reg_classes_intersect_p (CC_REGS, rclass))
4030 return GENERAL_REGS;
4031
4032 if (TARGET_VX)
4033 {
4034 /* The vst/vl vector move instructions allow only short
4035 displacements. */
4036 if (MEM_P (x)
4037 && GET_CODE (XEXP (x, 0)) == PLUS
4038 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4039 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4040 && reg_class_subset_p (rclass, VEC_REGS)
4041 && (!reg_class_subset_p (rclass, FP_REGS)
4042 || (GET_MODE_SIZE (mode) > 8
4043 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4044 {
4045 if (in_p)
4046 sri->icode = (TARGET_64BIT ?
4047 CODE_FOR_reloaddi_la_in :
4048 CODE_FOR_reloadsi_la_in);
4049 else
4050 sri->icode = (TARGET_64BIT ?
4051 CODE_FOR_reloaddi_la_out :
4052 CODE_FOR_reloadsi_la_out);
4053 }
4054 }
4055
4056 if (TARGET_Z10)
4057 {
4058 HOST_WIDE_INT offset;
4059 rtx symref;
4060
4061 /* On z10 several optimizer steps may generate larl operands with
4062 an odd addend. */
4063 if (in_p
4064 && s390_loadrelative_operand_p (x, &symref, &offset)
4065 && mode == Pmode
4066 && !SYMBOL_REF_ALIGN1_P (symref)
4067 && (offset & 1) == 1)
4068 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4069 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4070
4071 /* Handle all the (mem (symref)) accesses we cannot use the z10
4072 instructions for. */
4073 if (MEM_P (x)
4074 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4075 && (mode == QImode
4076 || !reg_class_subset_p (rclass, GENERAL_REGS)
4077 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4078 || !s390_check_symref_alignment (XEXP (x, 0),
4079 GET_MODE_SIZE (mode))))
4080 {
4081 #define __SECONDARY_RELOAD_CASE(M,m) \
4082 case M##mode: \
4083 if (TARGET_64BIT) \
4084 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4085 CODE_FOR_reload##m##di_tomem_z10; \
4086 else \
4087 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4088 CODE_FOR_reload##m##si_tomem_z10; \
4089 break;
4090
4091 switch (GET_MODE (x))
4092 {
4093 __SECONDARY_RELOAD_CASE (QI, qi);
4094 __SECONDARY_RELOAD_CASE (HI, hi);
4095 __SECONDARY_RELOAD_CASE (SI, si);
4096 __SECONDARY_RELOAD_CASE (DI, di);
4097 __SECONDARY_RELOAD_CASE (TI, ti);
4098 __SECONDARY_RELOAD_CASE (SF, sf);
4099 __SECONDARY_RELOAD_CASE (DF, df);
4100 __SECONDARY_RELOAD_CASE (TF, tf);
4101 __SECONDARY_RELOAD_CASE (SD, sd);
4102 __SECONDARY_RELOAD_CASE (DD, dd);
4103 __SECONDARY_RELOAD_CASE (TD, td);
4104 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4105 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4106 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4107 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4108 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4109 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4110 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4111 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4112 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4113 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4114 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4115 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4116 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4117 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4118 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4119 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4120 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4121 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4122 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4123 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4124 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4125 default:
4126 gcc_unreachable ();
4127 }
4128 #undef __SECONDARY_RELOAD_CASE
4129 }
4130 }
4131
4132 /* We need a scratch register when loading a PLUS expression which
4133 is not a legitimate operand of the LOAD ADDRESS instruction. */
4134 /* LRA can deal with the transformation of a plus op very well -- so we
4135 don't need to prompt LRA in this case. */
4136 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4137 sri->icode = (TARGET_64BIT ?
4138 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4139
4140 /* When performing a multiword move from or to memory we have to make sure
4141 the second chunk in memory is addressable without causing a displacement
4142 overflow. If it is not, we calculate the address in
4143 a scratch register. */
4144 if (MEM_P (x)
4145 && GET_CODE (XEXP (x, 0)) == PLUS
4146 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4147 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4148 + GET_MODE_SIZE (mode) - 1))
4149 {
4150 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4151 in an s_operand address since we may fall back to lm/stm. So we only
4152 have to care about overflows in the b+i+d case. */
4153 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4154 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4155 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4156 /* For FP_REGS no lm/stm is available so this check is triggered
4157 for displacement overflows in b+i+d and b+d like addresses. */
4158 || (reg_classes_intersect_p (FP_REGS, rclass)
4159 && s390_class_max_nregs (FP_REGS, mode) > 1))
4160 {
4161 if (in_p)
4162 sri->icode = (TARGET_64BIT ?
4163 CODE_FOR_reloaddi_la_in :
4164 CODE_FOR_reloadsi_la_in);
4165 else
4166 sri->icode = (TARGET_64BIT ?
4167 CODE_FOR_reloaddi_la_out :
4168 CODE_FOR_reloadsi_la_out);
4169 }
4170 }
4171
4172 /* A scratch address register is needed when a symbolic constant is
4173 copied to r0 when compiling with -fPIC. In other cases the target
4174 register might be used as a temporary (see legitimize_pic_address). */
4175 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4176 sri->icode = (TARGET_64BIT ?
4177 CODE_FOR_reloaddi_PIC_addr :
4178 CODE_FOR_reloadsi_PIC_addr);
4179
4180 /* Either scratch or no register needed. */
4181 return NO_REGS;
4182 }
4183
4184 /* Generate code to load SRC, which is a PLUS that is not a
4185 legitimate operand for the LA instruction, into TARGET.
4186 SCRATCH may be used as scratch register. */
4187
4188 void
4189 s390_expand_plus_operand (rtx target, rtx src,
4190 rtx scratch)
4191 {
4192 rtx sum1, sum2;
4193 struct s390_address ad;
4194
4195 /* src must be a PLUS; get its two operands. */
4196 gcc_assert (GET_CODE (src) == PLUS);
4197 gcc_assert (GET_MODE (src) == Pmode);
4198
4199 /* Check if any of the two operands is already scheduled
4200 for replacement by reload. This can happen e.g. when
4201 float registers occur in an address. */
4202 sum1 = find_replacement (&XEXP (src, 0));
4203 sum2 = find_replacement (&XEXP (src, 1));
4204 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4205
4206 /* If the address is already strictly valid, there's nothing to do. */
4207 if (!s390_decompose_address (src, &ad)
4208 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4209 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4210 {
4211 /* Otherwise, one of the operands cannot be an address register;
4212 we reload its value into the scratch register. */
4213 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4214 {
4215 emit_move_insn (scratch, sum1);
4216 sum1 = scratch;
4217 }
4218 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4219 {
4220 emit_move_insn (scratch, sum2);
4221 sum2 = scratch;
4222 }
4223
4224 /* According to the way these invalid addresses are generated
4225 in reload.c, it should never happen (at least on s390) that
4226 *neither* of the PLUS components, after find_replacements
4227 was applied, is an address register. */
4228 if (sum1 == scratch && sum2 == scratch)
4229 {
4230 debug_rtx (src);
4231 gcc_unreachable ();
4232 }
4233
4234 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4235 }
4236
4237 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4238 is only ever performed on addresses, so we can mark the
4239 sum as legitimate for LA in any case. */
4240 s390_load_address (target, src);
4241 }
4242
4243
4244 /* Return true if ADDR is a valid memory address.
4245 STRICT specifies whether strict register checking applies. */
4246
4247 static bool
4248 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4249 {
4250 struct s390_address ad;
4251
4252 if (TARGET_Z10
4253 && larl_operand (addr, VOIDmode)
4254 && (mode == VOIDmode
4255 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4256 return true;
4257
4258 if (!s390_decompose_address (addr, &ad))
4259 return false;
4260
4261 if (strict)
4262 {
4263 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4264 return false;
4265
4266 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4267 return false;
4268 }
4269 else
4270 {
4271 if (ad.base
4272 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4273 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4274 return false;
4275
4276 if (ad.indx
4277 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4278 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4279 return false;
4280 }
4281 return true;
4282 }
4283
4284 /* Return true if OP is a valid operand for the LA instruction.
4285 In 31-bit, we need to prove that the result is used as an
4286 address, as LA performs only a 31-bit addition. */
4287
4288 bool
4289 legitimate_la_operand_p (rtx op)
4290 {
4291 struct s390_address addr;
4292 if (!s390_decompose_address (op, &addr))
4293 return false;
4294
4295 return (TARGET_64BIT || addr.pointer);
4296 }
4297
4298 /* Return true if it is valid *and* preferable to use LA to
4299 compute the sum of OP1 and OP2. */
4300
4301 bool
4302 preferred_la_operand_p (rtx op1, rtx op2)
4303 {
4304 struct s390_address addr;
4305
4306 if (op2 != const0_rtx)
4307 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4308
4309 if (!s390_decompose_address (op1, &addr))
4310 return false;
4311 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4312 return false;
4313 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4314 return false;
4315
4316 /* Avoid LA instructions with index register on z196; it is
4317 preferable to use regular add instructions when possible.
4318 Starting with zEC12 the la with index register is "uncracked"
4319 again. */
4320 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4321 return false;
4322
4323 if (!TARGET_64BIT && !addr.pointer)
4324 return false;
4325
4326 if (addr.pointer)
4327 return true;
4328
4329 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4330 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4331 return true;
4332
4333 return false;
4334 }
4335
4336 /* Emit a forced load-address operation to load SRC into DST.
4337 This will use the LOAD ADDRESS instruction even in situations
4338 where legitimate_la_operand_p (SRC) returns false. */
4339
4340 void
4341 s390_load_address (rtx dst, rtx src)
4342 {
4343 if (TARGET_64BIT)
4344 emit_move_insn (dst, src);
4345 else
4346 emit_insn (gen_force_la_31 (dst, src));
4347 }
4348
4349 /* Return a legitimate reference for ORIG (an address) using the
4350 register REG. If REG is 0, a new pseudo is generated.
4351
4352 There are two types of references that must be handled:
4353
4354 1. Global data references must load the address from the GOT, via
4355 the PIC reg. An insn is emitted to do this load, and the reg is
4356 returned.
4357
4358 2. Static data references, constant pool addresses, and code labels
4359 compute the address as an offset from the GOT, whose base is in
4360 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4361 differentiate them from global data objects. The returned
4362 address is the PIC reg + an unspec constant.
4363
4364 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4365 reg also appears in the address. */
4366
4367 rtx
4368 legitimize_pic_address (rtx orig, rtx reg)
4369 {
4370 rtx addr = orig;
4371 rtx addend = const0_rtx;
4372 rtx new_rtx = orig;
4373
4374 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4375
4376 if (GET_CODE (addr) == CONST)
4377 addr = XEXP (addr, 0);
4378
4379 if (GET_CODE (addr) == PLUS)
4380 {
4381 addend = XEXP (addr, 1);
4382 addr = XEXP (addr, 0);
4383 }
4384
4385 if ((GET_CODE (addr) == LABEL_REF
4386 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
4387 || (GET_CODE (addr) == UNSPEC &&
4388 (XINT (addr, 1) == UNSPEC_GOTENT
4389 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4390 && GET_CODE (addend) == CONST_INT)
4391 {
4392 /* This can be locally addressed. */
4393
4394 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4395 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4396 gen_rtx_CONST (Pmode, addr) : addr);
4397
4398 if (TARGET_CPU_ZARCH
4399 && larl_operand (const_addr, VOIDmode)
4400 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
4401 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
4402 {
4403 if (INTVAL (addend) & 1)
4404 {
4405 /* LARL can't handle odd offsets, so emit a pair of LARL
4406 and LA. */
4407 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4408
4409 if (!DISP_IN_RANGE (INTVAL (addend)))
4410 {
4411 HOST_WIDE_INT even = INTVAL (addend) - 1;
4412 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4413 addr = gen_rtx_CONST (Pmode, addr);
4414 addend = const1_rtx;
4415 }
4416
4417 emit_move_insn (temp, addr);
4418 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4419
4420 if (reg != 0)
4421 {
4422 s390_load_address (reg, new_rtx);
4423 new_rtx = reg;
4424 }
4425 }
4426 else
4427 {
4428 /* If the offset is even, we can just use LARL. This
4429 will happen automatically. */
4430 }
4431 }
4432 else
4433 {
4434 /* No larl - Access local symbols relative to the GOT. */
4435
4436 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4437
4438 if (reload_in_progress || reload_completed)
4439 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4440
4441 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4442 if (addend != const0_rtx)
4443 addr = gen_rtx_PLUS (Pmode, addr, addend);
4444 addr = gen_rtx_CONST (Pmode, addr);
4445 addr = force_const_mem (Pmode, addr);
4446 emit_move_insn (temp, addr);
4447
4448 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4449 if (reg != 0)
4450 {
4451 s390_load_address (reg, new_rtx);
4452 new_rtx = reg;
4453 }
4454 }
4455 }
4456 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4457 {
4458 /* A non-local symbol reference without addend.
4459
4460 The symbol ref is wrapped into an UNSPEC to make sure the
4461 proper operand modifier (@GOT or @GOTENT) will be emitted.
4462 This will tell the linker to put the symbol into the GOT.
4463
4464 Additionally the code dereferencing the GOT slot is emitted here.
4465
4466 An addend to the symref needs to be added afterwards.
4467 legitimize_pic_address calls itself recursively to handle
4468 that case. So no need to do it here. */
4469
4470 if (reg == 0)
4471 reg = gen_reg_rtx (Pmode);
4472
4473 if (TARGET_Z10)
4474 {
4475 /* Use load relative if possible.
4476 lgrl <target>, sym@GOTENT */
4477 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4478 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4479 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4480
4481 emit_move_insn (reg, new_rtx);
4482 new_rtx = reg;
4483 }
4484 else if (flag_pic == 1)
4485 {
4486 /* Assume GOT offset is a valid displacement operand (< 4k
4487 or < 512k with z990). This is handled the same way in
4488 both 31- and 64-bit code (@GOT).
4489 lg <target>, sym@GOT(r12) */
4490
4491 if (reload_in_progress || reload_completed)
4492 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4493
4494 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4495 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4496 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4497 new_rtx = gen_const_mem (Pmode, new_rtx);
4498 emit_move_insn (reg, new_rtx);
4499 new_rtx = reg;
4500 }
4501 else if (TARGET_CPU_ZARCH)
4502 {
4503 /* If the GOT offset might be >= 4k, we determine the position
4504 of the GOT entry via a PC-relative LARL (@GOTENT).
4505 larl temp, sym@GOTENT
4506 lg <target>, 0(temp) */
4507
4508 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4509
4510 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4511 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4512
4513 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4514 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4515 emit_move_insn (temp, new_rtx);
4516
4517 new_rtx = gen_const_mem (Pmode, temp);
4518 emit_move_insn (reg, new_rtx);
4519
4520 new_rtx = reg;
4521 }
4522 else
4523 {
4524 /* If the GOT offset might be >= 4k, we have to load it
4525 from the literal pool (@GOT).
4526
4527 lg temp, lit-litbase(r13)
4528 lg <target>, 0(temp)
4529 lit: .long sym@GOT */
4530
4531 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4532
4533 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4534 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4535
4536 if (reload_in_progress || reload_completed)
4537 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4538
4539 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4540 addr = gen_rtx_CONST (Pmode, addr);
4541 addr = force_const_mem (Pmode, addr);
4542 emit_move_insn (temp, addr);
4543
4544 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4545 new_rtx = gen_const_mem (Pmode, new_rtx);
4546 emit_move_insn (reg, new_rtx);
4547 new_rtx = reg;
4548 }
4549 }
4550 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4551 {
4552 gcc_assert (XVECLEN (addr, 0) == 1);
4553 switch (XINT (addr, 1))
4554 {
4555 /* These are addresses of symbols (or PLT slots) relative to the
4556 GOT (not GOT slots!). In general this will exceed the
4557 displacement range, so these values belong in the literal
4558 pool. */
4559 case UNSPEC_GOTOFF:
4560 case UNSPEC_PLTOFF:
4561 new_rtx = force_const_mem (Pmode, orig);
4562 break;
4563
4564 /* For -fPIC the GOT size might exceed the displacement
4565 range so make sure the value is in the literal pool. */
4566 case UNSPEC_GOT:
4567 if (flag_pic == 2)
4568 new_rtx = force_const_mem (Pmode, orig);
4569 break;
4570
4571 /* For @GOTENT larl is used. This is handled like local
4572 symbol refs. */
4573 case UNSPEC_GOTENT:
4574 gcc_unreachable ();
4575 break;
4576
4577 /* @PLT is OK as is on 64-bit, must be converted to
4578 GOT-relative @PLTOFF on 31-bit. */
4579 case UNSPEC_PLT:
4580 if (!TARGET_CPU_ZARCH)
4581 {
4582 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4583
4584 if (reload_in_progress || reload_completed)
4585 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4586
4587 addr = XVECEXP (addr, 0, 0);
4588 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4589 UNSPEC_PLTOFF);
4590 if (addend != const0_rtx)
4591 addr = gen_rtx_PLUS (Pmode, addr, addend);
4592 addr = gen_rtx_CONST (Pmode, addr);
4593 addr = force_const_mem (Pmode, addr);
4594 emit_move_insn (temp, addr);
4595
4596 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4597 if (reg != 0)
4598 {
4599 s390_load_address (reg, new_rtx);
4600 new_rtx = reg;
4601 }
4602 }
4603 else
4604 /* On 64 bit larl can be used. This case is handled like
4605 local symbol refs. */
4606 gcc_unreachable ();
4607 break;
4608
4609 /* Everything else cannot happen. */
4610 default:
4611 gcc_unreachable ();
4612 }
4613 }
4614 else if (addend != const0_rtx)
4615 {
4616 /* Otherwise, compute the sum. */
4617
4618 rtx base = legitimize_pic_address (addr, reg);
4619 new_rtx = legitimize_pic_address (addend,
4620 base == reg ? NULL_RTX : reg);
4621 if (GET_CODE (new_rtx) == CONST_INT)
4622 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4623 else
4624 {
4625 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4626 {
4627 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4628 new_rtx = XEXP (new_rtx, 1);
4629 }
4630 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4631 }
4632
4633 if (GET_CODE (new_rtx) == CONST)
4634 new_rtx = XEXP (new_rtx, 0);
4635 new_rtx = force_operand (new_rtx, 0);
4636 }
4637
4638 return new_rtx;
4639 }
4640
4641 /* Load the thread pointer into a register. */
4642
4643 rtx
4644 s390_get_thread_pointer (void)
4645 {
4646 rtx tp = gen_reg_rtx (Pmode);
4647
4648 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4649 mark_reg_pointer (tp, BITS_PER_WORD);
4650
4651 return tp;
4652 }
4653
4654 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4655 in s390_tls_symbol which always refers to __tls_get_offset.
4656 The returned offset is written to RESULT_REG and a USE rtx is
4657 generated for TLS_CALL. */
4658
4659 static GTY(()) rtx s390_tls_symbol;
4660
4661 static void
4662 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4663 {
4664 rtx insn;
4665
4666 if (!flag_pic)
4667 emit_insn (s390_load_got ());
4668
4669 if (!s390_tls_symbol)
4670 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4671
4672 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4673 gen_rtx_REG (Pmode, RETURN_REGNUM));
4674
4675 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4676 RTL_CONST_CALL_P (insn) = 1;
4677 }
4678
4679 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4680 this (thread-local) address. REG may be used as temporary. */
4681
4682 static rtx
4683 legitimize_tls_address (rtx addr, rtx reg)
4684 {
4685 rtx new_rtx, tls_call, temp, base, r2, insn;
4686
4687 if (GET_CODE (addr) == SYMBOL_REF)
4688 switch (tls_symbolic_operand (addr))
4689 {
4690 case TLS_MODEL_GLOBAL_DYNAMIC:
4691 start_sequence ();
4692 r2 = gen_rtx_REG (Pmode, 2);
4693 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4694 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4695 new_rtx = force_const_mem (Pmode, new_rtx);
4696 emit_move_insn (r2, new_rtx);
4697 s390_emit_tls_call_insn (r2, tls_call);
4698 insn = get_insns ();
4699 end_sequence ();
4700
4701 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4702 temp = gen_reg_rtx (Pmode);
4703 emit_libcall_block (insn, temp, r2, new_rtx);
4704
4705 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4706 if (reg != 0)
4707 {
4708 s390_load_address (reg, new_rtx);
4709 new_rtx = reg;
4710 }
4711 break;
4712
4713 case TLS_MODEL_LOCAL_DYNAMIC:
4714 start_sequence ();
4715 r2 = gen_rtx_REG (Pmode, 2);
4716 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4717 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4718 new_rtx = force_const_mem (Pmode, new_rtx);
4719 emit_move_insn (r2, new_rtx);
4720 s390_emit_tls_call_insn (r2, tls_call);
4721 insn = get_insns ();
4722 end_sequence ();
4723
4724 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4725 temp = gen_reg_rtx (Pmode);
4726 emit_libcall_block (insn, temp, r2, new_rtx);
4727
4728 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4729 base = gen_reg_rtx (Pmode);
4730 s390_load_address (base, new_rtx);
4731
4732 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4733 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4734 new_rtx = force_const_mem (Pmode, new_rtx);
4735 temp = gen_reg_rtx (Pmode);
4736 emit_move_insn (temp, new_rtx);
4737
4738 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4739 if (reg != 0)
4740 {
4741 s390_load_address (reg, new_rtx);
4742 new_rtx = reg;
4743 }
4744 break;
4745
4746 case TLS_MODEL_INITIAL_EXEC:
4747 if (flag_pic == 1)
4748 {
4749 /* Assume GOT offset < 4k. This is handled the same way
4750 in both 31- and 64-bit code. */
4751
4752 if (reload_in_progress || reload_completed)
4753 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4754
4755 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4756 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4757 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4758 new_rtx = gen_const_mem (Pmode, new_rtx);
4759 temp = gen_reg_rtx (Pmode);
4760 emit_move_insn (temp, new_rtx);
4761 }
4762 else if (TARGET_CPU_ZARCH)
4763 {
4764 /* If the GOT offset might be >= 4k, we determine the position
4765 of the GOT entry via a PC-relative LARL. */
4766
4767 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4768 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4769 temp = gen_reg_rtx (Pmode);
4770 emit_move_insn (temp, new_rtx);
4771
4772 new_rtx = gen_const_mem (Pmode, temp);
4773 temp = gen_reg_rtx (Pmode);
4774 emit_move_insn (temp, new_rtx);
4775 }
4776 else if (flag_pic)
4777 {
4778 /* If the GOT offset might be >= 4k, we have to load it
4779 from the literal pool. */
4780
4781 if (reload_in_progress || reload_completed)
4782 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4783
4784 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4785 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4786 new_rtx = force_const_mem (Pmode, new_rtx);
4787 temp = gen_reg_rtx (Pmode);
4788 emit_move_insn (temp, new_rtx);
4789
4790 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4791 new_rtx = gen_const_mem (Pmode, new_rtx);
4792
4793 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4794 temp = gen_reg_rtx (Pmode);
4795 emit_insn (gen_rtx_SET (temp, new_rtx));
4796 }
4797 else
4798 {
4799 /* In position-dependent code, load the absolute address of
4800 the GOT entry from the literal pool. */
4801
4802 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4803 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4804 new_rtx = force_const_mem (Pmode, new_rtx);
4805 temp = gen_reg_rtx (Pmode);
4806 emit_move_insn (temp, new_rtx);
4807
4808 new_rtx = temp;
4809 new_rtx = gen_const_mem (Pmode, new_rtx);
4810 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4811 temp = gen_reg_rtx (Pmode);
4812 emit_insn (gen_rtx_SET (temp, new_rtx));
4813 }
4814
4815 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4816 if (reg != 0)
4817 {
4818 s390_load_address (reg, new_rtx);
4819 new_rtx = reg;
4820 }
4821 break;
4822
4823 case TLS_MODEL_LOCAL_EXEC:
4824 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4825 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4826 new_rtx = force_const_mem (Pmode, new_rtx);
4827 temp = gen_reg_rtx (Pmode);
4828 emit_move_insn (temp, new_rtx);
4829
4830 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4831 if (reg != 0)
4832 {
4833 s390_load_address (reg, new_rtx);
4834 new_rtx = reg;
4835 }
4836 break;
4837
4838 default:
4839 gcc_unreachable ();
4840 }
4841
4842 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
4843 {
4844 switch (XINT (XEXP (addr, 0), 1))
4845 {
4846 case UNSPEC_INDNTPOFF:
4847 gcc_assert (TARGET_CPU_ZARCH);
4848 new_rtx = addr;
4849 break;
4850
4851 default:
4852 gcc_unreachable ();
4853 }
4854 }
4855
4856 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
4857 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4858 {
4859 new_rtx = XEXP (XEXP (addr, 0), 0);
4860 if (GET_CODE (new_rtx) != SYMBOL_REF)
4861 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4862
4863 new_rtx = legitimize_tls_address (new_rtx, reg);
4864 new_rtx = plus_constant (Pmode, new_rtx,
4865 INTVAL (XEXP (XEXP (addr, 0), 1)));
4866 new_rtx = force_operand (new_rtx, 0);
4867 }
4868
4869 else
4870 gcc_unreachable (); /* for now ... */
4871
4872 return new_rtx;
4873 }
4874
4875 /* Emit insns making the address in operands[1] valid for a standard
4876 move to operands[0]. operands[1] is replaced by an address which
4877 should be used instead of the former RTX to emit the move
4878 pattern. */
4879
4880 void
4881 emit_symbolic_move (rtx *operands)
4882 {
4883 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
4884
4885 if (GET_CODE (operands[0]) == MEM)
4886 operands[1] = force_reg (Pmode, operands[1]);
4887 else if (TLS_SYMBOLIC_CONST (operands[1]))
4888 operands[1] = legitimize_tls_address (operands[1], temp);
4889 else if (flag_pic)
4890 operands[1] = legitimize_pic_address (operands[1], temp);
4891 }
4892
4893 /* Try machine-dependent ways of modifying an illegitimate address X
4894 to be legitimate. If we find one, return the new, valid address.
4895
4896 OLDX is the address as it was before break_out_memory_refs was called.
4897 In some cases it is useful to look at this to decide what needs to be done.
4898
4899 MODE is the mode of the operand pointed to by X.
4900
4901 When -fpic is used, special handling is needed for symbolic references.
4902 See comments by legitimize_pic_address for details. */
4903
4904 static rtx
4905 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4906 machine_mode mode ATTRIBUTE_UNUSED)
4907 {
4908 rtx constant_term = const0_rtx;
4909
4910 if (TLS_SYMBOLIC_CONST (x))
4911 {
4912 x = legitimize_tls_address (x, 0);
4913
4914 if (s390_legitimate_address_p (mode, x, FALSE))
4915 return x;
4916 }
4917 else if (GET_CODE (x) == PLUS
4918 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
4919 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
4920 {
4921 return x;
4922 }
4923 else if (flag_pic)
4924 {
4925 if (SYMBOLIC_CONST (x)
4926 || (GET_CODE (x) == PLUS
4927 && (SYMBOLIC_CONST (XEXP (x, 0))
4928 || SYMBOLIC_CONST (XEXP (x, 1)))))
4929 x = legitimize_pic_address (x, 0);
4930
4931 if (s390_legitimate_address_p (mode, x, FALSE))
4932 return x;
4933 }
4934
4935 x = eliminate_constant_term (x, &constant_term);
4936
4937 /* Optimize loading of large displacements by splitting them
4938 into the multiple of 4K and the rest; this allows the
4939 former to be CSE'd if possible.
4940
4941 Don't do this if the displacement is added to a register
4942 pointing into the stack frame, as the offsets will
4943 change later anyway. */
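/* As a worked example, a constant term of 0x12345 is split into
   lower = 0x12345 & 0xfff = 0x345, which stays in the displacement
   field, and upper = 0x12345 ^ 0x345 = 0x12000, which gets loaded
   into a register and can be CSE'd. */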
4944
4945 if (GET_CODE (constant_term) == CONST_INT
4946 && !TARGET_LONG_DISPLACEMENT
4947 && !DISP_IN_RANGE (INTVAL (constant_term))
4948 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
4949 {
4950 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
4951 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
4952
4953 rtx temp = gen_reg_rtx (Pmode);
4954 rtx val = force_operand (GEN_INT (upper), temp);
4955 if (val != temp)
4956 emit_move_insn (temp, val);
4957
4958 x = gen_rtx_PLUS (Pmode, x, temp);
4959 constant_term = GEN_INT (lower);
4960 }
4961
4962 if (GET_CODE (x) == PLUS)
4963 {
4964 if (GET_CODE (XEXP (x, 0)) == REG)
4965 {
4966 rtx temp = gen_reg_rtx (Pmode);
4967 rtx val = force_operand (XEXP (x, 1), temp);
4968 if (val != temp)
4969 emit_move_insn (temp, val);
4970
4971 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
4972 }
4973
4974 else if (GET_CODE (XEXP (x, 1)) == REG)
4975 {
4976 rtx temp = gen_reg_rtx (Pmode);
4977 rtx val = force_operand (XEXP (x, 0), temp);
4978 if (val != temp)
4979 emit_move_insn (temp, val);
4980
4981 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
4982 }
4983 }
4984
4985 if (constant_term != const0_rtx)
4986 x = gen_rtx_PLUS (Pmode, x, constant_term);
4987
4988 return x;
4989 }
4990
4991 /* Try a machine-dependent way of reloading an illegitimate address AD
4992 operand. If we find one, push the reload and return the new address.
4993
4994 MODE is the mode of the enclosing MEM. OPNUM is the operand number
4995 and TYPE is the reload type of the current reload. */
4996
4997 rtx
4998 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
4999 int opnum, int type)
5000 {
5001 if (!optimize || TARGET_LONG_DISPLACEMENT)
5002 return NULL_RTX;
5003
5004 if (GET_CODE (ad) == PLUS)
5005 {
5006 rtx tem = simplify_binary_operation (PLUS, Pmode,
5007 XEXP (ad, 0), XEXP (ad, 1));
5008 if (tem)
5009 ad = tem;
5010 }
5011
5012 if (GET_CODE (ad) == PLUS
5013 && GET_CODE (XEXP (ad, 0)) == REG
5014 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5015 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5016 {
5017 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5018 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5019 rtx cst, tem, new_rtx;
5020
5021 cst = GEN_INT (upper);
5022 if (!legitimate_reload_constant_p (cst))
5023 cst = force_const_mem (Pmode, cst);
5024
5025 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5026 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5027
5028 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5029 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5030 opnum, (enum reload_type) type);
5031 return new_rtx;
5032 }
5033
5034 return NULL_RTX;
5035 }
5036
5037 /* Emit code to move LEN bytes from SRC to DST. */
5038
5039 bool
5040 s390_expand_movmem (rtx dst, rtx src, rtx len)
5041 {
5042 /* When tuning for z10 or higher we rely on the Glibc functions to
5043 do the right thing. We generate inline code only for constant
5044 lengths below 64k. */
5045 if (s390_tune >= PROCESSOR_2097_Z10
5046 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5047 return false;
5048
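/* Below, small constant lengths are handled with a single MVC,
   TARGET_MVCLE targets use MVCLE, and everything else falls back to a
   block loop: with count = len - 1 and blocks = count >> 8 the loop
   copies 256 bytes per iteration and a trailing movmem_short covers
   the remainder. As an illustrative example (and assuming that the
   trailing movmem_short consumes only the low byte of count), len =
   700 gives count = 699 and blocks = 2, i.e. two 256-byte chunks
   followed by a final 188-byte chunk (2 * 256 + 188 = 700). */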
5049 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5050 {
5051 if (INTVAL (len) > 0)
5052 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
5053 }
5054
5055 else if (TARGET_MVCLE)
5056 {
5057 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5058 }
5059
5060 else
5061 {
5062 rtx dst_addr, src_addr, count, blocks, temp;
5063 rtx_code_label *loop_start_label = gen_label_rtx ();
5064 rtx_code_label *loop_end_label = gen_label_rtx ();
5065 rtx_code_label *end_label = gen_label_rtx ();
5066 machine_mode mode;
5067
5068 mode = GET_MODE (len);
5069 if (mode == VOIDmode)
5070 mode = Pmode;
5071
5072 dst_addr = gen_reg_rtx (Pmode);
5073 src_addr = gen_reg_rtx (Pmode);
5074 count = gen_reg_rtx (mode);
5075 blocks = gen_reg_rtx (mode);
5076
5077 convert_move (count, len, 1);
5078 emit_cmp_and_jump_insns (count, const0_rtx,
5079 EQ, NULL_RTX, mode, 1, end_label);
5080
5081 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5082 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5083 dst = change_address (dst, VOIDmode, dst_addr);
5084 src = change_address (src, VOIDmode, src_addr);
5085
5086 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5087 OPTAB_DIRECT);
5088 if (temp != count)
5089 emit_move_insn (count, temp);
5090
5091 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5092 OPTAB_DIRECT);
5093 if (temp != blocks)
5094 emit_move_insn (blocks, temp);
5095
5096 emit_cmp_and_jump_insns (blocks, const0_rtx,
5097 EQ, NULL_RTX, mode, 1, loop_end_label);
5098
5099 emit_label (loop_start_label);
5100
5101 if (TARGET_Z10
5102 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5103 {
5104 rtx prefetch;
5105
5106 /* Issue a read prefetch for the +3 cache line. */
5107 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5108 const0_rtx, const0_rtx);
5109 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5110 emit_insn (prefetch);
5111
5112 /* Issue a write prefetch for the +3 cache line. */
5113 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5114 const1_rtx, const0_rtx);
5115 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5116 emit_insn (prefetch);
5117 }
5118
5119 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5120 s390_load_address (dst_addr,
5121 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5122 s390_load_address (src_addr,
5123 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5124
5125 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5126 OPTAB_DIRECT);
5127 if (temp != blocks)
5128 emit_move_insn (blocks, temp);
5129
5130 emit_cmp_and_jump_insns (blocks, const0_rtx,
5131 EQ, NULL_RTX, mode, 1, loop_end_label);
5132
5133 emit_jump (loop_start_label);
5134 emit_label (loop_end_label);
5135
5136 emit_insn (gen_movmem_short (dst, src,
5137 convert_to_mode (Pmode, count, 1)));
5138 emit_label (end_label);
5139 }
5140 return true;
5141 }
5142
5143 /* Emit code to set LEN bytes at DST to VAL.
5144 Make use of clrmem if VAL is zero. */
5145
5146 void
5147 s390_expand_setmem (rtx dst, rtx len, rtx val)
5148 {
5149 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
5150 return;
5151
5152 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5153
5154 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
5155 {
5156 if (val == const0_rtx && INTVAL (len) <= 256)
5157 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
5158 else
5159 {
5160 /* Initialize memory by storing the first byte. */
5161 emit_move_insn (adjust_address (dst, QImode, 0), val);
5162
5163 if (INTVAL (len) > 1)
5164 {
5165 /* Initiate a 1-byte overlapping move.
5166 The first byte of DST is propagated through DSTP1.
5167 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
5168 DST is set to size 1 so the rest of the memory location
5169 does not count as a source operand. */
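/* As an illustration (register choice and values arbitrary), for a
   constant length of 5 and VAL = 0xaa, with the address of DST in
   %r1, this amounts to
	mvi	0(%r1),0xaa
	mvc	1(4,%r1),0(%r1)
   where the byte-wise left-to-right operation of MVC propagates the
   first byte through the remaining four bytes. */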
5170 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
5171 set_mem_size (dst, 1);
5172
5173 emit_insn (gen_movmem_short (dstp1, dst,
5174 GEN_INT (INTVAL (len) - 2)));
5175 }
5176 }
5177 }
5178
5179 else if (TARGET_MVCLE)
5180 {
5181 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5182 emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
5183 }
5184
5185 else
5186 {
5187 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5188 rtx_code_label *loop_start_label = gen_label_rtx ();
5189 rtx_code_label *loop_end_label = gen_label_rtx ();
5190 rtx_code_label *end_label = gen_label_rtx ();
5191 machine_mode mode;
5192
5193 mode = GET_MODE (len);
5194 if (mode == VOIDmode)
5195 mode = Pmode;
5196
5197 dst_addr = gen_reg_rtx (Pmode);
5198 count = gen_reg_rtx (mode);
5199 blocks = gen_reg_rtx (mode);
5200
5201 convert_move (count, len, 1);
5202 emit_cmp_and_jump_insns (count, const0_rtx,
5203 EQ, NULL_RTX, mode, 1, end_label);
5204
5205 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5206 dst = change_address (dst, VOIDmode, dst_addr);
5207
5208 if (val == const0_rtx)
5209 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5210 OPTAB_DIRECT);
5211 else
5212 {
5213 dstp1 = adjust_address (dst, VOIDmode, 1);
5214 set_mem_size (dst, 1);
5215
5216 /* Initialize memory by storing the first byte. */
5217 emit_move_insn (adjust_address (dst, QImode, 0), val);
5218
5219 /* If count is 1 we are done. */
5220 emit_cmp_and_jump_insns (count, const1_rtx,
5221 EQ, NULL_RTX, mode, 1, end_label);
5222
5223 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
5224 OPTAB_DIRECT);
5225 }
5226 if (temp != count)
5227 emit_move_insn (count, temp);
5228
5229 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5230 OPTAB_DIRECT);
5231 if (temp != blocks)
5232 emit_move_insn (blocks, temp);
5233
5234 emit_cmp_and_jump_insns (blocks, const0_rtx,
5235 EQ, NULL_RTX, mode, 1, loop_end_label);
5236
5237 emit_label (loop_start_label);
5238
5239 if (TARGET_Z10
5240 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5241 {
5242 /* Issue a write prefetch for the +4 cache line. */
5243 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5244 GEN_INT (1024)),
5245 const1_rtx, const0_rtx);
5246 emit_insn (prefetch);
5247 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5248 }
5249
5250 if (val == const0_rtx)
5251 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5252 else
5253 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
5254 s390_load_address (dst_addr,
5255 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5256
5257 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5258 OPTAB_DIRECT);
5259 if (temp != blocks)
5260 emit_move_insn (blocks, temp);
5261
5262 emit_cmp_and_jump_insns (blocks, const0_rtx,
5263 EQ, NULL_RTX, mode, 1, loop_end_label);
5264
5265 emit_jump (loop_start_label);
5266 emit_label (loop_end_label);
5267
5268 if (val == const0_rtx)
5269 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5270 else
5271 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
5272 emit_label (end_label);
5273 }
5274 }
5275
5276 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5277 and return the result in TARGET. */
5278
5279 bool
5280 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5281 {
5282 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5283 rtx tmp;
5284
5285 /* When tuning for z10 or higher we rely on the Glibc functions to
5286 do the right thing. We generate inline code only for constant
5287 lengths below 64k. */
5288 if (s390_tune >= PROCESSOR_2097_Z10
5289 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5290 return false;
5291
5292 /* As the result of CMPINT is inverted compared to what we need,
5293 we have to swap the operands. */
5294 tmp = op0; op0 = op1; op1 = tmp;
5295
5296 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5297 {
5298 if (INTVAL (len) > 0)
5299 {
5300 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5301 emit_insn (gen_cmpint (target, ccreg));
5302 }
5303 else
5304 emit_move_insn (target, const0_rtx);
5305 }
5306 else if (TARGET_MVCLE)
5307 {
5308 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5309 emit_insn (gen_cmpint (target, ccreg));
5310 }
5311 else
5312 {
5313 rtx addr0, addr1, count, blocks, temp;
5314 rtx_code_label *loop_start_label = gen_label_rtx ();
5315 rtx_code_label *loop_end_label = gen_label_rtx ();
5316 rtx_code_label *end_label = gen_label_rtx ();
5317 machine_mode mode;
5318
5319 mode = GET_MODE (len);
5320 if (mode == VOIDmode)
5321 mode = Pmode;
5322
5323 addr0 = gen_reg_rtx (Pmode);
5324 addr1 = gen_reg_rtx (Pmode);
5325 count = gen_reg_rtx (mode);
5326 blocks = gen_reg_rtx (mode);
5327
5328 convert_move (count, len, 1);
5329 emit_cmp_and_jump_insns (count, const0_rtx,
5330 EQ, NULL_RTX, mode, 1, end_label);
5331
5332 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5333 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5334 op0 = change_address (op0, VOIDmode, addr0);
5335 op1 = change_address (op1, VOIDmode, addr1);
5336
5337 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5338 OPTAB_DIRECT);
5339 if (temp != count)
5340 emit_move_insn (count, temp);
5341
5342 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5343 OPTAB_DIRECT);
5344 if (temp != blocks)
5345 emit_move_insn (blocks, temp);
5346
5347 emit_cmp_and_jump_insns (blocks, const0_rtx,
5348 EQ, NULL_RTX, mode, 1, loop_end_label);
5349
5350 emit_label (loop_start_label);
5351
5352 if (TARGET_Z10
5353 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5354 {
5355 rtx prefetch;
5356
5357 /* Issue a read prefetch for the +2 cache line of operand 1. */
5358 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5359 const0_rtx, const0_rtx);
5360 emit_insn (prefetch);
5361 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5362
5363 /* Issue a read prefetch for the +2 cache line of operand 2. */
5364 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5365 const0_rtx, const0_rtx);
5366 emit_insn (prefetch);
5367 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5368 }
5369
5370 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5371 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5372 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5373 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5374 temp = gen_rtx_SET (pc_rtx, temp);
5375 emit_jump_insn (temp);
5376
5377 s390_load_address (addr0,
5378 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5379 s390_load_address (addr1,
5380 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5381
5382 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5383 OPTAB_DIRECT);
5384 if (temp != blocks)
5385 emit_move_insn (blocks, temp);
5386
5387 emit_cmp_and_jump_insns (blocks, const0_rtx,
5388 EQ, NULL_RTX, mode, 1, loop_end_label);
5389
5390 emit_jump (loop_start_label);
5391 emit_label (loop_end_label);
5392
5393 emit_insn (gen_cmpmem_short (op0, op1,
5394 convert_to_mode (Pmode, count, 1)));
5395 emit_label (end_label);
5396
5397 emit_insn (gen_cmpint (target, ccreg));
5398 }
5399 return true;
5400 }
5401
5402 /* Emit a conditional jump to LABEL for condition code mask MASK using
5403 comparison operator COMPARISON. Return the emitted jump insn. */
5404
5405 static rtx
5406 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5407 {
5408 rtx temp;
5409
5410 gcc_assert (comparison == EQ || comparison == NE);
5411 gcc_assert (mask > 0 && mask < 15);
5412
5413 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5414 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5415 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5416 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5417 temp = gen_rtx_SET (pc_rtx, temp);
5418 return emit_jump_insn (temp);
5419 }
5420
5421 /* Emit the instructions to implement strlen of STRING and store the
5422 result in TARGET. The string has the known ALIGNMENT. This
5423 version uses vector instructions and is therefore not appropriate
5424 for targets prior to z13. */
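/* Roughly, the code below proceeds as follows: if the start of STRING
   is not known to be 16-byte aligned, the bytes up to the next
   16-byte boundary are first loaded with vll (which zero-fills the
   rest of the vector register); afterwards full 16-byte chunks are
   loaded in a loop. Each chunk is scanned with vfene (zero-search
   flag set) for a zero byte, the byte index of the hit is extracted
   from element 7 of the result vector, and together with the current
   chunk offset it yields the string length. */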
5425
5426 void
5427 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5428 {
5429 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5430 int very_likely = REG_BR_PROB_BASE - 1;
5431 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5432 rtx str_reg = gen_reg_rtx (V16QImode);
5433 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5434 rtx str_idx_reg = gen_reg_rtx (Pmode);
5435 rtx result_reg = gen_reg_rtx (V16QImode);
5436 rtx is_aligned_label = gen_label_rtx ();
5437 rtx into_loop_label = NULL_RTX;
5438 rtx loop_start_label = gen_label_rtx ();
5439 rtx temp;
5440 rtx len = gen_reg_rtx (QImode);
5441 rtx cond;
5442
5443 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5444 emit_move_insn (str_idx_reg, const0_rtx);
5445
5446 if (INTVAL (alignment) < 16)
5447 {
5448 /* Check whether the address happens to be properly aligned; if so,
5449 jump directly to the aligned loop. */
5450 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5451 str_addr_base_reg, GEN_INT (15)),
5452 const0_rtx, EQ, NULL_RTX,
5453 Pmode, 1, is_aligned_label);
5454
5455 temp = gen_reg_rtx (Pmode);
5456 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5457 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5458 gcc_assert (REG_P (temp));
5459 highest_index_to_load_reg =
5460 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5461 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5462 gcc_assert (REG_P (highest_index_to_load_reg));
5463 emit_insn (gen_vllv16qi (str_reg,
5464 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5465 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5466
5467 into_loop_label = gen_label_rtx ();
5468 s390_emit_jump (into_loop_label, NULL_RTX);
5469 emit_barrier ();
5470 }
5471
5472 emit_label (is_aligned_label);
5473 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5474
5475 /* From this point on we only perform 16-byte aligned
5476 loads. */
5477 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5478
5479 emit_label (loop_start_label);
5480 LABEL_NUSES (loop_start_label) = 1;
5481
5482 /* Load 16 bytes of the string into VR. */
5483 emit_move_insn (str_reg,
5484 gen_rtx_MEM (V16QImode,
5485 gen_rtx_PLUS (Pmode, str_idx_reg,
5486 str_addr_base_reg)));
5487 if (into_loop_label != NULL_RTX)
5488 {
5489 emit_label (into_loop_label);
5490 LABEL_NUSES (into_loop_label) = 1;
5491 }
5492
5493 /* Increment string index by 16 bytes. */
5494 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5495 str_idx_reg, 1, OPTAB_DIRECT);
5496
5497 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5498 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5499
5500 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5501 REG_BR_PROB, very_likely);
5502 emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
5503
5504 /* If the string pointer wasn't aligned, we have loaded less than 16
5505 bytes and the remaining bytes got filled with zeros (by vll).
5506 Now we have to check whether the resulting index lies within the
5507 bytes that are actually part of the string. */
5508
5509 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5510 highest_index_to_load_reg);
5511 s390_load_address (highest_index_to_load_reg,
5512 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5513 const1_rtx));
5514 if (TARGET_64BIT)
5515 emit_insn (gen_movdicc (str_idx_reg, cond,
5516 highest_index_to_load_reg, str_idx_reg));
5517 else
5518 emit_insn (gen_movsicc (str_idx_reg, cond,
5519 highest_index_to_load_reg, str_idx_reg));
5520
5521 add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
5522 very_unlikely);
5523
5524 expand_binop (Pmode, add_optab, str_idx_reg,
5525 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5526 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5527 here. */
5528 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5529 convert_to_mode (Pmode, len, 1),
5530 target, 1, OPTAB_DIRECT);
5531 if (temp != target)
5532 emit_move_insn (target, temp);
5533 }
5534
5535 /* Expand conditional increment or decrement using alc/slb instructions.
5536 Should generate code setting DST to either SRC or SRC + INCREMENT,
5537 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5538 Returns true if successful, false otherwise.
5539
5540 That makes it possible to implement some if-constructs without jumps e.g.:
5541 (borrow = CC0 | CC1 and carry = CC2 | CC3)
5542 unsigned int a, b, c;
5543 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
5544 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
5545 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
5546 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
5547
5548 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5549 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
5550 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5551 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
5552 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
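/* Concretely, for the first example above ("if (a < b) c++;") the
   increment emitted below has the RTL shape (pseudo registers and
   modes omitted)

     (parallel
       [(set (reg c)
             (plus (plus (gtu (reg cc) (const_int 0)) (reg c))
                   (const_int 0)))
        (clobber (reg cc))])

   which is matched by the add-logical-with-carry (alc) patterns. */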
5553
5554 bool
5555 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5556 rtx dst, rtx src, rtx increment)
5557 {
5558 machine_mode cmp_mode;
5559 machine_mode cc_mode;
5560 rtx op_res;
5561 rtx insn;
5562 rtvec p;
5563 int ret;
5564
5565 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
5566 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
5567 cmp_mode = SImode;
5568 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
5569 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
5570 cmp_mode = DImode;
5571 else
5572 return false;
5573
5574 /* Try ADD LOGICAL WITH CARRY. */
5575 if (increment == const1_rtx)
5576 {
5577 /* Determine CC mode to use. */
5578 if (cmp_code == EQ || cmp_code == NE)
5579 {
5580 if (cmp_op1 != const0_rtx)
5581 {
5582 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5583 NULL_RTX, 0, OPTAB_WIDEN);
5584 cmp_op1 = const0_rtx;
5585 }
5586
5587 cmp_code = cmp_code == EQ ? LEU : GTU;
5588 }
5589
5590 if (cmp_code == LTU || cmp_code == LEU)
5591 {
5592 rtx tem = cmp_op0;
5593 cmp_op0 = cmp_op1;
5594 cmp_op1 = tem;
5595 cmp_code = swap_condition (cmp_code);
5596 }
5597
5598 switch (cmp_code)
5599 {
5600 case GTU:
5601 cc_mode = CCUmode;
5602 break;
5603
5604 case GEU:
5605 cc_mode = CCL3mode;
5606 break;
5607
5608 default:
5609 return false;
5610 }
5611
5612 /* Emit comparison instruction pattern. */
5613 if (!register_operand (cmp_op0, cmp_mode))
5614 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5615
5616 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5617 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5618 /* We use insn_invalid_p here to add clobbers if required. */
5619 ret = insn_invalid_p (emit_insn (insn), false);
5620 gcc_assert (!ret);
5621
5622 /* Emit ALC instruction pattern. */
5623 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5624 gen_rtx_REG (cc_mode, CC_REGNUM),
5625 const0_rtx);
5626
5627 if (src != const0_rtx)
5628 {
5629 if (!register_operand (src, GET_MODE (dst)))
5630 src = force_reg (GET_MODE (dst), src);
5631
5632 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
5633 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
5634 }
5635
5636 p = rtvec_alloc (2);
5637 RTVEC_ELT (p, 0) =
5638 gen_rtx_SET (dst, op_res);
5639 RTVEC_ELT (p, 1) =
5640 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5641 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5642
5643 return true;
5644 }
5645
5646 /* Try SUBTRACT LOGICAL WITH BORROW. */
5647 if (increment == constm1_rtx)
5648 {
5649 /* Determine CC mode to use. */
5650 if (cmp_code == EQ || cmp_code == NE)
5651 {
5652 if (cmp_op1 != const0_rtx)
5653 {
5654 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5655 NULL_RTX, 0, OPTAB_WIDEN);
5656 cmp_op1 = const0_rtx;
5657 }
5658
5659 cmp_code = cmp_code == EQ ? LEU : GTU;
5660 }
5661
5662 if (cmp_code == GTU || cmp_code == GEU)
5663 {
5664 rtx tem = cmp_op0;
5665 cmp_op0 = cmp_op1;
5666 cmp_op1 = tem;
5667 cmp_code = swap_condition (cmp_code);
5668 }
5669
5670 switch (cmp_code)
5671 {
5672 case LEU:
5673 cc_mode = CCUmode;
5674 break;
5675
5676 case LTU:
5677 cc_mode = CCL3mode;
5678 break;
5679
5680 default:
5681 return false;
5682 }
5683
5684 /* Emit comparison instruction pattern. */
5685 if (!register_operand (cmp_op0, cmp_mode))
5686 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5687
5688 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5689 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5690 /* We use insn_invalid_p here to add clobbers if required. */
5691 ret = insn_invalid_p (emit_insn (insn), false);
5692 gcc_assert (!ret);
5693
5694 /* Emit SLB instruction pattern. */
5695 if (!register_operand (src, GET_MODE (dst)))
5696 src = force_reg (GET_MODE (dst), src);
5697
5698 op_res = gen_rtx_MINUS (GET_MODE (dst),
5699 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
5700 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5701 gen_rtx_REG (cc_mode, CC_REGNUM),
5702 const0_rtx));
5703 p = rtvec_alloc (2);
5704 RTVEC_ELT (p, 0) =
5705 gen_rtx_SET (dst, op_res);
5706 RTVEC_ELT (p, 1) =
5707 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5708 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5709
5710 return true;
5711 }
5712
5713 return false;
5714 }
5715
5716 /* Expand code for the insv template. Return true if successful. */
5717
5718 bool
5719 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
5720 {
5721 int bitsize = INTVAL (op1);
5722 int bitpos = INTVAL (op2);
5723 machine_mode mode = GET_MODE (dest);
5724 machine_mode smode;
5725 int smode_bsize, mode_bsize;
5726 rtx op, clobber;
5727
5728 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
5729 return false;
5730
5731 /* Generate INSERT IMMEDIATE (IILL et al). */
5732 /* (set (ze (reg)) (const_int)). */
5733 if (TARGET_ZARCH
5734 && register_operand (dest, word_mode)
5735 && (bitpos % 16) == 0
5736 && (bitsize % 16) == 0
5737 && const_int_operand (src, VOIDmode))
5738 {
5739 HOST_WIDE_INT val = INTVAL (src);
5740 int regpos = bitpos + bitsize;
5741
5742 while (regpos > bitpos)
5743 {
5744 machine_mode putmode;
5745 int putsize;
5746
5747 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
5748 putmode = SImode;
5749 else
5750 putmode = HImode;
5751
5752 putsize = GET_MODE_BITSIZE (putmode);
5753 regpos -= putsize;
5754 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5755 GEN_INT (putsize),
5756 GEN_INT (regpos)),
5757 gen_int_mode (val, putmode));
5758 val >>= putsize;
5759 }
5760 gcc_assert (regpos == bitpos);
5761 return true;
5762 }
5763
5764 smode = smallest_mode_for_size (bitsize, MODE_INT);
5765 smode_bsize = GET_MODE_BITSIZE (smode);
5766 mode_bsize = GET_MODE_BITSIZE (mode);
5767
5768 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
5769 if (bitpos == 0
5770 && (bitsize % BITS_PER_UNIT) == 0
5771 && MEM_P (dest)
5772 && (register_operand (src, word_mode)
5773 || const_int_operand (src, VOIDmode)))
5774 {
5775 /* Emit standard pattern if possible. */
5776 if (smode_bsize == bitsize)
5777 {
5778 emit_move_insn (adjust_address (dest, smode, 0),
5779 gen_lowpart (smode, src));
5780 return true;
5781 }
5782
5783 /* (set (ze (mem)) (const_int)). */
5784 else if (const_int_operand (src, VOIDmode))
5785 {
5786 int size = bitsize / BITS_PER_UNIT;
5787 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
5788 BLKmode,
5789 UNITS_PER_WORD - size);
5790
5791 dest = adjust_address (dest, BLKmode, 0);
5792 set_mem_size (dest, size);
5793 s390_expand_movmem (dest, src_mem, GEN_INT (size));
5794 return true;
5795 }
5796
5797 /* (set (ze (mem)) (reg)). */
5798 else if (register_operand (src, word_mode))
5799 {
5800 if (bitsize <= 32)
5801 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
5802 const0_rtx), src);
5803 else
5804 {
5805 /* Emit st,stcmh sequence. */
5806 int stcmh_width = bitsize - 32;
5807 int size = stcmh_width / BITS_PER_UNIT;
5808
5809 emit_move_insn (adjust_address (dest, SImode, size),
5810 gen_lowpart (SImode, src));
5811 set_mem_size (dest, size);
5812 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5813 GEN_INT (stcmh_width),
5814 const0_rtx),
5815 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
5816 }
5817 return true;
5818 }
5819 }
5820
5821 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
5822 if ((bitpos % BITS_PER_UNIT) == 0
5823 && (bitsize % BITS_PER_UNIT) == 0
5824 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
5825 && MEM_P (src)
5826 && (mode == DImode || mode == SImode)
5827 && register_operand (dest, mode))
5828 {
5829 /* Emit a strict_low_part pattern if possible. */
5830 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
5831 {
5832 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
5833 op = gen_rtx_SET (op, gen_lowpart (smode, src));
5834 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5835 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
5836 return true;
5837 }
5838
5839 /* ??? There are more powerful versions of ICM that are not
5840 completely represented in the md file. */
5841 }
5842
5843 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
5844 if (TARGET_Z10 && (mode == DImode || mode == SImode))
5845 {
5846 machine_mode mode_s = GET_MODE (src);
5847
5848 if (mode_s == VOIDmode)
5849 {
5850 /* Assume const_int etc already in the proper mode. */
5851 src = force_reg (mode, src);
5852 }
5853 else if (mode_s != mode)
5854 {
5855 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
5856 src = force_reg (mode_s, src);
5857 src = gen_lowpart (mode, src);
5858 }
5859
5860 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
5861 op = gen_rtx_SET (op, src);
5862
5863 if (!TARGET_ZEC12)
5864 {
5865 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5866 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
5867 }
5868 emit_insn (op);
5869
5870 return true;
5871 }
5872
5873 return false;
5874 }
5875
5876 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
5877 register that holds VAL of mode MODE shifted by COUNT bits. */
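/* A worked example (hypothetical values): for MODE == QImode the mask is
   0xff, so with VAL holding 0x1234 and COUNT == (const_int 16) the returned
   SImode register would contain (0x1234 & 0xff) << 16 == 0x340000.  */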
5878
5879 static inline rtx
5880 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
5881 {
5882 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
5883 NULL_RTX, 1, OPTAB_DIRECT);
5884 return expand_simple_binop (SImode, ASHIFT, val, count,
5885 NULL_RTX, 1, OPTAB_DIRECT);
5886 }
5887
5888 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
5889 the result in TARGET. */
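/* For instance, an integer-vector GE comparison a >= b is emitted below as
   the swapped and negated GT: target = ~(b > a), i.e. one compare insn
   followed by one NOT insn.  */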
5890
5891 void
5892 s390_expand_vec_compare (rtx target, enum rtx_code cond,
5893 rtx cmp_op1, rtx cmp_op2)
5894 {
5895 machine_mode mode = GET_MODE (target);
5896 bool neg_p = false, swap_p = false;
5897 rtx tmp;
5898
5899 if (GET_MODE (cmp_op1) == V2DFmode)
5900 {
5901 switch (cond)
5902 {
5903 /* NE: a != b -> !(a == b) */
5904 case NE: cond = EQ; neg_p = true; break;
5905 /* UNGT: a u> b -> !(b >= a) */
5906 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
5907 /* UNGE: a u>= b -> !(b > a) */
5908 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
5909 /* LE: a <= b -> b >= a */
5910 case LE: cond = GE; swap_p = true; break;
5911 /* UNLE: a u<= b -> !(a > b) */
5912 case UNLE: cond = GT; neg_p = true; break;
5913 /* LT: a < b -> b > a */
5914 case LT: cond = GT; swap_p = true; break;
5915 /* UNLT: a u< b -> !(a >= b) */
5916 case UNLT: cond = GE; neg_p = true; break;
5917 case UNEQ:
5918 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
5919 return;
5920 case LTGT:
5921 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
5922 return;
5923 case ORDERED:
5924 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
5925 return;
5926 case UNORDERED:
5927 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
5928 return;
5929 default: break;
5930 }
5931 }
5932 else
5933 {
5934 switch (cond)
5935 {
5936 /* NE: a != b -> !(a == b) */
5937 case NE: cond = EQ; neg_p = true; break;
5938 /* GE: a >= b -> !(b > a) */
5939 case GE: cond = GT; neg_p = true; swap_p = true; break;
5940 /* GEU: a >= b -> !(b > a) */
5941 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
5942 /* LE: a <= b -> !(a > b) */
5943 case LE: cond = GT; neg_p = true; break;
5944 /* LEU: a <= b -> !(a > b) */
5945 case LEU: cond = GTU; neg_p = true; break;
5946 /* LT: a < b -> b > a */
5947 case LT: cond = GT; swap_p = true; break;
5948 /* LTU: a < b -> b > a */
5949 case LTU: cond = GTU; swap_p = true; break;
5950 default: break;
5951 }
5952 }
5953
5954 if (swap_p)
5955 {
5956 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
5957 }
5958
5959 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
5960 mode,
5961 cmp_op1, cmp_op2)));
5962 if (neg_p)
5963 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
5964 }
5965
5966 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
5967 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
5968 elements in CMP1 and CMP2 fulfill the comparison. */
5969 void
5970 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
5971 rtx cmp1, rtx cmp2, bool all_p)
5972 {
5973 enum rtx_code new_code = code;
5974 machine_mode cmp_mode, full_cmp_mode, scratch_mode;
5975 rtx tmp_reg = gen_reg_rtx (SImode);
5976 bool swap_p = false;
5977
5978 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
5979 {
5980 switch (code)
5981 {
5982 case EQ: cmp_mode = CCVEQmode; break;
5983 case NE: cmp_mode = CCVEQmode; break;
5984 case GT: cmp_mode = CCVHmode; break;
5985 case GE: cmp_mode = CCVHmode; new_code = LE; swap_p = true; break;
5986 case LT: cmp_mode = CCVHmode; new_code = GT; swap_p = true; break;
5987 case LE: cmp_mode = CCVHmode; new_code = LE; break;
5988 case GTU: cmp_mode = CCVHUmode; break;
5989 case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break;
5990 case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break;
5991 case LEU: cmp_mode = CCVHUmode; new_code = LEU; break;
5992 default: gcc_unreachable ();
5993 }
5994 scratch_mode = GET_MODE (cmp1);
5995 }
5996 else if (GET_MODE (cmp1) == V2DFmode)
5997 {
5998 switch (code)
5999 {
6000 case EQ: cmp_mode = CCVEQmode; break;
6001 case NE: cmp_mode = CCVEQmode; break;
6002 case GT: cmp_mode = CCVFHmode; break;
6003 case GE: cmp_mode = CCVFHEmode; break;
6004 case UNLE: cmp_mode = CCVFHmode; break;
6005 case UNLT: cmp_mode = CCVFHEmode; break;
6006 case LT: cmp_mode = CCVFHmode; new_code = GT; swap_p = true; break;
6007 case LE: cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break;
6008 default: gcc_unreachable ();
6009 }
6010 scratch_mode = V2DImode;
6011 }
6012 else
6013 gcc_unreachable ();
6014
6015 if (!all_p)
6016 switch (cmp_mode)
6017 {
6018 case CCVEQmode: full_cmp_mode = CCVEQANYmode; break;
6019 case CCVHmode: full_cmp_mode = CCVHANYmode; break;
6020 case CCVHUmode: full_cmp_mode = CCVHUANYmode; break;
6021 case CCVFHmode: full_cmp_mode = CCVFHANYmode; break;
6022 case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break;
6023 default: gcc_unreachable ();
6024 }
6025 else
6026 /* The CC modes without the ANY suffix already implement the ALL semantics. */
6027 full_cmp_mode = cmp_mode;
6028
6029 if (swap_p)
6030 {
6031 rtx tmp = cmp2;
6032 cmp2 = cmp1;
6033 cmp1 = tmp;
6034 }
6035
6036 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6037 gen_rtvec (2, gen_rtx_SET (
6038 gen_rtx_REG (cmp_mode, CC_REGNUM),
6039 gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)),
6040 gen_rtx_CLOBBER (VOIDmode,
6041 gen_rtx_SCRATCH (scratch_mode)))));
6042 emit_move_insn (target, const0_rtx);
6043 emit_move_insn (tmp_reg, const1_rtx);
6044
6045 emit_move_insn (target,
6046 gen_rtx_IF_THEN_ELSE (SImode,
6047 gen_rtx_fmt_ee (new_code, VOIDmode,
6048 gen_rtx_REG (full_cmp_mode, CC_REGNUM),
6049 const0_rtx),
6050 target, tmp_reg));
6051 }
6052
6053 /* Generate a vector comparison expression loading elements of either THEN
6054 or ELS into TARGET depending on the comparison COND of CMP_OP1 and
6055 CMP_OP2. */
6056
6057 void
6058 s390_expand_vcond (rtx target, rtx then, rtx els,
6059 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6060 {
6061 rtx tmp;
6062 machine_mode result_mode;
6063 rtx result_target;
6064
6065 /* We always use an integral type vector to hold the comparison
6066 result. */
6067 result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1);
6068 result_target = gen_reg_rtx (result_mode);
6069
6070 /* Alternatively this could be left to reload by lowering the cmp*
6071 predicates. But it appears to be better for scheduling etc. to
6072 have this done early. */
6073 if (!REG_P (cmp_op1))
6074 cmp_op1 = force_reg (GET_MODE (target), cmp_op1);
6075
6076 if (!REG_P (cmp_op2))
6077 cmp_op2 = force_reg (GET_MODE (target), cmp_op2);
6078
6079 s390_expand_vec_compare (result_target, cond,
6080 cmp_op1, cmp_op2);
6081
6082 /* If the results are supposed to be either -1 or 0 we are done
6083 since this is what our compare instructions generate anyway. */
6084 if (constm1_operand (then, GET_MODE (then))
6085 && const0_operand (els, GET_MODE (els)))
6086 {
6087 emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target),
6088 result_target, 0));
6089 return;
6090 }
6091
6092 /* Otherwise we will do a vsel afterwards. */
6093 /* This gets triggered e.g.
6094 with gcc.c-torture/compile/pr53410-1.c */
6095 if (!REG_P (then))
6096 then = force_reg (GET_MODE (target), then);
6097
6098 if (!REG_P (els))
6099 els = force_reg (GET_MODE (target), els);
6100
6101 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6102 result_target,
6103 CONST0_RTX (result_mode));
6104
6105 /* We compared the result against zero above so we have to swap then
6106 and els here. */
6107 tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then);
6108
6109 gcc_assert (GET_MODE (target) == GET_MODE (then));
6110 emit_insn (gen_rtx_SET (target, tmp));
6111 }
6112
6113 /* Emit the RTX necessary to initialize the vector TARGET with values
6114 in VALS. */
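/* For instance, initializing a V2DImode vector from two DImode registers is
   emitted below as a single VEC_CONCAT (vector load pair), while an all-equal
   constant vector that forms a contiguous bitmask or byte mask is emitted
   directly as a CONST_VECTOR move.  */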
6115 void
6116 s390_expand_vec_init (rtx target, rtx vals)
6117 {
6118 machine_mode mode = GET_MODE (target);
6119 machine_mode inner_mode = GET_MODE_INNER (mode);
6120 int n_elts = GET_MODE_NUNITS (mode);
6121 bool all_same = true, all_regs = true, all_const_int = true;
6122 rtx x;
6123 int i;
6124
6125 for (i = 0; i < n_elts; ++i)
6126 {
6127 x = XVECEXP (vals, 0, i);
6128
6129 if (!CONST_INT_P (x))
6130 all_const_int = false;
6131
6132 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6133 all_same = false;
6134
6135 if (!REG_P (x))
6136 all_regs = false;
6137 }
6138
6139 /* Use vector gen mask or vector gen byte mask if possible. */
6140 if (all_same && all_const_int
6141 && (XVECEXP (vals, 0, 0) == const0_rtx
6142 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6143 NULL, NULL)
6144 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6145 {
6146 emit_insn (gen_rtx_SET (target,
6147 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6148 return;
6149 }
6150
6151 if (all_same)
6152 {
6153 emit_insn (gen_rtx_SET (target,
6154 gen_rtx_VEC_DUPLICATE (mode,
6155 XVECEXP (vals, 0, 0))));
6156 return;
6157 }
6158
6159 if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
6160 {
6161 /* Use vector load pair. */
6162 emit_insn (gen_rtx_SET (target,
6163 gen_rtx_VEC_CONCAT (mode,
6164 XVECEXP (vals, 0, 0),
6165 XVECEXP (vals, 0, 1))));
6166 return;
6167 }
6168
6169 /* We are about to set the vector elements one by one. Zero out the
6170 full register first in order to help the data-flow framework
6171 detect it as a full VR set. */
6172 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6173
6174 /* Unfortunately the vec_init expander is not allowed to fail. So
6175 we have to implement the fallback ourselves. */
6176 for (i = 0; i < n_elts; i++)
6177 emit_insn (gen_rtx_SET (target,
6178 gen_rtx_UNSPEC (mode,
6179 gen_rtvec (3, XVECEXP (vals, 0, i),
6180 GEN_INT (i), target),
6181 UNSPEC_VEC_SET)));
6182 }
6183
6184 /* Structure to hold the initial parameters for a compare_and_swap operation
6185 in HImode and QImode. */
6186
6187 struct alignment_context
6188 {
6189 rtx memsi; /* SI aligned memory location. */
6190 rtx shift; /* Bit offset with regard to lsb. */
6191 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6192 rtx modemaski; /* ~modemask */
6193 bool aligned; /* True if memory is aligned, false otherwise. */
6194 };
6195
6196 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6197 the structure AC for transparent simplification if the memory alignment
6198 is known to be at least 32 bits. MEM is the memory location for the
6199 actual operation and MODE its mode. */
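/* Illustrative walk-through with hypothetical values: for MODE == QImode and
   an address whose byte offset within its aligned SImode word is 1, the code
   below computes ac->shift = ((4 - 1) - 1) * 8 == 16 bits, hence
   ac->modemask == 0xff << 16 == 0x00ff0000 and ac->modemaski == 0xff00ffff,
   selecting the second byte of the big-endian word.  */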
6200
6201 static void
6202 init_alignment_context (struct alignment_context *ac, rtx mem,
6203 machine_mode mode)
6204 {
6205 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6206 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6207
6208 if (ac->aligned)
6209 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6210 else
6211 {
6212 /* Alignment is unknown. */
6213 rtx byteoffset, addr, align;
6214
6215 /* Force the address into a register. */
6216 addr = force_reg (Pmode, XEXP (mem, 0));
6217
6218 /* Align it to SImode. */
6219 align = expand_simple_binop (Pmode, AND, addr,
6220 GEN_INT (-GET_MODE_SIZE (SImode)),
6221 NULL_RTX, 1, OPTAB_DIRECT);
6222 /* Generate MEM. */
6223 ac->memsi = gen_rtx_MEM (SImode, align);
6224 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6225 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6226 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6227
6228 /* Calculate shiftcount. */
6229 byteoffset = expand_simple_binop (Pmode, AND, addr,
6230 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6231 NULL_RTX, 1, OPTAB_DIRECT);
6232 /* As we already have some offset, evaluate the remaining distance. */
6233 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6234 NULL_RTX, 1, OPTAB_DIRECT);
6235 }
6236
6237 /* Shift is the byte count, but we need the bitcount. */
6238 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6239 NULL_RTX, 1, OPTAB_DIRECT);
6240
6241 /* Calculate masks. */
6242 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6243 GEN_INT (GET_MODE_MASK (mode)),
6244 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6245 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6246 NULL_RTX, 1);
6247 }
6248
6249 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6250 emit a single insv insn into SEQ2. Otherwise, put the preparation insns
6251 into SEQ1 and perform the merge in SEQ2. */
6252
6253 static rtx
6254 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6255 machine_mode mode, rtx val, rtx ins)
6256 {
6257 rtx tmp;
6258
6259 if (ac->aligned)
6260 {
6261 start_sequence ();
6262 tmp = copy_to_mode_reg (SImode, val);
6263 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6264 const0_rtx, ins))
6265 {
6266 *seq1 = NULL;
6267 *seq2 = get_insns ();
6268 end_sequence ();
6269 return tmp;
6270 }
6271 end_sequence ();
6272 }
6273
6274 /* Failed to use insv. Generate a two part shift and mask. */
6275 start_sequence ();
6276 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6277 *seq1 = get_insns ();
6278 end_sequence ();
6279
6280 start_sequence ();
6281 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6282 *seq2 = get_insns ();
6283 end_sequence ();
6284
6285 return tmp;
6286 }
6287
6288 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6289 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6290 value to set if CMP == MEM. */
6291
6292 void
6293 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6294 rtx cmp, rtx new_rtx, bool is_weak)
6295 {
6296 struct alignment_context ac;
6297 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6298 rtx res = gen_reg_rtx (SImode);
6299 rtx_code_label *csloop = NULL, *csend = NULL;
6300
6301 gcc_assert (MEM_P (mem));
6302
6303 init_alignment_context (&ac, mem, mode);
6304
6305 /* Load full word. Subsequent loads are performed by CS. */
6306 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6307 NULL_RTX, 1, OPTAB_DIRECT);
6308
6309 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6310 possible, we try to use insv to make this happen efficiently. If
6311 that fails we'll generate code both inside and outside the loop. */
6312 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6313 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6314
6315 if (seq0)
6316 emit_insn (seq0);
6317 if (seq1)
6318 emit_insn (seq1);
6319
6320 /* Start CS loop. */
6321 if (!is_weak)
6322 {
6323 /* Begin assuming success. */
6324 emit_move_insn (btarget, const1_rtx);
6325
6326 csloop = gen_label_rtx ();
6327 csend = gen_label_rtx ();
6328 emit_label (csloop);
6329 }
6330
6331 /* val = "<mem>00..0<mem>"
6332 * cmp = "00..0<cmp>00..0"
6333 * new = "00..0<new>00..0"
6334 */
6335
6336 emit_insn (seq2);
6337 emit_insn (seq3);
6338
6339 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
6340 if (is_weak)
6341 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6342 else
6343 {
6344 rtx tmp;
6345
6346 /* Jump to end if we're done (likely?). */
6347 s390_emit_jump (csend, cc);
6348
6349 /* Check for changes outside mode, and loop internally if so.
6350 Arrange the moves so that the compare is adjacent to the
6351 branch so that we can generate CRJ. */
6352 tmp = copy_to_reg (val);
6353 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6354 1, OPTAB_DIRECT);
6355 cc = s390_emit_compare (NE, val, tmp);
6356 s390_emit_jump (csloop, cc);
6357
6358 /* Failed. */
6359 emit_move_insn (btarget, const0_rtx);
6360 emit_label (csend);
6361 }
6362
6363 /* Return the correct part of the bitfield. */
6364 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6365 NULL_RTX, 1, OPTAB_DIRECT), 1);
6366 }
6367
6368 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
6369 and VAL the second operand. If AFTER is true then store the value MEM
6370 holds after the operation; if AFTER is false then store the value MEM
6371 holds before the operation. If TARGET is zero then discard that value,
6372 else store it to TARGET. */
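/* A hypothetical example for CODE == MULT (NAND) with a QImode field whose
   ac.modemask is 0x0000ff00: VAL is first turned into "11..1<val>11..1"
   (val ^ modemaski), so each loop iteration computes
   new = (old & "11..1<val>11..1") ^ 0x0000ff00, i.e. ~(old & val) within
   the byte while all bits outside the field keep their old value.  */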
6373
6374 void
6375 s390_expand_atomic (machine_mode mode, enum rtx_code code,
6376 rtx target, rtx mem, rtx val, bool after)
6377 {
6378 struct alignment_context ac;
6379 rtx cmp;
6380 rtx new_rtx = gen_reg_rtx (SImode);
6381 rtx orig = gen_reg_rtx (SImode);
6382 rtx_code_label *csloop = gen_label_rtx ();
6383
6384 gcc_assert (!target || register_operand (target, VOIDmode));
6385 gcc_assert (MEM_P (mem));
6386
6387 init_alignment_context (&ac, mem, mode);
6388
6389 /* Shift val to the correct bit positions.
6390 Preserve "icm", but prevent "ex icm". */
6391 if (!(ac.aligned && code == SET && MEM_P (val)))
6392 val = s390_expand_mask_and_shift (val, mode, ac.shift);
6393
6394 /* Further preparation insns. */
6395 if (code == PLUS || code == MINUS)
6396 emit_move_insn (orig, val);
6397 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
6398 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
6399 NULL_RTX, 1, OPTAB_DIRECT);
6400
6401 /* Load full word. Subsequent loads are performed by CS. */
6402 cmp = force_reg (SImode, ac.memsi);
6403
6404 /* Start CS loop. */
6405 emit_label (csloop);
6406 emit_move_insn (new_rtx, cmp);
6407
6408 /* Patch new with val at correct position. */
6409 switch (code)
6410 {
6411 case PLUS:
6412 case MINUS:
6413 val = expand_simple_binop (SImode, code, new_rtx, orig,
6414 NULL_RTX, 1, OPTAB_DIRECT);
6415 val = expand_simple_binop (SImode, AND, val, ac.modemask,
6416 NULL_RTX, 1, OPTAB_DIRECT);
6417 /* FALLTHRU */
6418 case SET:
6419 if (ac.aligned && MEM_P (val))
6420 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
6421 0, 0, SImode, val);
6422 else
6423 {
6424 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
6425 NULL_RTX, 1, OPTAB_DIRECT);
6426 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
6427 NULL_RTX, 1, OPTAB_DIRECT);
6428 }
6429 break;
6430 case AND:
6431 case IOR:
6432 case XOR:
6433 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
6434 NULL_RTX, 1, OPTAB_DIRECT);
6435 break;
6436 case MULT: /* NAND */
6437 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
6438 NULL_RTX, 1, OPTAB_DIRECT);
6439 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
6440 NULL_RTX, 1, OPTAB_DIRECT);
6441 break;
6442 default:
6443 gcc_unreachable ();
6444 }
6445
6446 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
6447 ac.memsi, cmp, new_rtx));
6448
6449 /* Return the correct part of the bitfield. */
6450 if (target)
6451 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
6452 after ? new_rtx : cmp, ac.shift,
6453 NULL_RTX, 1, OPTAB_DIRECT), 1);
6454 }
6455
6456 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6457 We need to emit DTP-relative relocations. */
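/* For example, with SIZE == 8 and X being a SYMBOL_REF for a (hypothetical)
   symbol "foo", the routine below emits "\t.quad\tfoo@DTPOFF".  */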
6458
6459 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6460
6461 static void
6462 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
6463 {
6464 switch (size)
6465 {
6466 case 4:
6467 fputs ("\t.long\t", file);
6468 break;
6469 case 8:
6470 fputs ("\t.quad\t", file);
6471 break;
6472 default:
6473 gcc_unreachable ();
6474 }
6475 output_addr_const (file, x);
6476 fputs ("@DTPOFF", file);
6477 }
6478
6479 /* Return the proper mode for REGNO being represented in the dwarf
6480 unwind table. */
6481 machine_mode
6482 s390_dwarf_frame_reg_mode (int regno)
6483 {
6484 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
6485
6486 /* The rightmost 64 bits of vector registers are call-clobbered. */
6487 if (GET_MODE_SIZE (save_mode) > 8)
6488 save_mode = DImode;
6489
6490 return save_mode;
6491 }
6492
6493 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
6494 /* Implement TARGET_MANGLE_TYPE. */
6495
6496 static const char *
6497 s390_mangle_type (const_tree type)
6498 {
6499 type = TYPE_MAIN_VARIANT (type);
6500
6501 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
6502 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
6503 return NULL;
6504
6505 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
6506 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
6507 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
6508 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
6509
6510 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
6511 && TARGET_LONG_DOUBLE_128)
6512 return "g";
6513
6514 /* For all other types, use normal C++ mangling. */
6515 return NULL;
6516 }
6517 #endif
6518
6519 /* In the name of slightly smaller debug output, and to cater to
6520 general assembler lossage, recognize various UNSPEC sequences
6521 and turn them back into a direct symbol reference. */
6522
6523 static rtx
6524 s390_delegitimize_address (rtx orig_x)
6525 {
6526 rtx x, y;
6527
6528 orig_x = delegitimize_mem_from_attrs (orig_x);
6529 x = orig_x;
6530
6531 /* Extract the symbol ref from:
6532 (plus:SI (reg:SI 12 %r12)
6533 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
6534 UNSPEC_GOTOFF/PLTOFF)))
6535 and
6536 (plus:SI (reg:SI 12 %r12)
6537 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
6538 UNSPEC_GOTOFF/PLTOFF)
6539 (const_int 4 [0x4])))) */
6540 if (GET_CODE (x) == PLUS
6541 && REG_P (XEXP (x, 0))
6542 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
6543 && GET_CODE (XEXP (x, 1)) == CONST)
6544 {
6545 HOST_WIDE_INT offset = 0;
6546
6547 /* The const operand. */
6548 y = XEXP (XEXP (x, 1), 0);
6549
6550 if (GET_CODE (y) == PLUS
6551 && GET_CODE (XEXP (y, 1)) == CONST_INT)
6552 {
6553 offset = INTVAL (XEXP (y, 1));
6554 y = XEXP (y, 0);
6555 }
6556
6557 if (GET_CODE (y) == UNSPEC
6558 && (XINT (y, 1) == UNSPEC_GOTOFF
6559 || XINT (y, 1) == UNSPEC_PLTOFF))
6560 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
6561 }
6562
6563 if (GET_CODE (x) != MEM)
6564 return orig_x;
6565
6566 x = XEXP (x, 0);
6567 if (GET_CODE (x) == PLUS
6568 && GET_CODE (XEXP (x, 1)) == CONST
6569 && GET_CODE (XEXP (x, 0)) == REG
6570 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6571 {
6572 y = XEXP (XEXP (x, 1), 0);
6573 if (GET_CODE (y) == UNSPEC
6574 && XINT (y, 1) == UNSPEC_GOT)
6575 y = XVECEXP (y, 0, 0);
6576 else
6577 return orig_x;
6578 }
6579 else if (GET_CODE (x) == CONST)
6580 {
6581 /* Extract the symbol ref from:
6582 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
6583 UNSPEC_PLT/GOTENT))) */
6584
6585 y = XEXP (x, 0);
6586 if (GET_CODE (y) == UNSPEC
6587 && (XINT (y, 1) == UNSPEC_GOTENT
6588 || XINT (y, 1) == UNSPEC_PLT))
6589 y = XVECEXP (y, 0, 0);
6590 else
6591 return orig_x;
6592 }
6593 else
6594 return orig_x;
6595
6596 if (GET_MODE (orig_x) != Pmode)
6597 {
6598 if (GET_MODE (orig_x) == BLKmode)
6599 return orig_x;
6600 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
6601 if (y == NULL_RTX)
6602 return orig_x;
6603 }
6604 return y;
6605 }
6606
6607 /* Output operand OP to stdio stream FILE.
6608 OP is an address (register + offset) which is not used to address data;
6609 instead the rightmost bits are interpreted as the value. */
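/* E.g. an operand of the form (plus (reg %r3) (const_int 7)) is printed as
   "7(%r3)", and a plain (const_int 13) as "13"; the register number is
   chosen for illustration only.  */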
6610
6611 static void
6612 print_shift_count_operand (FILE *file, rtx op)
6613 {
6614 HOST_WIDE_INT offset;
6615 rtx base;
6616
6617 /* Extract base register and offset. */
6618 if (!s390_decompose_shift_count (op, &base, &offset))
6619 gcc_unreachable ();
6620
6621 /* Sanity check. */
6622 if (base)
6623 {
6624 gcc_assert (GET_CODE (base) == REG);
6625 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
6626 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
6627 }
6628
6629 /* Offsets are restricted to twelve bits. */
6630 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
6631 if (base)
6632 fprintf (file, "(%s)", reg_names[REGNO (base)]);
6633 }
6634
6635 /* Assigns the number of NOP halfwords to be emitted before and after the
6636 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
6637 If hotpatching is disabled for the function, the values are set to zero.
6638 */
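/* For example, a function declared with __attribute__ ((hotpatch (1, 2)))
   gets *HW_BEFORE == 1 and *HW_AFTER == 2; without the attribute the
   command line values s390_hotpatch_hw_before_label and
   s390_hotpatch_hw_after_label are used instead.  */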
6639
6640 static void
6641 s390_function_num_hotpatch_hw (tree decl,
6642 int *hw_before,
6643 int *hw_after)
6644 {
6645 tree attr;
6646
6647 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
6648
6649 /* Handle the arguments of the hotpatch attribute. The values
6650 specified via attribute might override the cmdline argument
6651 values. */
6652 if (attr)
6653 {
6654 tree args = TREE_VALUE (attr);
6655
6656 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
6657 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
6658 }
6659 else
6660 {
6661 /* Use the values specified by the cmdline arguments. */
6662 *hw_before = s390_hotpatch_hw_before_label;
6663 *hw_after = s390_hotpatch_hw_after_label;
6664 }
6665 }
6666
6667 /* Write the extra assembler code needed to declare a function properly. */
6668
6669 void
6670 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
6671 tree decl)
6672 {
6673 int hw_before, hw_after;
6674
6675 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
6676 if (hw_before > 0)
6677 {
6678 unsigned int function_alignment;
6679 int i;
6680
6681 /* Add a trampoline code area before the function label and initialize it
6682 with two-byte nop instructions. This area can be overwritten with code
6683 that jumps to a patched version of the function. */
6684 asm_fprintf (asm_out_file, "\tnopr\t%%r7"
6685 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
6686 hw_before);
6687 for (i = 1; i < hw_before; i++)
6688 fputs ("\tnopr\t%r7\n", asm_out_file);
6689
6690 /* Note: The function label must be aligned so that (a) the bytes of the
6691 following nop do not cross a cacheline boundary, and (b) a jump address
6692 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
6693 stored directly before the label without crossing a cacheline
6694 boundary. All this is necessary to make sure the trampoline code can
6695 be changed atomically.
6696 This alignment is done automatically using the FUNCTION_BOUNDARY, but
6697 if there are NOPs before the function label, the alignment is placed
6698 before them. So it is necessary to duplicate the alignment after the
6699 NOPs. */
6700 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
6701 if (! DECL_USER_ALIGN (decl))
6702 function_alignment = MAX (function_alignment,
6703 (unsigned int) align_functions);
6704 fputs ("\t# alignment for hotpatch\n", asm_out_file);
6705 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
6706 }
6707
6708 ASM_OUTPUT_LABEL (asm_out_file, fname);
6709 if (hw_after > 0)
6710 asm_fprintf (asm_out_file,
6711 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
6712 hw_after);
6713 }
6714
6715 /* Output machine-dependent UNSPECs occurring in address constant X
6716 in assembler syntax to stdio stream FILE. Returns true if the
6717 constant X could be recognized, false otherwise. */
6718
6719 static bool
6720 s390_output_addr_const_extra (FILE *file, rtx x)
6721 {
6722 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
6723 switch (XINT (x, 1))
6724 {
6725 case UNSPEC_GOTENT:
6726 output_addr_const (file, XVECEXP (x, 0, 0));
6727 fprintf (file, "@GOTENT");
6728 return true;
6729 case UNSPEC_GOT:
6730 output_addr_const (file, XVECEXP (x, 0, 0));
6731 fprintf (file, "@GOT");
6732 return true;
6733 case UNSPEC_GOTOFF:
6734 output_addr_const (file, XVECEXP (x, 0, 0));
6735 fprintf (file, "@GOTOFF");
6736 return true;
6737 case UNSPEC_PLT:
6738 output_addr_const (file, XVECEXP (x, 0, 0));
6739 fprintf (file, "@PLT");
6740 return true;
6741 case UNSPEC_PLTOFF:
6742 output_addr_const (file, XVECEXP (x, 0, 0));
6743 fprintf (file, "@PLTOFF");
6744 return true;
6745 case UNSPEC_TLSGD:
6746 output_addr_const (file, XVECEXP (x, 0, 0));
6747 fprintf (file, "@TLSGD");
6748 return true;
6749 case UNSPEC_TLSLDM:
6750 assemble_name (file, get_some_local_dynamic_name ());
6751 fprintf (file, "@TLSLDM");
6752 return true;
6753 case UNSPEC_DTPOFF:
6754 output_addr_const (file, XVECEXP (x, 0, 0));
6755 fprintf (file, "@DTPOFF");
6756 return true;
6757 case UNSPEC_NTPOFF:
6758 output_addr_const (file, XVECEXP (x, 0, 0));
6759 fprintf (file, "@NTPOFF");
6760 return true;
6761 case UNSPEC_GOTNTPOFF:
6762 output_addr_const (file, XVECEXP (x, 0, 0));
6763 fprintf (file, "@GOTNTPOFF");
6764 return true;
6765 case UNSPEC_INDNTPOFF:
6766 output_addr_const (file, XVECEXP (x, 0, 0));
6767 fprintf (file, "@INDNTPOFF");
6768 return true;
6769 }
6770
6771 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
6772 switch (XINT (x, 1))
6773 {
6774 case UNSPEC_POOL_OFFSET:
6775 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
6776 output_addr_const (file, x);
6777 return true;
6778 }
6779 return false;
6780 }
6781
6782 /* Output address operand ADDR in assembler syntax to
6783 stdio stream FILE. */
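/* For example, an address with base %r2, index %r3 and displacement 8
   (hypothetical registers) is printed as "8(%r3,%r2)"; without an index
   register it would be "8(%r2)".  */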
6784
6785 void
6786 print_operand_address (FILE *file, rtx addr)
6787 {
6788 struct s390_address ad;
6789
6790 if (s390_loadrelative_operand_p (addr, NULL, NULL))
6791 {
6792 if (!TARGET_Z10)
6793 {
6794 output_operand_lossage ("symbolic memory references are "
6795 "only supported on z10 or later");
6796 return;
6797 }
6798 output_addr_const (file, addr);
6799 return;
6800 }
6801
6802 if (!s390_decompose_address (addr, &ad)
6803 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6804 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
6805 output_operand_lossage ("cannot decompose address");
6806
6807 if (ad.disp)
6808 output_addr_const (file, ad.disp);
6809 else
6810 fprintf (file, "0");
6811
6812 if (ad.base && ad.indx)
6813 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
6814 reg_names[REGNO (ad.base)]);
6815 else if (ad.base)
6816 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
6817 }
6818
6819 /* Output operand X in assembler syntax to stdio stream FILE.
6820 CODE specified the format flag. The following format flags
6821 are recognized:
6822
6823 'C': print opcode suffix for branch condition.
6824 'D': print opcode suffix for inverse branch condition.
6825 'E': print opcode suffix for branch on index instruction.
6826 'G': print the size of the operand in bytes.
6827 'J': print tls_load/tls_gdcall/tls_ldcall suffix.
6828 'M': print the second word of a TImode operand.
6829 'N': print the second word of a DImode operand.
6830 'O': print only the displacement of a memory reference or address.
6831 'R': print only the base register of a memory reference or address.
6832 'S': print S-type memory reference (base+displacement).
6833 'Y': print shift count operand.
6834
6835 'b': print integer X as if it's an unsigned byte.
6836 'c': print integer X as if it's a signed byte.
6837 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
6838 'f': "end" contiguous bitmask X in SImode.
6839 'h': print integer X as if it's a signed halfword.
6840 'i': print the first nonzero HImode part of X.
6841 'j': print the first HImode part unequal to -1 of X.
6842 'k': print the first nonzero SImode part of X.
6843 'm': print the first SImode part unequal to -1 of X.
6844 'o': print integer X as if it's an unsigned 32-bit word.
6845 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
6846 't': CONST_INT: "start" of contiguous bitmask X in SImode.
6847 CONST_VECTOR: Generate a bitmask for vgbm instruction.
6848 'x': print integer X as if it's an unsigned halfword.
6849 'v': print register number as vector register (v1 instead of f1).
6850 */
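/* A few illustrative cases for a CONST_INT operand X == 0x1234abcd (value
   chosen only for illustration): '%b' prints 205 (0xcd), '%x' prints 43981
   (0xabcd), '%h' prints -21555 (0xabcd sign-extended as a halfword) and
   '%o' prints 305441741 (the full value as an unsigned 32-bit word).  */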
6851
6852 void
6853 print_operand (FILE *file, rtx x, int code)
6854 {
6855 HOST_WIDE_INT ival;
6856
6857 switch (code)
6858 {
6859 case 'C':
6860 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
6861 return;
6862
6863 case 'D':
6864 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
6865 return;
6866
6867 case 'E':
6868 if (GET_CODE (x) == LE)
6869 fprintf (file, "l");
6870 else if (GET_CODE (x) == GT)
6871 fprintf (file, "h");
6872 else
6873 output_operand_lossage ("invalid comparison operator "
6874 "for 'E' output modifier");
6875 return;
6876
6877 case 'J':
6878 if (GET_CODE (x) == SYMBOL_REF)
6879 {
6880 fprintf (file, "%s", ":tls_load:");
6881 output_addr_const (file, x);
6882 }
6883 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
6884 {
6885 fprintf (file, "%s", ":tls_gdcall:");
6886 output_addr_const (file, XVECEXP (x, 0, 0));
6887 }
6888 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
6889 {
6890 fprintf (file, "%s", ":tls_ldcall:");
6891 const char *name = get_some_local_dynamic_name ();
6892 gcc_assert (name);
6893 assemble_name (file, name);
6894 }
6895 else
6896 output_operand_lossage ("invalid reference for 'J' output modifier");
6897 return;
6898
6899 case 'G':
6900 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
6901 return;
6902
6903 case 'O':
6904 {
6905 struct s390_address ad;
6906 int ret;
6907
6908 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
6909
6910 if (!ret
6911 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6912 || ad.indx)
6913 {
6914 output_operand_lossage ("invalid address for 'O' output modifier");
6915 return;
6916 }
6917
6918 if (ad.disp)
6919 output_addr_const (file, ad.disp);
6920 else
6921 fprintf (file, "0");
6922 }
6923 return;
6924
6925 case 'R':
6926 {
6927 struct s390_address ad;
6928 int ret;
6929
6930 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
6931
6932 if (!ret
6933 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6934 || ad.indx)
6935 {
6936 output_operand_lossage ("invalid address for 'R' output modifier");
6937 return;
6938 }
6939
6940 if (ad.base)
6941 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
6942 else
6943 fprintf (file, "0");
6944 }
6945 return;
6946
6947 case 'S':
6948 {
6949 struct s390_address ad;
6950 int ret;
6951
6952 if (!MEM_P (x))
6953 {
6954 output_operand_lossage ("memory reference expected for "
6955 "'S' output modifier");
6956 return;
6957 }
6958 ret = s390_decompose_address (XEXP (x, 0), &ad);
6959
6960 if (!ret
6961 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6962 || ad.indx)
6963 {
6964 output_operand_lossage ("invalid address for 'S' output modifier");
6965 return;
6966 }
6967
6968 if (ad.disp)
6969 output_addr_const (file, ad.disp);
6970 else
6971 fprintf (file, "0");
6972
6973 if (ad.base)
6974 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
6975 }
6976 return;
6977
6978 case 'N':
6979 if (GET_CODE (x) == REG)
6980 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
6981 else if (GET_CODE (x) == MEM)
6982 x = change_address (x, VOIDmode,
6983 plus_constant (Pmode, XEXP (x, 0), 4));
6984 else
6985 output_operand_lossage ("register or memory expression expected "
6986 "for 'N' output modifier");
6987 break;
6988
6989 case 'M':
6990 if (GET_CODE (x) == REG)
6991 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
6992 else if (GET_CODE (x) == MEM)
6993 x = change_address (x, VOIDmode,
6994 plus_constant (Pmode, XEXP (x, 0), 8));
6995 else
6996 output_operand_lossage ("register or memory expression expected "
6997 "for 'M' output modifier");
6998 break;
6999
7000 case 'Y':
7001 print_shift_count_operand (file, x);
7002 return;
7003 }
7004
7005 switch (GET_CODE (x))
7006 {
7007 case REG:
7008 /* Print FP regs as fx instead of vx when they are accessed
7009 through non-vector mode. */
7010 if (code == 'v'
7011 || VECTOR_NOFP_REG_P (x)
7012 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7013 || (VECTOR_REG_P (x)
7014 && (GET_MODE_SIZE (GET_MODE (x)) /
7015 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7016 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7017 else
7018 fprintf (file, "%s", reg_names[REGNO (x)]);
7019 break;
7020
7021 case MEM:
7022 output_address (XEXP (x, 0));
7023 break;
7024
7025 case CONST:
7026 case CODE_LABEL:
7027 case LABEL_REF:
7028 case SYMBOL_REF:
7029 output_addr_const (file, x);
7030 break;
7031
7032 case CONST_INT:
7033 ival = INTVAL (x);
7034 switch (code)
7035 {
7036 case 0:
7037 break;
7038 case 'b':
7039 ival &= 0xff;
7040 break;
7041 case 'c':
7042 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7043 break;
7044 case 'x':
7045 ival &= 0xffff;
7046 break;
7047 case 'h':
7048 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7049 break;
7050 case 'i':
7051 ival = s390_extract_part (x, HImode, 0);
7052 break;
7053 case 'j':
7054 ival = s390_extract_part (x, HImode, -1);
7055 break;
7056 case 'k':
7057 ival = s390_extract_part (x, SImode, 0);
7058 break;
7059 case 'm':
7060 ival = s390_extract_part (x, SImode, -1);
7061 break;
7062 case 'o':
7063 ival &= 0xffffffff;
7064 break;
7065 case 'e': case 'f':
7066 case 's': case 't':
7067 {
7068 int pos, len;
7069 bool ok;
7070
7071 len = (code == 's' || code == 'e' ? 64 : 32);
7072 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
7073 gcc_assert (ok);
7074 if (code == 's' || code == 't')
7075 ival = 64 - pos - len;
7076 else
7077 ival = 64 - 1 - pos;
7078 }
7079 break;
7080 default:
7081 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7082 }
7083 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7084 break;
7085
7086 case CONST_DOUBLE:
7087 gcc_assert (GET_MODE (x) == VOIDmode);
7088 if (code == 'b')
7089 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
7090 else if (code == 'x')
7091 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
7092 else if (code == 'h')
7093 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7094 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
7095 else
7096 {
7097 if (code == 0)
7098 output_operand_lossage ("invalid constant - try using "
7099 "an output modifier");
7100 else
7101 output_operand_lossage ("invalid constant for output modifier '%c'",
7102 code);
7103 }
7104 break;
7105 case CONST_VECTOR:
7106 switch (code)
7107 {
7108 case 'e':
7109 case 's':
7110 {
7111 int start, stop, inner_len;
7112 bool ok;
7113
7114 inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x));
7115 ok = s390_contiguous_bitmask_vector_p (x, &start, &stop);
7116 gcc_assert (ok);
7117 if (code == 's' || code == 't')
7118 ival = inner_len - stop - 1;
7119 else
7120 ival = inner_len - start - 1;
7121 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7122 }
7123 break;
7124 case 't':
7125 {
7126 unsigned mask;
7127 bool ok = s390_bytemask_vector_p (x, &mask);
7128 gcc_assert (ok);
7129 fprintf (file, "%u", mask);
7130 }
7131 break;
7132
7133 default:
7134 output_operand_lossage ("invalid constant vector for output "
7135 "modifier '%c'", code);
7136 }
7137 break;
7138
7139 default:
7140 if (code == 0)
7141 output_operand_lossage ("invalid expression - try using "
7142 "an output modifier");
7143 else
7144 output_operand_lossage ("invalid expression for output "
7145 "modifier '%c'", code);
7146 break;
7147 }
7148 }
7149
7150 /* Target hook for assembling integer objects. We need to define it
7151 here to work around a bug in some versions of GAS, which couldn't
7152 handle values smaller than INT_MIN when printed in decimal. */
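/* E.g. a DImode value of -2147483649 (just below INT_MIN) is emitted as
   "\t.quad\t0xffffffff7fffffff" rather than in decimal form.  */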
7153
7154 static bool
7155 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7156 {
7157 if (size == 8 && aligned_p
7158 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7159 {
7160 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7161 INTVAL (x));
7162 return true;
7163 }
7164 return default_assemble_integer (x, size, aligned_p);
7165 }
7166
7167 /* Returns true if register REGNO is used for forming
7168 a memory address in expression X. */
7169
7170 static bool
7171 reg_used_in_mem_p (int regno, rtx x)
7172 {
7173 enum rtx_code code = GET_CODE (x);
7174 int i, j;
7175 const char *fmt;
7176
7177 if (code == MEM)
7178 {
7179 if (refers_to_regno_p (regno, XEXP (x, 0)))
7180 return true;
7181 }
7182 else if (code == SET
7183 && GET_CODE (SET_DEST (x)) == PC)
7184 {
7185 if (refers_to_regno_p (regno, SET_SRC (x)))
7186 return true;
7187 }
7188
7189 fmt = GET_RTX_FORMAT (code);
7190 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7191 {
7192 if (fmt[i] == 'e'
7193 && reg_used_in_mem_p (regno, XEXP (x, i)))
7194 return true;
7195
7196 else if (fmt[i] == 'E')
7197 for (j = 0; j < XVECLEN (x, i); j++)
7198 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7199 return true;
7200 }
7201 return false;
7202 }
7203
7204 /* Returns true if expression DEP_RTX sets an address register
7205 used by instruction INSN to address memory. */
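/* For instance, if DEP_RTX is (set (reg %r2) (...)) and INSN is a load such
   as "l %r1,0(%r2)" whose memory address uses %r2, then INSN depends on
   DEP_RTX in the address generation unit; the register numbers are purely
   illustrative.  */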
7206
7207 static bool
7208 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
7209 {
7210 rtx target, pat;
7211
7212 if (NONJUMP_INSN_P (dep_rtx))
7213 dep_rtx = PATTERN (dep_rtx);
7214
7215 if (GET_CODE (dep_rtx) == SET)
7216 {
7217 target = SET_DEST (dep_rtx);
7218 if (GET_CODE (target) == STRICT_LOW_PART)
7219 target = XEXP (target, 0);
7220 while (GET_CODE (target) == SUBREG)
7221 target = SUBREG_REG (target);
7222
7223 if (GET_CODE (target) == REG)
7224 {
7225 int regno = REGNO (target);
7226
7227 if (s390_safe_attr_type (insn) == TYPE_LA)
7228 {
7229 pat = PATTERN (insn);
7230 if (GET_CODE (pat) == PARALLEL)
7231 {
7232 gcc_assert (XVECLEN (pat, 0) == 2);
7233 pat = XVECEXP (pat, 0, 0);
7234 }
7235 gcc_assert (GET_CODE (pat) == SET);
7236 return refers_to_regno_p (regno, SET_SRC (pat));
7237 }
7238 else if (get_attr_atype (insn) == ATYPE_AGEN)
7239 return reg_used_in_mem_p (regno, PATTERN (insn));
7240 }
7241 }
7242 return false;
7243 }
7244
7245 /* Return 1 if DEP_INSN sets a register used by INSN in the agen unit. */
7246
7247 int
7248 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
7249 {
7250 rtx dep_rtx = PATTERN (dep_insn);
7251 int i;
7252
7253 if (GET_CODE (dep_rtx) == SET
7254 && addr_generation_dependency_p (dep_rtx, insn))
7255 return 1;
7256 else if (GET_CODE (dep_rtx) == PARALLEL)
7257 {
7258 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
7259 {
7260 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
7261 return 1;
7262 }
7263 }
7264 return 0;
7265 }
7266
7267
7268 /* Update the integer scheduling priority INSN_PRIORITY (INSN); this
7269 implements the TARGET_SCHED_ADJUST_PRIORITY hook. Increasing the
7270 priority makes INSN execute earlier, reducing the priority makes INSN
7271 execute later.
7272
7273 A STD instruction should be scheduled earlier,
7274 in order to use the bypass. */
7275 static int
7276 s390_adjust_priority (rtx_insn *insn, int priority)
7277 {
7278 if (! INSN_P (insn))
7279 return priority;
7280
7281 if (s390_tune != PROCESSOR_2084_Z990
7282 && s390_tune != PROCESSOR_2094_Z9_109
7283 && s390_tune != PROCESSOR_2097_Z10
7284 && s390_tune != PROCESSOR_2817_Z196
7285 && s390_tune != PROCESSOR_2827_ZEC12
7286 && s390_tune != PROCESSOR_2964_Z13)
7287 return priority;
7288
7289 switch (s390_safe_attr_type (insn))
7290 {
7291 case TYPE_FSTOREDF:
7292 case TYPE_FSTORESF:
7293 priority = priority << 3;
7294 break;
7295 case TYPE_STORE:
7296 case TYPE_STM:
7297 priority = priority << 1;
7298 break;
7299 default:
7300 break;
7301 }
7302 return priority;
7303 }
7304
7305
7306 /* The number of instructions that can be issued per cycle. */
7307
7308 static int
7309 s390_issue_rate (void)
7310 {
7311 switch (s390_tune)
7312 {
7313 case PROCESSOR_2084_Z990:
7314 case PROCESSOR_2094_Z9_109:
7315 case PROCESSOR_2817_Z196:
7316 return 3;
7317 case PROCESSOR_2097_Z10:
7318 return 2;
7319 /* Starting with EC12 we use the sched_reorder hook to take care
7320 of instruction dispatch constraints. The algorithm only
7321 picks the best instruction and assumes only a single
7322 instruction gets issued per cycle. */
7323 case PROCESSOR_2827_ZEC12:
7324 default:
7325 return 1;
7326 }
7327 }
7328
7329 static int
7330 s390_first_cycle_multipass_dfa_lookahead (void)
7331 {
7332 return 4;
7333 }
7334
7335 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
7336 Fix up MEMs as required. */
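/* For example, (mem (symbol_ref LC0)) with CONSTANT_POOL_ADDRESS_P set is
   rewritten as (mem (unspec [(symbol_ref LC0) (reg base)] UNSPEC_LTREF)),
   where "base" stands for cfun->machine->base_reg and LC0 is a placeholder
   literal pool label.  */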
7337
7338 static void
7339 annotate_constant_pool_refs (rtx *x)
7340 {
7341 int i, j;
7342 const char *fmt;
7343
7344 gcc_assert (GET_CODE (*x) != SYMBOL_REF
7345 || !CONSTANT_POOL_ADDRESS_P (*x));
7346
7347 /* Literal pool references can only occur inside a MEM ... */
7348 if (GET_CODE (*x) == MEM)
7349 {
7350 rtx memref = XEXP (*x, 0);
7351
7352 if (GET_CODE (memref) == SYMBOL_REF
7353 && CONSTANT_POOL_ADDRESS_P (memref))
7354 {
7355 rtx base = cfun->machine->base_reg;
7356 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
7357 UNSPEC_LTREF);
7358
7359 *x = replace_equiv_address (*x, addr);
7360 return;
7361 }
7362
7363 if (GET_CODE (memref) == CONST
7364 && GET_CODE (XEXP (memref, 0)) == PLUS
7365 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
7366 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
7367 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
7368 {
7369 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
7370 rtx sym = XEXP (XEXP (memref, 0), 0);
7371 rtx base = cfun->machine->base_reg;
7372 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7373 UNSPEC_LTREF);
7374
7375 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
7376 return;
7377 }
7378 }
7379
7380 /* ... or a load-address type pattern. */
7381 if (GET_CODE (*x) == SET)
7382 {
7383 rtx addrref = SET_SRC (*x);
7384
7385 if (GET_CODE (addrref) == SYMBOL_REF
7386 && CONSTANT_POOL_ADDRESS_P (addrref))
7387 {
7388 rtx base = cfun->machine->base_reg;
7389 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
7390 UNSPEC_LTREF);
7391
7392 SET_SRC (*x) = addr;
7393 return;
7394 }
7395
7396 if (GET_CODE (addrref) == CONST
7397 && GET_CODE (XEXP (addrref, 0)) == PLUS
7398 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
7399 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
7400 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
7401 {
7402 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
7403 rtx sym = XEXP (XEXP (addrref, 0), 0);
7404 rtx base = cfun->machine->base_reg;
7405 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7406 UNSPEC_LTREF);
7407
7408 SET_SRC (*x) = plus_constant (Pmode, addr, off);
7409 return;
7410 }
7411 }
7412
7413 /* Annotate LTREL_BASE as well. */
7414 if (GET_CODE (*x) == UNSPEC
7415 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7416 {
7417 rtx base = cfun->machine->base_reg;
7418 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
7419 UNSPEC_LTREL_BASE);
7420 return;
7421 }
7422
7423 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7424 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7425 {
7426 if (fmt[i] == 'e')
7427 {
7428 annotate_constant_pool_refs (&XEXP (*x, i));
7429 }
7430 else if (fmt[i] == 'E')
7431 {
7432 for (j = 0; j < XVECLEN (*x, i); j++)
7433 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
7434 }
7435 }
7436 }
7437
7438 /* Split all branches that exceed the maximum distance.
7439 Returns true if this created a new literal pool entry. */
7440
7441 static int
7442 s390_split_branches (void)
7443 {
7444 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
7445 int new_literal = 0, ret;
7446 rtx_insn *insn;
7447 rtx pat, target;
7448 rtx *label;
7449
7450 /* We need correct insn addresses. */
7451
7452 shorten_branches (get_insns ());
7453
7454 /* Find all branches that exceed 64KB, and split them. */
7455
7456 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7457 {
7458 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
7459 continue;
7460
7461 pat = PATTERN (insn);
7462 if (GET_CODE (pat) == PARALLEL)
7463 pat = XVECEXP (pat, 0, 0);
7464 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
7465 continue;
7466
7467 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
7468 {
7469 label = &SET_SRC (pat);
7470 }
7471 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
7472 {
7473 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
7474 label = &XEXP (SET_SRC (pat), 1);
7475 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
7476 label = &XEXP (SET_SRC (pat), 2);
7477 else
7478 continue;
7479 }
7480 else
7481 continue;
7482
7483 if (get_attr_length (insn) <= 4)
7484 continue;
7485
7486 /* We are going to use the return register as a scratch register;
7487 make sure it will be saved/restored by the prologue/epilogue. */
7488 cfun_frame_layout.save_return_addr_p = 1;
7489
7490 if (!flag_pic)
7491 {
7492 new_literal = 1;
7493 rtx mem = force_const_mem (Pmode, *label);
7494 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
7495 insn);
7496 INSN_ADDRESSES_NEW (set_insn, -1);
7497 annotate_constant_pool_refs (&PATTERN (set_insn));
7498
7499 target = temp_reg;
7500 }
7501 else
7502 {
7503 new_literal = 1;
7504 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
7505 UNSPEC_LTREL_OFFSET);
7506 target = gen_rtx_CONST (Pmode, target);
7507 target = force_const_mem (Pmode, target);
7508 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
7509 insn);
7510 INSN_ADDRESSES_NEW (set_insn, -1);
7511 annotate_constant_pool_refs (&PATTERN (set_insn));
7512
7513 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
7514 cfun->machine->base_reg),
7515 UNSPEC_LTREL_BASE);
7516 target = gen_rtx_PLUS (Pmode, temp_reg, target);
7517 }
7518
7519 ret = validate_change (insn, label, target, 0);
7520 gcc_assert (ret);
7521 }
7522
7523 return new_literal;
7524 }
7525
7526
7527 /* Find an annotated literal pool symbol referenced in RTX X,
7528 and store it at REF. Will abort if X contains references to
7529 more than one such pool symbol; multiple references to the same
7530 symbol are allowed, however.
7531
7532 The rtx pointed to by REF must be initialized to NULL_RTX
7533 by the caller before calling this routine. */
7534
7535 static void
7536 find_constant_pool_ref (rtx x, rtx *ref)
7537 {
7538 int i, j;
7539 const char *fmt;
7540
7541 /* Ignore LTREL_BASE references. */
7542 if (GET_CODE (x) == UNSPEC
7543 && XINT (x, 1) == UNSPEC_LTREL_BASE)
7544 return;
7545 /* Likewise POOL_ENTRY insns. */
7546 if (GET_CODE (x) == UNSPEC_VOLATILE
7547 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
7548 return;
7549
7550 gcc_assert (GET_CODE (x) != SYMBOL_REF
7551 || !CONSTANT_POOL_ADDRESS_P (x));
7552
7553 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
7554 {
7555 rtx sym = XVECEXP (x, 0, 0);
7556 gcc_assert (GET_CODE (sym) == SYMBOL_REF
7557 && CONSTANT_POOL_ADDRESS_P (sym));
7558
7559 if (*ref == NULL_RTX)
7560 *ref = sym;
7561 else
7562 gcc_assert (*ref == sym);
7563
7564 return;
7565 }
7566
7567 fmt = GET_RTX_FORMAT (GET_CODE (x));
7568 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7569 {
7570 if (fmt[i] == 'e')
7571 {
7572 find_constant_pool_ref (XEXP (x, i), ref);
7573 }
7574 else if (fmt[i] == 'E')
7575 {
7576 for (j = 0; j < XVECLEN (x, i); j++)
7577 find_constant_pool_ref (XVECEXP (x, i, j), ref);
7578 }
7579 }
7580 }
7581
7582 /* Replace every reference to the annotated literal pool
7583 symbol REF in X by its base plus OFFSET. */
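/* E.g. (unspec [(symbol_ref LC0) (reg base)] UNSPEC_LTREF), with REF being
   that LC0 symbol, becomes (plus (reg base) OFFSET); LC0 and "base" are
   placeholder names.  */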
7584
7585 static void
7586 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
7587 {
7588 int i, j;
7589 const char *fmt;
7590
7591 gcc_assert (*x != ref);
7592
7593 if (GET_CODE (*x) == UNSPEC
7594 && XINT (*x, 1) == UNSPEC_LTREF
7595 && XVECEXP (*x, 0, 0) == ref)
7596 {
7597 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
7598 return;
7599 }
7600
7601 if (GET_CODE (*x) == PLUS
7602 && GET_CODE (XEXP (*x, 1)) == CONST_INT
7603 && GET_CODE (XEXP (*x, 0)) == UNSPEC
7604 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
7605 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
7606 {
7607 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
7608 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
7609 return;
7610 }
7611
7612 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7613 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7614 {
7615 if (fmt[i] == 'e')
7616 {
7617 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
7618 }
7619 else if (fmt[i] == 'E')
7620 {
7621 for (j = 0; j < XVECLEN (*x, i); j++)
7622 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
7623 }
7624 }
7625 }
7626
7627 /* Check whether X contains an UNSPEC_LTREL_BASE.
7628 Return its constant pool symbol if found, NULL_RTX otherwise. */
7629
7630 static rtx
7631 find_ltrel_base (rtx x)
7632 {
7633 int i, j;
7634 const char *fmt;
7635
7636 if (GET_CODE (x) == UNSPEC
7637 && XINT (x, 1) == UNSPEC_LTREL_BASE)
7638 return XVECEXP (x, 0, 0);
7639
7640 fmt = GET_RTX_FORMAT (GET_CODE (x));
7641 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7642 {
7643 if (fmt[i] == 'e')
7644 {
7645 rtx fnd = find_ltrel_base (XEXP (x, i));
7646 if (fnd)
7647 return fnd;
7648 }
7649 else if (fmt[i] == 'E')
7650 {
7651 for (j = 0; j < XVECLEN (x, i); j++)
7652 {
7653 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
7654 if (fnd)
7655 return fnd;
7656 }
7657 }
7658 }
7659
7660 return NULL_RTX;
7661 }
7662
7663 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
7664
7665 static void
7666 replace_ltrel_base (rtx *x)
7667 {
7668 int i, j;
7669 const char *fmt;
7670
7671 if (GET_CODE (*x) == UNSPEC
7672 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7673 {
7674 *x = XVECEXP (*x, 0, 1);
7675 return;
7676 }
7677
7678 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7679 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7680 {
7681 if (fmt[i] == 'e')
7682 {
7683 replace_ltrel_base (&XEXP (*x, i));
7684 }
7685 else if (fmt[i] == 'E')
7686 {
7687 for (j = 0; j < XVECLEN (*x, i); j++)
7688 replace_ltrel_base (&XVECEXP (*x, i, j));
7689 }
7690 }
7691 }
7692
7693
7694 /* We keep a list of constants which we have to add to internal
7695 constant tables in the middle of large functions. */
7696
7697 #define NR_C_MODES 31
7698 machine_mode constant_modes[NR_C_MODES] =
7699 {
7700 TFmode, TImode, TDmode,
7701 V16QImode, V8HImode, V4SImode, V2DImode, V4SFmode, V2DFmode, V1TFmode,
7702 DFmode, DImode, DDmode,
7703 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
7704 SFmode, SImode, SDmode,
7705 V4QImode, V2HImode, V1SImode, V1SFmode,
7706 HImode,
7707 V2QImode, V1HImode,
7708 QImode,
7709 V1QImode
7710 };
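
/* Note that the modes above are listed in decreasing size/alignment
   order; s390_dump_pool below emits constants in this order, which keeps
   every pool entry naturally aligned.  */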
7711
7712 struct constant
7713 {
7714 struct constant *next;
7715 rtx value;
7716 rtx_code_label *label;
7717 };
7718
7719 struct constant_pool
7720 {
7721 struct constant_pool *next;
7722 rtx_insn *first_insn;
7723 rtx_insn *pool_insn;
7724 bitmap insns;
7725 rtx_insn *emit_pool_after;
7726
7727 struct constant *constants[NR_C_MODES];
7728 struct constant *execute;
7729 rtx_code_label *label;
7730 int size;
7731 };
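
/* Roughly, as used by the routines below: NEXT chains the pool chunks,
   FIRST_INSN and the INSNS bitmap describe the insns covered by a chunk,
   POOL_INSN is the placeholder insn later replaced by the dumped pool,
   CONSTANTS holds one list per constant_modes entry, EXECUTE the
   out-of-line execute target templates, LABEL the pool base label, SIZE
   the pool size in bytes, and EMIT_POOL_AFTER the insn after which the
   main pool may be emitted (see s390_mainpool_start).  */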
7732
7733 /* Allocate new constant_pool structure. */
7734
7735 static struct constant_pool *
7736 s390_alloc_pool (void)
7737 {
7738 struct constant_pool *pool;
7739 int i;
7740
7741 pool = (struct constant_pool *) xmalloc (sizeof *pool);
7742 pool->next = NULL;
7743 for (i = 0; i < NR_C_MODES; i++)
7744 pool->constants[i] = NULL;
7745
7746 pool->execute = NULL;
7747 pool->label = gen_label_rtx ();
7748 pool->first_insn = NULL;
7749 pool->pool_insn = NULL;
7750 pool->insns = BITMAP_ALLOC (NULL);
7751 pool->size = 0;
7752 pool->emit_pool_after = NULL;
7753
7754 return pool;
7755 }
7756
7757 /* Create new constant pool covering instructions starting at INSN
7758 and chain it to the end of POOL_LIST. */
7759
7760 static struct constant_pool *
7761 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
7762 {
7763 struct constant_pool *pool, **prev;
7764
7765 pool = s390_alloc_pool ();
7766 pool->first_insn = insn;
7767
7768 for (prev = pool_list; *prev; prev = &(*prev)->next)
7769 ;
7770 *prev = pool;
7771
7772 return pool;
7773 }
7774
7775 /* End range of instructions covered by POOL at INSN and emit
7776 placeholder insn representing the pool. */
7777
7778 static void
7779 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
7780 {
7781 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
7782
7783 if (!insn)
7784 insn = get_last_insn ();
7785
7786 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
7787 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
7788 }
7789
7790 /* Add INSN to the list of insns covered by POOL. */
7791
7792 static void
7793 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
7794 {
7795 bitmap_set_bit (pool->insns, INSN_UID (insn));
7796 }
7797
7798 /* Return pool out of POOL_LIST that covers INSN. */
7799
7800 static struct constant_pool *
7801 s390_find_pool (struct constant_pool *pool_list, rtx insn)
7802 {
7803 struct constant_pool *pool;
7804
7805 for (pool = pool_list; pool; pool = pool->next)
7806 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
7807 break;
7808
7809 return pool;
7810 }
7811
7812 /* Add constant VAL of mode MODE to the constant pool POOL. */
7813
7814 static void
7815 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
7816 {
7817 struct constant *c;
7818 int i;
7819
7820 for (i = 0; i < NR_C_MODES; i++)
7821 if (constant_modes[i] == mode)
7822 break;
7823 gcc_assert (i != NR_C_MODES);
7824
7825 for (c = pool->constants[i]; c != NULL; c = c->next)
7826 if (rtx_equal_p (val, c->value))
7827 break;
7828
7829 if (c == NULL)
7830 {
7831 c = (struct constant *) xmalloc (sizeof *c);
7832 c->value = val;
7833 c->label = gen_label_rtx ();
7834 c->next = pool->constants[i];
7835 pool->constants[i] = c;
7836 pool->size += GET_MODE_SIZE (mode);
7837 }
7838 }
7839
7840 /* Return an rtx that represents the offset of X from the start of
7841 pool POOL. */
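
/* The result has the shape

     (const (unspec [X (label_ref POOL->label)] UNSPEC_POOL_OFFSET))

   i.e. a pool-relative offset that is only resolved when the expression
   is finally output.  */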
7842
7843 static rtx
7844 s390_pool_offset (struct constant_pool *pool, rtx x)
7845 {
7846 rtx label;
7847
7848 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
7849 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
7850 UNSPEC_POOL_OFFSET);
7851 return gen_rtx_CONST (GET_MODE (x), x);
7852 }
7853
7854 /* Find constant VAL of mode MODE in the constant pool POOL.
7855 Return an RTX describing the distance from the start of
7856 the pool to the location of the constant. */
7857
7858 static rtx
7859 s390_find_constant (struct constant_pool *pool, rtx val,
7860 machine_mode mode)
7861 {
7862 struct constant *c;
7863 int i;
7864
7865 for (i = 0; i < NR_C_MODES; i++)
7866 if (constant_modes[i] == mode)
7867 break;
7868 gcc_assert (i != NR_C_MODES);
7869
7870 for (c = pool->constants[i]; c != NULL; c = c->next)
7871 if (rtx_equal_p (val, c->value))
7872 break;
7873
7874 gcc_assert (c);
7875
7876 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
7877 }
7878
7879 /* Check whether INSN is an execute. Return the label_ref to its
7880 execute target template if so, NULL_RTX otherwise. */
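
/* The pattern recognized below is expected to look roughly like

     (parallel [(unspec [... ... (label_ref TMPL)] UNSPEC_EXECUTE) ...])

   with the target template label (or const0_rtx, see
   s390_cannot_copy_insn_p) as operand 2 of the unspec.  */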
7881
7882 static rtx
7883 s390_execute_label (rtx insn)
7884 {
7885 if (NONJUMP_INSN_P (insn)
7886 && GET_CODE (PATTERN (insn)) == PARALLEL
7887 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
7888 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
7889 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
7890
7891 return NULL_RTX;
7892 }
7893
7894 /* Add execute target for INSN to the constant pool POOL. */
7895
7896 static void
7897 s390_add_execute (struct constant_pool *pool, rtx insn)
7898 {
7899 struct constant *c;
7900
7901 for (c = pool->execute; c != NULL; c = c->next)
7902 if (INSN_UID (insn) == INSN_UID (c->value))
7903 break;
7904
7905 if (c == NULL)
7906 {
7907 c = (struct constant *) xmalloc (sizeof *c);
7908 c->value = insn;
7909 c->label = gen_label_rtx ();
7910 c->next = pool->execute;
7911 pool->execute = c;
7912 pool->size += 6;
7913 }
7914 }
7915
7916 /* Find execute target for INSN in the constant pool POOL.
7917 Return an RTX describing the distance from the start of
7918 the pool to the location of the execute target. */
7919
7920 static rtx
7921 s390_find_execute (struct constant_pool *pool, rtx insn)
7922 {
7923 struct constant *c;
7924
7925 for (c = pool->execute; c != NULL; c = c->next)
7926 if (INSN_UID (insn) == INSN_UID (c->value))
7927 break;
7928
7929 gcc_assert (c);
7930
7931 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
7932 }
7933
7934 /* For an execute INSN, extract the execute target template. */
7935
7936 static rtx
7937 s390_execute_target (rtx insn)
7938 {
7939 rtx pattern = PATTERN (insn);
7940 gcc_assert (s390_execute_label (insn));
7941
7942 if (XVECLEN (pattern, 0) == 2)
7943 {
7944 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
7945 }
7946 else
7947 {
7948 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
7949 int i;
7950
7951 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
7952 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
7953
7954 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
7955 }
7956
7957 return pattern;
7958 }
7959
7960 /* Indicate that INSN cannot be duplicated. This is the case for
7961 execute insns that carry a unique label. */
7962
7963 static bool
7964 s390_cannot_copy_insn_p (rtx_insn *insn)
7965 {
7966 rtx label = s390_execute_label (insn);
7967 return label && label != const0_rtx;
7968 }
7969
7970 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
7971 do not emit the pool base label. */
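
/* Sketch of the sequence built up below: on zarch a switch into the
   pool section, an alignment to 8 (zarch) or 4 bytes, the pool base
   label unless REMOTE_LABEL, one label plus UNSPECV_POOL_ENTRY insn per
   constant in decreasing alignment order, a re-alignment to 2 followed
   by the execute target templates, the switch back to the previous
   section and a barrier; the placeholder insn is removed at the end.  */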
7972
7973 static void
7974 s390_dump_pool (struct constant_pool *pool, bool remote_label)
7975 {
7976 struct constant *c;
7977 rtx_insn *insn = pool->pool_insn;
7978 int i;
7979
7980 /* Switch to rodata section. */
7981 if (TARGET_CPU_ZARCH)
7982 {
7983 insn = emit_insn_after (gen_pool_section_start (), insn);
7984 INSN_ADDRESSES_NEW (insn, -1);
7985 }
7986
7987 /* Ensure minimum pool alignment. */
7988 if (TARGET_CPU_ZARCH)
7989 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
7990 else
7991 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
7992 INSN_ADDRESSES_NEW (insn, -1);
7993
7994 /* Emit pool base label. */
7995 if (!remote_label)
7996 {
7997 insn = emit_label_after (pool->label, insn);
7998 INSN_ADDRESSES_NEW (insn, -1);
7999 }
8000
8001 /* Dump constants in descending alignment requirement order,
8002 ensuring proper alignment for every constant. */
8003 for (i = 0; i < NR_C_MODES; i++)
8004 for (c = pool->constants[i]; c; c = c->next)
8005 {
8006 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8007 rtx value = copy_rtx (c->value);
8008 if (GET_CODE (value) == CONST
8009 && GET_CODE (XEXP (value, 0)) == UNSPEC
8010 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8011 && XVECLEN (XEXP (value, 0), 0) == 1)
8012 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8013
8014 insn = emit_label_after (c->label, insn);
8015 INSN_ADDRESSES_NEW (insn, -1);
8016
8017 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8018 gen_rtvec (1, value),
8019 UNSPECV_POOL_ENTRY);
8020 insn = emit_insn_after (value, insn);
8021 INSN_ADDRESSES_NEW (insn, -1);
8022 }
8023
8024 /* Ensure minimum alignment for instructions. */
8025 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8026 INSN_ADDRESSES_NEW (insn, -1);
8027
8028 /* Output in-pool execute template insns. */
8029 for (c = pool->execute; c; c = c->next)
8030 {
8031 insn = emit_label_after (c->label, insn);
8032 INSN_ADDRESSES_NEW (insn, -1);
8033
8034 insn = emit_insn_after (s390_execute_target (c->value), insn);
8035 INSN_ADDRESSES_NEW (insn, -1);
8036 }
8037
8038 /* Switch back to previous section. */
8039 if (TARGET_CPU_ZARCH)
8040 {
8041 insn = emit_insn_after (gen_pool_section_end (), insn);
8042 INSN_ADDRESSES_NEW (insn, -1);
8043 }
8044
8045 insn = emit_barrier_after (insn);
8046 INSN_ADDRESSES_NEW (insn, -1);
8047
8048 /* Remove placeholder insn. */
8049 remove_insn (pool->pool_insn);
8050 }
8051
8052 /* Free all memory used by POOL. */
8053
8054 static void
8055 s390_free_pool (struct constant_pool *pool)
8056 {
8057 struct constant *c, *next;
8058 int i;
8059
8060 for (i = 0; i < NR_C_MODES; i++)
8061 for (c = pool->constants[i]; c; c = next)
8062 {
8063 next = c->next;
8064 free (c);
8065 }
8066
8067 for (c = pool->execute; c; c = next)
8068 {
8069 next = c->next;
8070 free (c);
8071 }
8072
8073 BITMAP_FREE (pool->insns);
8074 free (pool);
8075 }
8076
8077
8078 /* Collect main literal pool. Return NULL on overflow. */
8079
8080 static struct constant_pool *
8081 s390_mainpool_start (void)
8082 {
8083 struct constant_pool *pool;
8084 rtx_insn *insn;
8085
8086 pool = s390_alloc_pool ();
8087
8088 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8089 {
8090 if (NONJUMP_INSN_P (insn)
8091 && GET_CODE (PATTERN (insn)) == SET
8092 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8093 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8094 {
8095 /* There might be two main_pool instructions if base_reg
8096 is call-clobbered; one for shrink-wrapped code and one
8097 for the rest. We want to keep the first. */
8098 if (pool->pool_insn)
8099 {
8100 insn = PREV_INSN (insn);
8101 delete_insn (NEXT_INSN (insn));
8102 continue;
8103 }
8104 pool->pool_insn = insn;
8105 }
8106
8107 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8108 {
8109 s390_add_execute (pool, insn);
8110 }
8111 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8112 {
8113 rtx pool_ref = NULL_RTX;
8114 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8115 if (pool_ref)
8116 {
8117 rtx constant = get_pool_constant (pool_ref);
8118 machine_mode mode = get_pool_mode (pool_ref);
8119 s390_add_constant (pool, constant, mode);
8120 }
8121 }
8122
8123 /* If hot/cold partitioning is enabled we have to make sure that
8124 the literal pool is emitted in the same section where the
8125 initialization of the literal pool base pointer takes place.
8126 emit_pool_after is only used in the non-overflow case on
8127 non-zarch CPUs where we can emit the literal pool at the end of the
8128 function body within the text section. */
8129 if (NOTE_P (insn)
8130 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8131 && !pool->emit_pool_after)
8132 pool->emit_pool_after = PREV_INSN (insn);
8133 }
8134
8135 gcc_assert (pool->pool_insn || pool->size == 0);
8136
8137 if (pool->size >= 4096)
8138 {
8139 /* We're going to chunkify the pool, so remove the main
8140 pool placeholder insn. */
8141 remove_insn (pool->pool_insn);
8142
8143 s390_free_pool (pool);
8144 pool = NULL;
8145 }
8146
8147 /* If the function ends with the section where the literal pool
8148 should be emitted, set the marker to its end. */
8149 if (pool && !pool->emit_pool_after)
8150 pool->emit_pool_after = get_last_insn ();
8151
8152 return pool;
8153 }
8154
8155 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8156 Modify the current function to output the pool constants as well as
8157 the pool register setup instruction. */
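
/* Three strategies are used below: on zarch the base register is set up
   with LARL and the pool is dumped after the function (it ends up in
   .rodata); on 31 bit, if code plus pool stay below 4096 bytes, a
   BASR-based base is used and the pool goes at the end of the current
   section; otherwise an inline pool is emitted and branched around.  */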
8158
8159 static void
8160 s390_mainpool_finish (struct constant_pool *pool)
8161 {
8162 rtx base_reg = cfun->machine->base_reg;
8163
8164 /* If the pool is empty, we're done. */
8165 if (pool->size == 0)
8166 {
8167 /* We don't actually need a base register after all. */
8168 cfun->machine->base_reg = NULL_RTX;
8169
8170 if (pool->pool_insn)
8171 remove_insn (pool->pool_insn);
8172 s390_free_pool (pool);
8173 return;
8174 }
8175
8176 /* We need correct insn addresses. */
8177 shorten_branches (get_insns ());
8178
8179 /* On zSeries, we use a LARL to load the pool register. The pool is
8180 located in the .rodata section, so we emit it after the function. */
8181 if (TARGET_CPU_ZARCH)
8182 {
8183 rtx set = gen_main_base_64 (base_reg, pool->label);
8184 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8185 INSN_ADDRESSES_NEW (insn, -1);
8186 remove_insn (pool->pool_insn);
8187
8188 insn = get_last_insn ();
8189 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8190 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8191
8192 s390_dump_pool (pool, 0);
8193 }
8194
8195 /* On S/390, if the total size of the function's code plus literal pool
8196 does not exceed 4096 bytes, we use BASR to set up a function base
8197 pointer, and emit the literal pool at the end of the function. */
8198 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8199 + pool->size + 8 /* alignment slop */ < 4096)
8200 {
8201 rtx set = gen_main_base_31_small (base_reg, pool->label);
8202 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8203 INSN_ADDRESSES_NEW (insn, -1);
8204 remove_insn (pool->pool_insn);
8205
8206 insn = emit_label_after (pool->label, insn);
8207 INSN_ADDRESSES_NEW (insn, -1);
8208
8209 /* emit_pool_after will be set by s390_mainpool_start to the
8210 last insn of the section where the literal pool should be
8211 emitted. */
8212 insn = pool->emit_pool_after;
8213
8214 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8215 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8216
8217 s390_dump_pool (pool, 1);
8218 }
8219
8220 /* Otherwise, we emit an inline literal pool and use BASR to branch
8221 over it, setting up the pool register at the same time. */
8222 else
8223 {
8224 rtx_code_label *pool_end = gen_label_rtx ();
8225
8226 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
8227 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
8228 JUMP_LABEL (insn) = pool_end;
8229 INSN_ADDRESSES_NEW (insn, -1);
8230 remove_insn (pool->pool_insn);
8231
8232 insn = emit_label_after (pool->label, insn);
8233 INSN_ADDRESSES_NEW (insn, -1);
8234
8235 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8236 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8237
8238 insn = emit_label_after (pool_end, pool->pool_insn);
8239 INSN_ADDRESSES_NEW (insn, -1);
8240
8241 s390_dump_pool (pool, 1);
8242 }
8243
8244
8245 /* Replace all literal pool references. */
8246
8247 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8248 {
8249 if (INSN_P (insn))
8250 replace_ltrel_base (&PATTERN (insn));
8251
8252 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8253 {
8254 rtx addr, pool_ref = NULL_RTX;
8255 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8256 if (pool_ref)
8257 {
8258 if (s390_execute_label (insn))
8259 addr = s390_find_execute (pool, insn);
8260 else
8261 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8262 get_pool_mode (pool_ref));
8263
8264 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8265 INSN_CODE (insn) = -1;
8266 }
8267 }
8268 }
8269
8270
8271 /* Free the pool. */
8272 s390_free_pool (pool);
8273 }
8274
8275 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8276 We have decided we cannot use this pool, so revert all changes
8277 to the current function that were done by s390_mainpool_start. */
8278 static void
8279 s390_mainpool_cancel (struct constant_pool *pool)
8280 {
8281 /* We didn't actually change the instruction stream, so simply
8282 free the pool memory. */
8283 s390_free_pool (pool);
8284 }
8285
8286
8287 /* Chunkify the literal pool. */
8288
8289 #define S390_POOL_CHUNK_MIN 0xc00
8290 #define S390_POOL_CHUNK_MAX 0xe00
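
/* 0xc00 is 3072 and 0xe00 is 3584 bytes, both well below the 4096-byte
   displacement range (cf. the overflow check in s390_mainpool_start),
   presumably to leave headroom for alignment padding and for the base
   reload insns accounted for via extra_size below.  */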
8291
8292 static struct constant_pool *
8293 s390_chunkify_start (void)
8294 {
8295 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8296 int extra_size = 0;
8297 bitmap far_labels;
8298 rtx pending_ltrel = NULL_RTX;
8299 rtx_insn *insn;
8300
8301 rtx (*gen_reload_base) (rtx, rtx) =
8302 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
8303
8304
8305 /* We need correct insn addresses. */
8306
8307 shorten_branches (get_insns ());
8308
8309 /* Scan all insns and move literals to pool chunks. */
8310
8311 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8312 {
8313 bool section_switch_p = false;
8314
8315 /* Check for pending LTREL_BASE. */
8316 if (INSN_P (insn))
8317 {
8318 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
8319 if (ltrel_base)
8320 {
8321 gcc_assert (ltrel_base == pending_ltrel);
8322 pending_ltrel = NULL_RTX;
8323 }
8324 }
8325
8326 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8327 {
8328 if (!curr_pool)
8329 curr_pool = s390_start_pool (&pool_list, insn);
8330
8331 s390_add_execute (curr_pool, insn);
8332 s390_add_pool_insn (curr_pool, insn);
8333 }
8334 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8335 {
8336 rtx pool_ref = NULL_RTX;
8337 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8338 if (pool_ref)
8339 {
8340 rtx constant = get_pool_constant (pool_ref);
8341 machine_mode mode = get_pool_mode (pool_ref);
8342
8343 if (!curr_pool)
8344 curr_pool = s390_start_pool (&pool_list, insn);
8345
8346 s390_add_constant (curr_pool, constant, mode);
8347 s390_add_pool_insn (curr_pool, insn);
8348
8349 /* Don't split the pool chunk between an LTREL_OFFSET load
8350 and the corresponding LTREL_BASE. */
8351 if (GET_CODE (constant) == CONST
8352 && GET_CODE (XEXP (constant, 0)) == UNSPEC
8353 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
8354 {
8355 gcc_assert (!pending_ltrel);
8356 pending_ltrel = pool_ref;
8357 }
8358 }
8359 }
8360
8361 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
8362 {
8363 if (curr_pool)
8364 s390_add_pool_insn (curr_pool, insn);
8365 /* An LTREL_BASE must follow within the same basic block. */
8366 gcc_assert (!pending_ltrel);
8367 }
8368
8369 if (NOTE_P (insn))
8370 switch (NOTE_KIND (insn))
8371 {
8372 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
8373 section_switch_p = true;
8374 break;
8375 case NOTE_INSN_VAR_LOCATION:
8376 case NOTE_INSN_CALL_ARG_LOCATION:
8377 continue;
8378 default:
8379 break;
8380 }
8381
8382 if (!curr_pool
8383 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
8384 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
8385 continue;
8386
8387 if (TARGET_CPU_ZARCH)
8388 {
8389 if (curr_pool->size < S390_POOL_CHUNK_MAX)
8390 continue;
8391
8392 s390_end_pool (curr_pool, NULL);
8393 curr_pool = NULL;
8394 }
8395 else
8396 {
8397 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
8398 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
8399 + extra_size;
8400
8401 /* We will later have to insert base register reload insns.
8402 Those will have an effect on code size, which we need to
8403 consider here. This calculation makes rather pessimistic
8404 worst-case assumptions. */
8405 if (LABEL_P (insn))
8406 extra_size += 6;
8407
8408 if (chunk_size < S390_POOL_CHUNK_MIN
8409 && curr_pool->size < S390_POOL_CHUNK_MIN
8410 && !section_switch_p)
8411 continue;
8412
8413 /* Pool chunks can only be inserted after BARRIERs ... */
8414 if (BARRIER_P (insn))
8415 {
8416 s390_end_pool (curr_pool, insn);
8417 curr_pool = NULL;
8418 extra_size = 0;
8419 }
8420
8421 /* ... so if we don't find one in time, create one. */
8422 else if (chunk_size > S390_POOL_CHUNK_MAX
8423 || curr_pool->size > S390_POOL_CHUNK_MAX
8424 || section_switch_p)
8425 {
8426 rtx_insn *label, *jump, *barrier, *next, *prev;
8427
8428 if (!section_switch_p)
8429 {
8430 /* We can insert the barrier only after a 'real' insn. */
8431 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
8432 continue;
8433 if (get_attr_length (insn) == 0)
8434 continue;
8435 /* Don't separate LTREL_BASE from the corresponding
8436 LTREL_OFFSET load. */
8437 if (pending_ltrel)
8438 continue;
8439 next = insn;
8440 do
8441 {
8442 insn = next;
8443 next = NEXT_INSN (insn);
8444 }
8445 while (next
8446 && NOTE_P (next)
8447 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
8448 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
8449 }
8450 else
8451 {
8452 gcc_assert (!pending_ltrel);
8453
8454 /* The old pool has to end before the section switch
8455 note in order to make it part of the current
8456 section. */
8457 insn = PREV_INSN (insn);
8458 }
8459
8460 label = gen_label_rtx ();
8461 prev = insn;
8462 if (prev && NOTE_P (prev))
8463 prev = prev_nonnote_insn (prev);
8464 if (prev)
8465 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
8466 INSN_LOCATION (prev));
8467 else
8468 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
8469 barrier = emit_barrier_after (jump);
8470 insn = emit_label_after (label, barrier);
8471 JUMP_LABEL (jump) = label;
8472 LABEL_NUSES (label) = 1;
8473
8474 INSN_ADDRESSES_NEW (jump, -1);
8475 INSN_ADDRESSES_NEW (barrier, -1);
8476 INSN_ADDRESSES_NEW (insn, -1);
8477
8478 s390_end_pool (curr_pool, barrier);
8479 curr_pool = NULL;
8480 extra_size = 0;
8481 }
8482 }
8483 }
8484
8485 if (curr_pool)
8486 s390_end_pool (curr_pool, NULL);
8487 gcc_assert (!pending_ltrel);
8488
8489 /* Find all labels that are branched into
8490 from an insn belonging to a different chunk. */
8491
8492 far_labels = BITMAP_ALLOC (NULL);
8493
8494 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8495 {
8496 rtx_jump_table_data *table;
8497
8498 /* Labels marked with LABEL_PRESERVE_P can be the target
8499 of non-local jumps, so we have to mark them.
8500 The same holds for named labels.
8501
8502 Don't do that, however, if it is the label before
8503 a jump table. */
8504
8505 if (LABEL_P (insn)
8506 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
8507 {
8508 rtx_insn *vec_insn = NEXT_INSN (insn);
8509 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
8510 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
8511 }
8512 /* Check potential targets in a table jump (casesi_jump). */
8513 else if (tablejump_p (insn, NULL, &table))
8514 {
8515 rtx vec_pat = PATTERN (table);
8516 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
8517
8518 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
8519 {
8520 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
8521
8522 if (s390_find_pool (pool_list, label)
8523 != s390_find_pool (pool_list, insn))
8524 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8525 }
8526 }
8527 /* If we have a direct jump (conditional or unconditional),
8528 check all potential targets. */
8529 else if (JUMP_P (insn))
8530 {
8531 rtx pat = PATTERN (insn);
8532
8533 if (GET_CODE (pat) == PARALLEL)
8534 pat = XVECEXP (pat, 0, 0);
8535
8536 if (GET_CODE (pat) == SET)
8537 {
8538 rtx label = JUMP_LABEL (insn);
8539 if (label && !ANY_RETURN_P (label))
8540 {
8541 if (s390_find_pool (pool_list, label)
8542 != s390_find_pool (pool_list, insn))
8543 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8544 }
8545 }
8546 }
8547 }
8548
8549 /* Insert base register reload insns before every pool. */
8550
8551 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8552 {
8553 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8554 curr_pool->label);
8555 rtx_insn *insn = curr_pool->first_insn;
8556 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
8557 }
8558
8559 /* Insert base register reload insns at every far label. */
8560
8561 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8562 if (LABEL_P (insn)
8563 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
8564 {
8565 struct constant_pool *pool = s390_find_pool (pool_list, insn);
8566 if (pool)
8567 {
8568 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8569 pool->label);
8570 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
8571 }
8572 }
8573
8574
8575 BITMAP_FREE (far_labels);
8576
8577
8578 /* Recompute insn addresses. */
8579
8580 init_insn_lengths ();
8581 shorten_branches (get_insns ());
8582
8583 return pool_list;
8584 }
8585
8586 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
8587 After we have decided to use this list, finish implementing
8588 all changes to the current function as required. */
8589
8590 static void
8591 s390_chunkify_finish (struct constant_pool *pool_list)
8592 {
8593 struct constant_pool *curr_pool = NULL;
8594 rtx_insn *insn;
8595
8596
8597 /* Replace all literal pool references. */
8598
8599 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8600 {
8601 if (INSN_P (insn))
8602 replace_ltrel_base (&PATTERN (insn));
8603
8604 curr_pool = s390_find_pool (pool_list, insn);
8605 if (!curr_pool)
8606 continue;
8607
8608 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8609 {
8610 rtx addr, pool_ref = NULL_RTX;
8611 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8612 if (pool_ref)
8613 {
8614 if (s390_execute_label (insn))
8615 addr = s390_find_execute (curr_pool, insn);
8616 else
8617 addr = s390_find_constant (curr_pool,
8618 get_pool_constant (pool_ref),
8619 get_pool_mode (pool_ref));
8620
8621 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8622 INSN_CODE (insn) = -1;
8623 }
8624 }
8625 }
8626
8627 /* Dump out all literal pools. */
8628
8629 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8630 s390_dump_pool (curr_pool, 0);
8631
8632 /* Free pool list. */
8633
8634 while (pool_list)
8635 {
8636 struct constant_pool *next = pool_list->next;
8637 s390_free_pool (pool_list);
8638 pool_list = next;
8639 }
8640 }
8641
8642 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
8643 We have decided we cannot use this list, so revert all changes
8644 to the current function that were done by s390_chunkify_start. */
8645
8646 static void
8647 s390_chunkify_cancel (struct constant_pool *pool_list)
8648 {
8649 struct constant_pool *curr_pool = NULL;
8650 rtx_insn *insn;
8651
8652 /* Remove all pool placeholder insns. */
8653
8654 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8655 {
8656 /* Did we insert an extra barrier? Remove it. */
8657 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
8658 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
8659 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
8660
8661 if (jump && JUMP_P (jump)
8662 && barrier && BARRIER_P (barrier)
8663 && label && LABEL_P (label)
8664 && GET_CODE (PATTERN (jump)) == SET
8665 && SET_DEST (PATTERN (jump)) == pc_rtx
8666 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
8667 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
8668 {
8669 remove_insn (jump);
8670 remove_insn (barrier);
8671 remove_insn (label);
8672 }
8673
8674 remove_insn (curr_pool->pool_insn);
8675 }
8676
8677 /* Remove all base register reload insns. */
8678
8679 for (insn = get_insns (); insn; )
8680 {
8681 rtx_insn *next_insn = NEXT_INSN (insn);
8682
8683 if (NONJUMP_INSN_P (insn)
8684 && GET_CODE (PATTERN (insn)) == SET
8685 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
8686 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
8687 remove_insn (insn);
8688
8689 insn = next_insn;
8690 }
8691
8692 /* Free pool list. */
8693
8694 while (pool_list)
8695 {
8696 struct constant_pool *next = pool_list->next;
8697 s390_free_pool (pool_list);
8698 pool_list = next;
8699 }
8700 }
8701
8702 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
8703
8704 void
8705 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
8706 {
8707 REAL_VALUE_TYPE r;
8708
8709 switch (GET_MODE_CLASS (mode))
8710 {
8711 case MODE_FLOAT:
8712 case MODE_DECIMAL_FLOAT:
8713 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
8714
8715 REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
8716 assemble_real (r, mode, align);
8717 break;
8718
8719 case MODE_INT:
8720 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
8721 mark_symbol_refs_as_used (exp);
8722 break;
8723
8724 case MODE_VECTOR_INT:
8725 case MODE_VECTOR_FLOAT:
8726 {
8727 int i;
8728 machine_mode inner_mode;
8729 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
8730
8731 inner_mode = GET_MODE_INNER (GET_MODE (exp));
8732 for (i = 0; i < XVECLEN (exp, 0); i++)
8733 s390_output_pool_entry (XVECEXP (exp, 0, i),
8734 inner_mode,
8735 i == 0
8736 ? align
8737 : GET_MODE_BITSIZE (inner_mode));
8738 }
8739 break;
8740
8741 default:
8742 gcc_unreachable ();
8743 }
8744 }
8745
8746
8747 /* Return an RTL expression representing the value of the return address
8748 for the frame COUNT steps up from the current frame. FRAME is the
8749 frame pointer of that frame. */
8750
8751 rtx
8752 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
8753 {
8754 int offset;
8755 rtx addr;
8756
8757 /* Without backchain, we fail for all but the current frame. */
8758
8759 if (!TARGET_BACKCHAIN && count > 0)
8760 return NULL_RTX;
8761
8762 /* For the current frame, we need to make sure the initial
8763 value of RETURN_REGNUM is actually saved. */
8764
8765 if (count == 0)
8766 {
8767 /* On non-z architectures branch splitting could overwrite r14. */
8768 if (TARGET_CPU_ZARCH)
8769 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
8770 else
8771 {
8772 cfun_frame_layout.save_return_addr_p = true;
8773 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
8774 }
8775 }
8776
8777 if (TARGET_PACKED_STACK)
8778 offset = -2 * UNITS_PER_LONG;
8779 else
8780 offset = RETURN_REGNUM * UNITS_PER_LONG;
8781
8782 addr = plus_constant (Pmode, frame, offset);
8783 addr = memory_address (Pmode, addr);
8784 return gen_rtx_MEM (Pmode, addr);
8785 }
8786
8787 /* Return an RTL expression representing the back chain stored in
8788 the current stack frame. */
8789
8790 rtx
8791 s390_back_chain_rtx (void)
8792 {
8793 rtx chain;
8794
8795 gcc_assert (TARGET_BACKCHAIN);
8796
8797 if (TARGET_PACKED_STACK)
8798 chain = plus_constant (Pmode, stack_pointer_rtx,
8799 STACK_POINTER_OFFSET - UNITS_PER_LONG);
8800 else
8801 chain = stack_pointer_rtx;
8802
8803 chain = gen_rtx_MEM (Pmode, chain);
8804 return chain;
8805 }
8806
8807 /* Find the first call-clobbered register unused in a function.
8808 This could be used as base register in a leaf function
8809 or for holding the return address before epilogue. */
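
/* Only GPRs 0 to 5 are considered, i.e. the call-clobbered GPRs of the
   s390 ABI; if all of them have been used, 0 (r0) is returned as a
   fallback.  */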
8810
8811 static int
8812 find_unused_clobbered_reg (void)
8813 {
8814 int i;
8815 for (i = 0; i < 6; i++)
8816 if (!df_regs_ever_live_p (i))
8817 return i;
8818 return 0;
8819 }
8820
8821
8822 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
8823 clobbered hard regs in SETREG. */
8824
8825 static void
8826 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
8827 {
8828 char *regs_ever_clobbered = (char *)data;
8829 unsigned int i, regno;
8830 machine_mode mode = GET_MODE (setreg);
8831
8832 if (GET_CODE (setreg) == SUBREG)
8833 {
8834 rtx inner = SUBREG_REG (setreg);
8835 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
8836 return;
8837 regno = subreg_regno (setreg);
8838 }
8839 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
8840 regno = REGNO (setreg);
8841 else
8842 return;
8843
8844 for (i = regno;
8845 i < regno + HARD_REGNO_NREGS (regno, mode);
8846 i++)
8847 regs_ever_clobbered[i] = 1;
8848 }
8849
8850 /* Walks through all basic blocks of the current function looking
8851 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
8852 of the passed char array REGS_EVER_CLOBBERED are set to one for
8853 each of those regs. */
8854
8855 static void
8856 s390_regs_ever_clobbered (char regs_ever_clobbered[])
8857 {
8858 basic_block cur_bb;
8859 rtx_insn *cur_insn;
8860 unsigned int i;
8861
8862 memset (regs_ever_clobbered, 0, 32);
8863
8864 /* For non-leaf functions we have to consider all call clobbered regs to be
8865 clobbered. */
8866 if (!crtl->is_leaf)
8867 {
8868 for (i = 0; i < 32; i++)
8869 regs_ever_clobbered[i] = call_really_used_regs[i];
8870 }
8871
8872 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
8873 this work is done by liveness analysis (mark_regs_live_at_end).
8874 Special care is needed for functions containing landing pads. Landing pads
8875 may use the eh registers, but the code which sets these registers is not
8876 contained in that function. Hence s390_regs_ever_clobbered is not able to
8877 deal with this automatically. */
8878 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
8879 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
8880 if (crtl->calls_eh_return
8881 || (cfun->machine->has_landing_pad_p
8882 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
8883 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
8884
8885 /* For nonlocal gotos all call-saved registers have to be saved.
8886 This flag is also set for the unwinding code in libgcc.
8887 See expand_builtin_unwind_init. For regs_ever_live this is done by
8888 reload. */
8889 if (crtl->saves_all_registers)
8890 for (i = 0; i < 32; i++)
8891 if (!call_really_used_regs[i])
8892 regs_ever_clobbered[i] = 1;
8893
8894 FOR_EACH_BB_FN (cur_bb, cfun)
8895 {
8896 FOR_BB_INSNS (cur_bb, cur_insn)
8897 {
8898 rtx pat;
8899
8900 if (!INSN_P (cur_insn))
8901 continue;
8902
8903 pat = PATTERN (cur_insn);
8904
8905 /* Ignore GPR restore insns. */
8906 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
8907 {
8908 if (GET_CODE (pat) == SET
8909 && GENERAL_REG_P (SET_DEST (pat)))
8910 {
8911 /* lgdr */
8912 if (GET_MODE (SET_SRC (pat)) == DImode
8913 && FP_REG_P (SET_SRC (pat)))
8914 continue;
8915
8916 /* l / lg */
8917 if (GET_CODE (SET_SRC (pat)) == MEM)
8918 continue;
8919 }
8920
8921 /* lm / lmg */
8922 if (GET_CODE (pat) == PARALLEL
8923 && load_multiple_operation (pat, VOIDmode))
8924 continue;
8925 }
8926
8927 note_stores (pat,
8928 s390_reg_clobbered_rtx,
8929 regs_ever_clobbered);
8930 }
8931 }
8932 }
8933
8934 /* Determine the frame area which actually has to be accessed
8935 in the function epilogue. The values are stored at the
8936 given pointers AREA_BOTTOM (address of the lowest used stack
8937 address) and AREA_TOP (address of the first item which does
8938 not belong to the stack frame). */
8939
8940 static void
8941 s390_frame_area (int *area_bottom, int *area_top)
8942 {
8943 int b, t;
8944
8945 b = INT_MAX;
8946 t = INT_MIN;
8947
8948 if (cfun_frame_layout.first_restore_gpr != -1)
8949 {
8950 b = (cfun_frame_layout.gprs_offset
8951 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
8952 t = b + (cfun_frame_layout.last_restore_gpr
8953 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
8954 }
8955
8956 if (TARGET_64BIT && cfun_save_high_fprs_p)
8957 {
8958 b = MIN (b, cfun_frame_layout.f8_offset);
8959 t = MAX (t, (cfun_frame_layout.f8_offset
8960 + cfun_frame_layout.high_fprs * 8));
8961 }
8962
8963 if (!TARGET_64BIT)
8964 {
8965 if (cfun_fpr_save_p (FPR4_REGNUM))
8966 {
8967 b = MIN (b, cfun_frame_layout.f4_offset);
8968 t = MAX (t, cfun_frame_layout.f4_offset + 8);
8969 }
8970 if (cfun_fpr_save_p (FPR6_REGNUM))
8971 {
8972 b = MIN (b, cfun_frame_layout.f4_offset + 8);
8973 t = MAX (t, cfun_frame_layout.f4_offset + 16);
8974 }
8975 }
8976 *area_bottom = b;
8977 *area_top = t;
8978 }
8979 /* Update gpr_save_slots in the frame layout trying to make use of
8980 FPRs as GPR save slots.
8981 This is a helper routine of s390_register_info. */
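
/* On z10 and newer a GPR can be copied into an FPR with ldgr and back
   with lgdr.  Below, the call-saved GPRs of a leaf function are assigned
   otherwise unused call-clobbered FPRs as save slots; if we run out of
   such FPRs, the whole allocation is undone so that the prologue can
   keep using stm/lm.  */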
8982
8983 static void
8984 s390_register_info_gprtofpr ()
8985 {
8986 int save_reg_slot = FPR0_REGNUM;
8987 int i, j;
8988
8989 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
8990 return;
8991
8992 for (i = 15; i >= 6; i--)
8993 {
8994 if (cfun_gpr_save_slot (i) == 0)
8995 continue;
8996
8997 /* Advance to the next FP register which can be used as a
8998 GPR save slot. */
8999 while ((!call_really_used_regs[save_reg_slot]
9000 || df_regs_ever_live_p (save_reg_slot)
9001 || cfun_fpr_save_p (save_reg_slot))
9002 && FP_REGNO_P (save_reg_slot))
9003 save_reg_slot++;
9004 if (!FP_REGNO_P (save_reg_slot))
9005 {
9006 /* We only want to use ldgr/lgdr if we can get rid of
9007 stm/lm entirely. So undo the gpr slot allocation in
9008 case we ran out of FPR save slots. */
9009 for (j = 6; j <= 15; j++)
9010 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9011 cfun_gpr_save_slot (j) = -1;
9012 break;
9013 }
9014 cfun_gpr_save_slot (i) = save_reg_slot++;
9015 }
9016 }
9017
9018 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9019 stdarg.
9020 This is a helper routine for s390_register_info. */
9021
9022 static void
9023 s390_register_info_stdarg_fpr ()
9024 {
9025 int i;
9026 int min_fpr;
9027 int max_fpr;
9028
9029 /* Save the FP argument regs for stdarg: f0 and f2 for 31 bit;
9030 f0, f2, f4 and f6 for 64 bit. */
9031 if (!cfun->stdarg
9032 || !TARGET_HARD_FLOAT
9033 || !cfun->va_list_fpr_size
9034 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9035 return;
9036
9037 min_fpr = crtl->args.info.fprs;
9038 max_fpr = min_fpr + cfun->va_list_fpr_size;
9039 if (max_fpr > FP_ARG_NUM_REG)
9040 max_fpr = FP_ARG_NUM_REG;
9041
9042 for (i = min_fpr; i < max_fpr; i++)
9043 cfun_set_fpr_save (i + FPR0_REGNUM);
9044 }
9045
9046 /* Reserve the GPR save slots for GPRs which need to be saved due to
9047 stdarg.
9048 This is a helper routine for s390_register_info. */
9049
9050 static void
9051 s390_register_info_stdarg_gpr ()
9052 {
9053 int i;
9054 int min_gpr;
9055 int max_gpr;
9056
9057 if (!cfun->stdarg
9058 || !cfun->va_list_gpr_size
9059 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9060 return;
9061
9062 min_gpr = crtl->args.info.gprs;
9063 max_gpr = min_gpr + cfun->va_list_gpr_size;
9064 if (max_gpr > GP_ARG_NUM_REG)
9065 max_gpr = GP_ARG_NUM_REG;
9066
9067 for (i = min_gpr; i < max_gpr; i++)
9068 cfun_gpr_save_slot (2 + i) = -1;
9069 }
9070
9071 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9072 for registers which need to be saved in function prologue.
9073 This function can be used until the insns emitted for save/restore
9074 of the regs are visible in the RTL stream. */
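
/* A gpr_save_slot value of 0 means the GPR needs no save, -1 requests a
   save slot on the stack, and an FPR register number requests a save
   into that FPR (see s390_register_info_gprtofpr).  */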
9075
9076 static void
9077 s390_register_info ()
9078 {
9079 int i, j;
9080 char clobbered_regs[32];
9081
9082 gcc_assert (!epilogue_completed);
9083
9084 if (reload_completed)
9085 /* After reload we rely on our own routine to determine which
9086 registers need saving. */
9087 s390_regs_ever_clobbered (clobbered_regs);
9088 else
9089 /* During reload we use regs_ever_live as a base since reload
9090 does changes in there which we otherwise would not be aware
9091 of. */
9092 for (i = 0; i < 32; i++)
9093 clobbered_regs[i] = df_regs_ever_live_p (i);
9094
9095 for (i = 0; i < 32; i++)
9096 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9097
9098 /* Mark the call-saved FPRs which need to be saved.
9099 This needs to be done before checking the special GPRs since the
9100 stack pointer usage depends on whether high FPRs have to be saved
9101 or not. */
9102 cfun_frame_layout.fpr_bitmap = 0;
9103 cfun_frame_layout.high_fprs = 0;
9104 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9105 if (clobbered_regs[i] && !call_really_used_regs[i])
9106 {
9107 cfun_set_fpr_save (i);
9108 if (i >= FPR8_REGNUM)
9109 cfun_frame_layout.high_fprs++;
9110 }
9111
9112 if (flag_pic)
9113 clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
9114 |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
9115
9116 clobbered_regs[BASE_REGNUM]
9117 |= (cfun->machine->base_reg
9118 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9119
9120 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9121 |= !!frame_pointer_needed;
9122
9123 /* On pre-z900 machines this might not be decided until the
9124 machine-dependent reorg pass.
9125 save_return_addr_p will only be set on non-zarch machines so
9126 there is no risk that r14 goes into an FPR instead of a stack
9127 slot. */
9128 clobbered_regs[RETURN_REGNUM]
9129 |= (!crtl->is_leaf
9130 || TARGET_TPF_PROFILING
9131 || cfun->machine->split_branches_pending_p
9132 || cfun_frame_layout.save_return_addr_p
9133 || crtl->calls_eh_return);
9134
9135 clobbered_regs[STACK_POINTER_REGNUM]
9136 |= (!crtl->is_leaf
9137 || TARGET_TPF_PROFILING
9138 || cfun_save_high_fprs_p
9139 || get_frame_size () > 0
9140 || (reload_completed && cfun_frame_layout.frame_size > 0)
9141 || cfun->calls_alloca);
9142
9143 memset (cfun_frame_layout.gpr_save_slots, 0, 16);
9144
9145 for (i = 6; i < 16; i++)
9146 if (clobbered_regs[i])
9147 cfun_gpr_save_slot (i) = -1;
9148
9149 s390_register_info_stdarg_fpr ();
9150 s390_register_info_gprtofpr ();
9151
9152 /* First find the range of GPRs to be restored. Vararg regs don't
9153 need to be restored so we do it before assigning slots to the
9154 vararg GPRs. */
9155 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9156 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9157 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9158 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9159
9160 /* stdarg functions might need to save GPRs 2 to 6. This might
9161 override the GPR->FPR save decision made above for r6 since
9162 vararg regs must go to the stack. */
9163 s390_register_info_stdarg_gpr ();
9164
9165 /* Now the range of GPRs which need saving. */
9166 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9167 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9168 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9169 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9170 }
9171
9172 /* This function is called by s390_optimize_prologue in order to get
9173 rid of unnecessary GPR save/restore instructions. The register info
9174 for the GPRs is re-computed and the ranges are re-calculated. */
9175
9176 static void
9177 s390_optimize_register_info ()
9178 {
9179 char clobbered_regs[32];
9180 int i, j;
9181
9182 gcc_assert (epilogue_completed);
9183 gcc_assert (!cfun->machine->split_branches_pending_p);
9184
9185 s390_regs_ever_clobbered (clobbered_regs);
9186
9187 for (i = 0; i < 32; i++)
9188 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9189
9190 /* There is still special treatment needed for cases invisible to
9191 s390_regs_ever_clobbered. */
9192 clobbered_regs[RETURN_REGNUM]
9193 |= (TARGET_TPF_PROFILING
9194 /* When expanding builtin_return_addr in ESA mode we do not
9195 know whether r14 will later be needed as scratch reg when
9196 doing branch splitting. So the builtin always accesses the
9197 r14 save slot and we need to stick to the save/restore
9198 decision for r14 even if it turns out that it didn't get
9199 clobbered. */
9200 || cfun_frame_layout.save_return_addr_p
9201 || crtl->calls_eh_return);
9202
9203 memset (cfun_frame_layout.gpr_save_slots, 0, 6);
9204
9205 for (i = 6; i < 16; i++)
9206 if (!clobbered_regs[i])
9207 cfun_gpr_save_slot (i) = 0;
9208
9209 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9210 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9211 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9212 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9213
9214 s390_register_info_stdarg_gpr ();
9215
9216 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9217 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9218 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9219 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9220 }
9221
9222 /* Fill cfun->machine with info about frame of current function. */
9223
9224 static void
9225 s390_frame_info (void)
9226 {
9227 HOST_WIDE_INT lowest_offset;
9228
9229 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9230 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9231
9232 /* The va_arg builtin uses a constant distance of 16 *
9233 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9234 pointer. So even if we are going to save the stack pointer in an
9235 FPR we need the stack space in order to keep the offsets
9236 correct. */
9237 if (cfun->stdarg && cfun_save_arg_fprs_p)
9238 {
9239 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9240
9241 if (cfun_frame_layout.first_save_gpr_slot == -1)
9242 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9243 }
9244
9245 cfun_frame_layout.frame_size = get_frame_size ();
9246 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9247 fatal_error (input_location,
9248 "total size of local variables exceeds architecture limit");
9249
9250 if (!TARGET_PACKED_STACK)
9251 {
9252 /* Fixed stack layout. */
9253 cfun_frame_layout.backchain_offset = 0;
9254 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9255 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9256 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9257 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9258 * UNITS_PER_LONG);
9259 }
9260 else if (TARGET_BACKCHAIN)
9261 {
9262 /* Kernel stack layout - packed stack, backchain, no float */
9263 gcc_assert (TARGET_SOFT_FLOAT);
9264 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9265 - UNITS_PER_LONG);
9266
9267 /* The distance between the backchain and the return address
9268 save slot must not change. So we always need a slot for the
9269 stack pointer which resides in between. */
9270 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9271
9272 cfun_frame_layout.gprs_offset
9273 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9274
9275 /* FPRs will not be saved. Nevertheless pick sane values to
9276 keep area calculations valid. */
9277 cfun_frame_layout.f0_offset =
9278 cfun_frame_layout.f4_offset =
9279 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9280 }
9281 else
9282 {
9283 int num_fprs;
9284
9285 /* Packed stack layout without backchain. */
9286
9287 /* With stdarg FPRs need their dedicated slots. */
9288 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9289 : (cfun_fpr_save_p (FPR4_REGNUM) +
9290 cfun_fpr_save_p (FPR6_REGNUM)));
9291 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9292
9293 num_fprs = (cfun->stdarg ? 2
9294 : (cfun_fpr_save_p (FPR0_REGNUM)
9295 + cfun_fpr_save_p (FPR2_REGNUM)));
9296 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9297
9298 cfun_frame_layout.gprs_offset
9299 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9300
9301 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9302 - cfun_frame_layout.high_fprs * 8);
9303 }
9304
9305 if (cfun_save_high_fprs_p)
9306 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9307
9308 if (!crtl->is_leaf)
9309 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9310
9311 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9312 sized area at the bottom of the stack. This is required also for
9313 leaf functions. When GCC generates a local stack reference it
9314 will always add STACK_POINTER_OFFSET to all these references. */
9315 if (crtl->is_leaf
9316 && !TARGET_TPF_PROFILING
9317 && cfun_frame_layout.frame_size == 0
9318 && !cfun->calls_alloca)
9319 return;
9320
9321 /* Calculate the number of bytes we have used in our own register
9322 save area. With the packed stack layout we can re-use the
9323 remaining bytes for normal stack elements. */
9324
9325 if (TARGET_PACKED_STACK)
9326 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9327 cfun_frame_layout.f4_offset),
9328 cfun_frame_layout.gprs_offset);
9329 else
9330 lowest_offset = 0;
9331
9332 if (TARGET_BACKCHAIN)
9333 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9334
9335 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9336
9337 /* If, under 31 bit, an odd number of GPRs has to be saved, we have
9338 to adjust the frame size to sustain 8-byte alignment of stack
9339 frames. */
9340 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9341 STACK_BOUNDARY / BITS_PER_UNIT - 1)
9342 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
9343 }
9344
9345 /* Generate frame layout. Fills in register and frame data for the current
9346 function in cfun->machine. This routine can be called multiple times;
9347 it will re-do the complete frame layout every time. */
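
/* The loop below iterates because the decision whether a literal pool
   base register is needed depends on the frame size, which in turn
   depends on the register save decisions; it stops once frame_size no
   longer changes.  */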
9348
9349 static void
9350 s390_init_frame_layout (void)
9351 {
9352 HOST_WIDE_INT frame_size;
9353 int base_used;
9354
9355 gcc_assert (!reload_completed);
9356
9357 /* On S/390 machines, we may need to perform branch splitting, which
9358 will require both base and return address register. We have no
9359 choice but to assume we're going to need them until right at the
9360 end of the machine dependent reorg phase. */
9361 if (!TARGET_CPU_ZARCH)
9362 cfun->machine->split_branches_pending_p = true;
9363
9364 do
9365 {
9366 frame_size = cfun_frame_layout.frame_size;
9367
9368 /* Try to predict whether we'll need the base register. */
9369 base_used = cfun->machine->split_branches_pending_p
9370 || crtl->uses_const_pool
9371 || (!DISP_IN_RANGE (frame_size)
9372 && !CONST_OK_FOR_K (frame_size));
9373
9374 /* Decide which register to use as literal pool base. In small
9375 leaf functions, try to use an unused call-clobbered register
9376 as base register to avoid save/restore overhead. */
9377 if (!base_used)
9378 cfun->machine->base_reg = NULL_RTX;
9379 else if (crtl->is_leaf && !df_regs_ever_live_p (5))
9380 cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
9381 else
9382 cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
9383
9384 s390_register_info ();
9385 s390_frame_info ();
9386 }
9387 while (frame_size != cfun_frame_layout.frame_size);
9388 }
9389
9390 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
9391 the TX is nonescaping. A transaction is considered escaping if
9392 there is at least one path from tbegin returning CC0 to the
9393 function exit block without a tend.
9394
9395 The check so far has some limitations:
9396 - only single tbegin/tend BBs are supported
9397 - the first cond jump after tbegin must separate the CC0 path from ~CC0
9398 - when CC is copied to a GPR and the CC0 check is done with the GPR
9399 this is not supported
9400 */
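
/* If the CC0 successor block of the tbegin is post-dominated by the
   tend block, the transaction cannot escape and the FPR clobbers are
   dropped by rebuilding the tbegin PARALLEL from its first two elements
   only (see the end of this function).  */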
9401
9402 static void
9403 s390_optimize_nonescaping_tx (void)
9404 {
9405 const unsigned int CC0 = 1 << 3;
9406 basic_block tbegin_bb = NULL;
9407 basic_block tend_bb = NULL;
9408 basic_block bb;
9409 rtx_insn *insn;
9410 bool result = true;
9411 int bb_index;
9412 rtx_insn *tbegin_insn = NULL;
9413
9414 if (!cfun->machine->tbegin_p)
9415 return;
9416
9417 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
9418 {
9419 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
9420
9421 if (!bb)
9422 continue;
9423
9424 FOR_BB_INSNS (bb, insn)
9425 {
9426 rtx ite, cc, pat, target;
9427 unsigned HOST_WIDE_INT mask;
9428
9429 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
9430 continue;
9431
9432 pat = PATTERN (insn);
9433
9434 if (GET_CODE (pat) == PARALLEL)
9435 pat = XVECEXP (pat, 0, 0);
9436
9437 if (GET_CODE (pat) != SET
9438 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
9439 continue;
9440
9441 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
9442 {
9443 rtx_insn *tmp;
9444
9445 tbegin_insn = insn;
9446
9447 /* Just return if the tbegin doesn't have clobbers. */
9448 if (GET_CODE (PATTERN (insn)) != PARALLEL)
9449 return;
9450
9451 if (tbegin_bb != NULL)
9452 return;
9453
9454 /* Find the next conditional jump. */
9455 for (tmp = NEXT_INSN (insn);
9456 tmp != NULL_RTX;
9457 tmp = NEXT_INSN (tmp))
9458 {
9459 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
9460 return;
9461 if (!JUMP_P (tmp))
9462 continue;
9463
9464 ite = SET_SRC (PATTERN (tmp));
9465 if (GET_CODE (ite) != IF_THEN_ELSE)
9466 continue;
9467
9468 cc = XEXP (XEXP (ite, 0), 0);
9469 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
9470 || GET_MODE (cc) != CCRAWmode
9471 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
9472 return;
9473
9474 if (bb->succs->length () != 2)
9475 return;
9476
9477 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
9478 if (GET_CODE (XEXP (ite, 0)) == NE)
9479 mask ^= 0xf;
9480
9481 if (mask == CC0)
9482 target = XEXP (ite, 1);
9483 else if (mask == (CC0 ^ 0xf))
9484 target = XEXP (ite, 2);
9485 else
9486 return;
9487
9488 {
9489 edge_iterator ei;
9490 edge e1, e2;
9491
9492 ei = ei_start (bb->succs);
9493 e1 = ei_safe_edge (ei);
9494 ei_next (&ei);
9495 e2 = ei_safe_edge (ei);
9496
9497 if (e2->flags & EDGE_FALLTHRU)
9498 {
9499 e2 = e1;
9500 e1 = ei_safe_edge (ei);
9501 }
9502
9503 if (!(e1->flags & EDGE_FALLTHRU))
9504 return;
9505
9506 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
9507 }
9508 if (tmp == BB_END (bb))
9509 break;
9510 }
9511 }
9512
9513 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
9514 {
9515 if (tend_bb != NULL)
9516 return;
9517 tend_bb = bb;
9518 }
9519 }
9520 }
9521
9522 /* Either we successfully remove the FPR clobbers here or we are not
9523 able to do anything for this TX. Both cases don't qualify for
9524 another look. */
9525 cfun->machine->tbegin_p = false;
9526
9527 if (tbegin_bb == NULL || tend_bb == NULL)
9528 return;
9529
9530 calculate_dominance_info (CDI_POST_DOMINATORS);
9531 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
9532 free_dominance_info (CDI_POST_DOMINATORS);
9533
9534 if (!result)
9535 return;
9536
9537 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
9538 gen_rtvec (2,
9539 XVECEXP (PATTERN (tbegin_insn), 0, 0),
9540 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
9541 INSN_CODE (tbegin_insn) = -1;
9542 df_insn_rescan (tbegin_insn);
9543
9544 return;
9545 }
9546
9547 /* Return true if it is legal to put a value with MODE into REGNO. */
9548
9549 bool
9550 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
9551 {
9552 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
9553 return false;
9554
9555 switch (REGNO_REG_CLASS (regno))
9556 {
9557 case VEC_REGS:
9558 return ((GET_MODE_CLASS (mode) == MODE_INT
9559 && s390_class_max_nregs (VEC_REGS, mode) == 1)
9560 || mode == DFmode
9561 || s390_vector_mode_supported_p (mode));
9562 break;
9563 case FP_REGS:
9564 if (TARGET_VX
9565 && ((GET_MODE_CLASS (mode) == MODE_INT
9566 && s390_class_max_nregs (FP_REGS, mode) == 1)
9567 || mode == DFmode
9568 || s390_vector_mode_supported_p (mode)))
9569 return true;
9570
9571 if (REGNO_PAIR_OK (regno, mode))
9572 {
9573 if (mode == SImode || mode == DImode)
9574 return true;
9575
9576 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
9577 return true;
9578 }
9579 break;
9580 case ADDR_REGS:
9581 if (FRAME_REGNO_P (regno) && mode == Pmode)
9582 return true;
9583
9584 /* fallthrough */
9585 case GENERAL_REGS:
9586 if (REGNO_PAIR_OK (regno, mode))
9587 {
9588 if (TARGET_ZARCH
9589 || (mode != TFmode && mode != TCmode && mode != TDmode))
9590 return true;
9591 }
9592 break;
9593 case CC_REGS:
9594 if (GET_MODE_CLASS (mode) == MODE_CC)
9595 return true;
9596 break;
9597 case ACCESS_REGS:
9598 if (REGNO_PAIR_OK (regno, mode))
9599 {
9600 if (mode == SImode || mode == Pmode)
9601 return true;
9602 }
9603 break;
9604 default:
9605 return false;
9606 }
9607
9608 return false;
9609 }
9610
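/* A rough sketch of what s390_hard_regno_mode_ok above accepts
   (illustrative, not exhaustive; assumes a TARGET_VX configuration
   where noted):

     - DFmode in an FPR or VR: accepted directly when TARGET_VX.
     - TFmode in an FPR: only via the REGNO_PAIR_OK path, i.e. it
       needs an FPR pair.
     - TFmode in a GPR: accepted only with TARGET_ZARCH, again subject
       to REGNO_PAIR_OK.
     - Anything other than a MODE_CC mode in CC_REGS: rejected.  */
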
9611 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
9612
9613 bool
9614 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
9615 {
9616 /* Once we've decided upon a register to use as base register, it must
9617 no longer be used for any other purpose. */
9618 if (cfun->machine->base_reg)
9619 if (REGNO (cfun->machine->base_reg) == old_reg
9620 || REGNO (cfun->machine->base_reg) == new_reg)
9621 return false;
9622
9623 /* Prevent regrename from using call-saved regs which haven't
9624 actually been saved. This is necessary since regrename assumes
9625 the backend save/restore decisions are based on
9626 df_regs_ever_live. Since we have our own routine we have to tell
9627 regrename manually about it. */
9628 if (GENERAL_REGNO_P (new_reg)
9629 && !call_really_used_regs[new_reg]
9630 && cfun_gpr_save_slot (new_reg) == 0)
9631 return false;
9632
9633 return true;
9634 }
9635
9636 /* Return nonzero if register REGNO can be used as a scratch register
9637 in peephole2. */
9638
9639 static bool
9640 s390_hard_regno_scratch_ok (unsigned int regno)
9641 {
9642 /* See s390_hard_regno_rename_ok. */
9643 if (GENERAL_REGNO_P (regno)
9644 && !call_really_used_regs[regno]
9645 && cfun_gpr_save_slot (regno) == 0)
9646 return false;
9647
9648 return true;
9649 }
9650
9651 /* Maximum number of registers to represent a value of mode MODE
9652 in a register of class RCLASS. */
9653
9654 int
9655 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
9656 {
9657 int reg_size;
9658 bool reg_pair_required_p = false;
9659
9660 switch (rclass)
9661 {
9662 case FP_REGS:
9663 case VEC_REGS:
9664 reg_size = TARGET_VX ? 16 : 8;
9665
9666 /* TF and TD modes would fit into a VR but we put them into a
9667 register pair since we do not have 128-bit FP instructions on
9668 full VRs. */
9669 if (TARGET_VX
9670 && SCALAR_FLOAT_MODE_P (mode)
9671 && GET_MODE_SIZE (mode) >= 16)
9672 reg_pair_required_p = true;
9673
9674 /* Even if complex types would fit into a single FPR/VR we force
9675 them into a register pair to deal with the parts more easily.
9676 (FIXME: What about complex ints?) */
9677 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
9678 reg_pair_required_p = true;
9679 break;
9680 case ACCESS_REGS:
9681 reg_size = 4;
9682 break;
9683 default:
9684 reg_size = UNITS_PER_WORD;
9685 break;
9686 }
9687
9688 if (reg_pair_required_p)
9689 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
9690
9691 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
9692 }
9693
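/* Worked examples for s390_class_max_nregs above (assuming a 64-bit
   target where UNITS_PER_WORD == 8):

     TFmode (16 bytes) in VEC_REGS with TARGET_VX:
       reg_size = 16, reg_pair_required_p = true
       -> 2 * ((16/2 + 15) / 16) = 2 registers (a VR pair).

     TFmode (16 bytes) in FP_REGS without TARGET_VX:
       reg_size = 8  ->  (16 + 7) / 8 = 2 registers (an FPR pair).

     TImode (16 bytes) in GENERAL_REGS:
       reg_size = 8  ->  (16 + 7) / 8 = 2 registers (a GPR pair).  */
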
9694 /* Return TRUE if changing mode from FROM_MODE to TO_MODE should not
9695 be allowed for register class RCLASS. */
9696
9697 int
9698 s390_cannot_change_mode_class (machine_mode from_mode,
9699 machine_mode to_mode,
9700 enum reg_class rclass)
9701 {
9702 machine_mode small_mode;
9703 machine_mode big_mode;
9704
9705 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
9706 return 0;
9707
9708 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
9709 {
9710 small_mode = from_mode;
9711 big_mode = to_mode;
9712 }
9713 else
9714 {
9715 small_mode = to_mode;
9716 big_mode = from_mode;
9717 }
9718
9719 /* Values residing in VRs are little-endian style. All modes are
9720 placed left-aligned in a VR. This means that we cannot allow
9721 switching between modes with differing sizes. Also, even when the
9722 vector facility is available, we still place TFmode values in VR
9723 register pairs, since the only instructions we have operating on
9724 TFmode deal with register pairs. Therefore we have to allow DFmode
9725 subregs of TFmodes to enable the TFmode splitters. */
9726 if (reg_classes_intersect_p (VEC_REGS, rclass)
9727 && (GET_MODE_SIZE (small_mode) < 8
9728 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
9729 return 1;
9730
9731 /* Likewise for access registers, since they have only half the
9732 word size on 64-bit. */
9733 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
9734 return 1;
9735
9736 return 0;
9737 }
9738
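/* Illustration of the rule in s390_cannot_change_mode_class above for
   a TARGET_VX configuration:

     (subreg:DF (reg:TF <vr>) 0) is allowed: both modes are at least
     8 bytes wide and TFmode still occupies a register pair, so neither
     test in the VEC_REGS clause triggers.

     (subreg:SF (reg:DF <vr>) 0) is rejected: the smaller mode is only
     4 bytes wide, which the left-aligned layout in a VR cannot
     support.  */
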
9739 /* Return true if we use LRA instead of reload pass. */
9740 static bool
9741 s390_lra_p (void)
9742 {
9743 return s390_lra_flag;
9744 }
9745
9746 /* Return true if register FROM can be eliminated via register TO. */
9747
9748 static bool
9749 s390_can_eliminate (const int from, const int to)
9750 {
9751 /* On zSeries machines, we have not marked the base register as fixed.
9752 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
9753 If a function requires the base register, we say here that this
9754 elimination cannot be performed. This will cause reload to free
9755 up the base register (as if it were fixed). On the other hand,
9756 if the current function does *not* require the base register, we
9757 say here the elimination succeeds, which in turn allows reload
9758 to allocate the base register for any other purpose. */
9759 if (from == BASE_REGNUM && to == BASE_REGNUM)
9760 {
9761 if (TARGET_CPU_ZARCH)
9762 {
9763 s390_init_frame_layout ();
9764 return cfun->machine->base_reg == NULL_RTX;
9765 }
9766
9767 return false;
9768 }
9769
9770 /* Everything else must point into the stack frame. */
9771 gcc_assert (to == STACK_POINTER_REGNUM
9772 || to == HARD_FRAME_POINTER_REGNUM);
9773
9774 gcc_assert (from == FRAME_POINTER_REGNUM
9775 || from == ARG_POINTER_REGNUM
9776 || from == RETURN_ADDRESS_POINTER_REGNUM);
9777
9778 /* Make sure we actually saved the return address. */
9779 if (from == RETURN_ADDRESS_POINTER_REGNUM)
9780 if (!crtl->calls_eh_return
9781 && !cfun->stdarg
9782 && !cfun_frame_layout.save_return_addr_p)
9783 return false;
9784
9785 return true;
9786 }
9787
9788 /* Return the offset between registers FROM and TO immediately after the prologue. */
9789
9790 HOST_WIDE_INT
9791 s390_initial_elimination_offset (int from, int to)
9792 {
9793 HOST_WIDE_INT offset;
9794
9795 /* ??? Why are we called for non-eliminable pairs? */
9796 if (!s390_can_eliminate (from, to))
9797 return 0;
9798
9799 switch (from)
9800 {
9801 case FRAME_POINTER_REGNUM:
9802 offset = (get_frame_size()
9803 + STACK_POINTER_OFFSET
9804 + crtl->outgoing_args_size);
9805 break;
9806
9807 case ARG_POINTER_REGNUM:
9808 s390_init_frame_layout ();
9809 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
9810 break;
9811
9812 case RETURN_ADDRESS_POINTER_REGNUM:
9813 s390_init_frame_layout ();
9814
9815 if (cfun_frame_layout.first_save_gpr_slot == -1)
9816 {
9817 /* If it turns out that for stdarg nothing went into the reg
9818 save area we also do not need the return address
9819 pointer. */
9820 if (cfun->stdarg && !cfun_save_arg_fprs_p)
9821 return 0;
9822
9823 gcc_unreachable ();
9824 }
9825
9826 /* In order to make the following work it is not necessary for
9827 r14 to have a save slot. It is sufficient if one other GPR
9828 got one. Since the GPRs are always stored without gaps we
9829 are able to calculate where the r14 save slot would
9830 reside. */
9831 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
9832 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
9833 UNITS_PER_LONG);
9834 break;
9835
9836 case BASE_REGNUM:
9837 offset = 0;
9838 break;
9839
9840 default:
9841 gcc_unreachable ();
9842 }
9843
9844 return offset;
9845 }
9846
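/* Notes on the offsets computed by s390_initial_elimination_offset
   above (a sketch; the concrete values depend on the ABI in use):

     ARG_POINTER_REGNUM eliminates to frame_size + STACK_POINTER_OFFSET,
     i.e. the distance from the new stack pointer to the incoming
     argument area.

     RETURN_ADDRESS_POINTER_REGNUM eliminates to the address of the
     slot where r14 would reside within the GPR save area; since GPRs
     are stored contiguously starting at first_save_gpr_slot, that slot
     can be computed even if r14 itself was not saved.  */
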
9847 /* Emit insn to save fpr REGNUM at offset OFFSET relative
9848 to register BASE. Return generated insn. */
9849
9850 static rtx
9851 save_fpr (rtx base, int offset, int regnum)
9852 {
9853 rtx addr;
9854 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
9855
9856 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
9857 set_mem_alias_set (addr, get_varargs_alias_set ());
9858 else
9859 set_mem_alias_set (addr, get_frame_alias_set ());
9860
9861 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
9862 }
9863
9864 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
9865 to register BASE. Return generated insn. */
9866
9867 static rtx
9868 restore_fpr (rtx base, int offset, int regnum)
9869 {
9870 rtx addr;
9871 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
9872 set_mem_alias_set (addr, get_frame_alias_set ());
9873
9874 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
9875 }
9876
9877 /* Return true if REGNO is a global register, but not one
9878 of the special ones that need to be saved/restored anyway. */
9879
9880 static inline bool
9881 global_not_special_regno_p (int regno)
9882 {
9883 return (global_regs[regno]
9884 /* These registers are special and need to be
9885 restored in any case. */
9886 && !(regno == STACK_POINTER_REGNUM
9887 || regno == RETURN_REGNUM
9888 || regno == BASE_REGNUM
9889 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9890 }
9891
9892 /* Generate insn to save registers FIRST to LAST into
9893 the register save area located at offset OFFSET
9894 relative to register BASE. */
9895
9896 static rtx
9897 save_gprs (rtx base, int offset, int first, int last)
9898 {
9899 rtx addr, insn, note;
9900 int i;
9901
9902 addr = plus_constant (Pmode, base, offset);
9903 addr = gen_rtx_MEM (Pmode, addr);
9904
9905 set_mem_alias_set (addr, get_frame_alias_set ());
9906
9907 /* Special-case single register. */
9908 if (first == last)
9909 {
9910 if (TARGET_64BIT)
9911 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
9912 else
9913 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
9914
9915 if (!global_not_special_regno_p (first))
9916 RTX_FRAME_RELATED_P (insn) = 1;
9917 return insn;
9918 }
9919
9920
9921 insn = gen_store_multiple (addr,
9922 gen_rtx_REG (Pmode, first),
9923 GEN_INT (last - first + 1));
9924
9925 if (first <= 6 && cfun->stdarg)
9926 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9927 {
9928 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
9929
9930 if (first + i <= 6)
9931 set_mem_alias_set (mem, get_varargs_alias_set ());
9932 }
9933
9934 /* We need to set the FRAME_RELATED flag on all SETs
9935 inside the store-multiple pattern.
9936
9937 However, we must not emit DWARF records for registers 2..5
9938 if they are stored for use by variable arguments ...
9939
9940 ??? Unfortunately, it is not enough to simply not set the
9941 FRAME_RELATED flags for those SETs, because the first SET
9942 of the PARALLEL is always treated as if it had the flag
9943 set, even if it does not. Therefore we emit a new pattern
9944 without those registers as a REG_FRAME_RELATED_EXPR note. */
9945
9946 if (first >= 6 && !global_not_special_regno_p (first))
9947 {
9948 rtx pat = PATTERN (insn);
9949
9950 for (i = 0; i < XVECLEN (pat, 0); i++)
9951 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
9952 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
9953 0, i)))))
9954 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
9955
9956 RTX_FRAME_RELATED_P (insn) = 1;
9957 }
9958 else if (last >= 6)
9959 {
9960 int start;
9961
9962 for (start = first >= 6 ? first : 6; start <= last; start++)
9963 if (!global_not_special_regno_p (start))
9964 break;
9965
9966 if (start > last)
9967 return insn;
9968
9969 addr = plus_constant (Pmode, base,
9970 offset + (start - first) * UNITS_PER_LONG);
9971
9972 if (start == last)
9973 {
9974 if (TARGET_64BIT)
9975 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
9976 gen_rtx_REG (Pmode, start));
9977 else
9978 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
9979 gen_rtx_REG (Pmode, start));
9980 note = PATTERN (note);
9981
9982 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
9983 RTX_FRAME_RELATED_P (insn) = 1;
9984
9985 return insn;
9986 }
9987
9988 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
9989 gen_rtx_REG (Pmode, start),
9990 GEN_INT (last - start + 1));
9991 note = PATTERN (note);
9992
9993 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
9994
9995 for (i = 0; i < XVECLEN (note, 0); i++)
9996 if (GET_CODE (XVECEXP (note, 0, i)) == SET
9997 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
9998 0, i)))))
9999 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10000
10001 RTX_FRAME_RELATED_P (insn) = 1;
10002 }
10003
10004 return insn;
10005 }
10006
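/* Example for the REG_FRAME_RELATED_EXPR handling in save_gprs above:
   when a stdarg function stores r2..r15 with a single store-multiple
   insn, only the saves of r6..r15 may appear in the DWARF CFI.  Since
   the first SET of a PARALLEL is always treated as frame related, the
   insn instead carries a note with a separate store-multiple pattern
   covering just r6..r15, and only the SETs inside that note are
   flagged.  */
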
10007 /* Generate insn to restore registers FIRST to LAST from
10008 the register save area located at offset OFFSET
10009 relative to register BASE. */
10010
10011 static rtx
10012 restore_gprs (rtx base, int offset, int first, int last)
10013 {
10014 rtx addr, insn;
10015
10016 addr = plus_constant (Pmode, base, offset);
10017 addr = gen_rtx_MEM (Pmode, addr);
10018 set_mem_alias_set (addr, get_frame_alias_set ());
10019
10020 /* Special-case single register. */
10021 if (first == last)
10022 {
10023 if (TARGET_64BIT)
10024 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10025 else
10026 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10027
10028 RTX_FRAME_RELATED_P (insn) = 1;
10029 return insn;
10030 }
10031
10032 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10033 addr,
10034 GEN_INT (last - first + 1));
10035 RTX_FRAME_RELATED_P (insn) = 1;
10036 return insn;
10037 }
10038
10039 /* Return insn sequence to load the GOT register. */
10040
10041 static GTY(()) rtx got_symbol;
10042 rtx_insn *
10043 s390_load_got (void)
10044 {
10045 rtx_insn *insns;
10046
10047 /* We cannot use pic_offset_table_rtx here since this function is
10048 also used for non-PIC code when __tls_get_offset is called; in
10049 that case neither PIC_OFFSET_TABLE_REGNUM nor pic_offset_table_rtx
10050 is usable. */
10051 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10052
10053 if (!got_symbol)
10054 {
10055 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10056 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
10057 }
10058
10059 start_sequence ();
10060
10061 if (TARGET_CPU_ZARCH)
10062 {
10063 emit_move_insn (got_rtx, got_symbol);
10064 }
10065 else
10066 {
10067 rtx offset;
10068
10069 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
10070 UNSPEC_LTREL_OFFSET);
10071 offset = gen_rtx_CONST (Pmode, offset);
10072 offset = force_const_mem (Pmode, offset);
10073
10074 emit_move_insn (got_rtx, offset);
10075
10076 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
10077 UNSPEC_LTREL_BASE);
10078 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
10079
10080 emit_move_insn (got_rtx, offset);
10081 }
10082
10083 insns = get_insns ();
10084 end_sequence ();
10085 return insns;
10086 }
10087
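/* The sequence built by s390_load_got above loads the GOT address into
   r12.  On TARGET_CPU_ZARCH this is a single move of
   _GLOBAL_OFFSET_TABLE_; on older CPUs the GOT offset is fetched from
   the literal pool and the literal pool base (UNSPEC_LTREL_BASE) is
   added to it.  Hard register 12 is used directly instead of
   pic_offset_table_rtx so that the sequence also works for the
   non-PIC __tls_get_offset case mentioned above.  */
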
10088 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10089 and the change to the stack pointer. */
10090
10091 static void
10092 s390_emit_stack_tie (void)
10093 {
10094 rtx mem = gen_frame_mem (BLKmode,
10095 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10096
10097 emit_insn (gen_stack_tie (mem));
10098 }
10099
10100 /* Copy GPRs into FPR save slots. */
10101
10102 static void
10103 s390_save_gprs_to_fprs (void)
10104 {
10105 int i;
10106
10107 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10108 return;
10109
10110 for (i = 6; i < 16; i++)
10111 {
10112 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10113 {
10114 rtx_insn *insn =
10115 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10116 gen_rtx_REG (DImode, i));
10117 RTX_FRAME_RELATED_P (insn) = 1;
10118 }
10119 }
10120 }
10121
10122 /* Restore GPRs from FPR save slots. */
10123
10124 static void
10125 s390_restore_gprs_from_fprs (void)
10126 {
10127 int i;
10128
10129 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10130 return;
10131
10132 for (i = 6; i < 16; i++)
10133 {
10134 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10135 {
10136 rtx_insn *insn =
10137 emit_move_insn (gen_rtx_REG (DImode, i),
10138 gen_rtx_REG (DImode, cfun_gpr_save_slot (i)));
10139 df_set_regs_ever_live (i, true);
10140 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10141 if (i == STACK_POINTER_REGNUM)
10142 add_reg_note (insn, REG_CFA_DEF_CFA,
10143 plus_constant (Pmode, stack_pointer_rtx,
10144 STACK_POINTER_OFFSET));
10145 RTX_FRAME_RELATED_P (insn) = 1;
10146 }
10147 }
10148 }
10149
10150
10151 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10152 generation. */
10153
10154 namespace {
10155
10156 const pass_data pass_data_s390_early_mach =
10157 {
10158 RTL_PASS, /* type */
10159 "early_mach", /* name */
10160 OPTGROUP_NONE, /* optinfo_flags */
10161 TV_MACH_DEP, /* tv_id */
10162 0, /* properties_required */
10163 0, /* properties_provided */
10164 0, /* properties_destroyed */
10165 0, /* todo_flags_start */
10166 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10167 };
10168
10169 class pass_s390_early_mach : public rtl_opt_pass
10170 {
10171 public:
10172 pass_s390_early_mach (gcc::context *ctxt)
10173 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10174 {}
10175
10176 /* opt_pass methods: */
10177 virtual unsigned int execute (function *);
10178
10179 }; // class pass_s390_early_mach
10180
10181 unsigned int
10182 pass_s390_early_mach::execute (function *fun)
10183 {
10184 rtx_insn *insn;
10185
10186 /* Try to get rid of the FPR clobbers. */
10187 s390_optimize_nonescaping_tx ();
10188
10189 /* Re-compute register info. */
10190 s390_register_info ();
10191
10192 /* If we're using a base register, ensure that it is always valid for
10193 the first non-prologue instruction. */
10194 if (fun->machine->base_reg)
10195 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10196
10197 /* Annotate all constant pool references to let the scheduler know
10198 they implicitly use the base register. */
10199 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10200 if (INSN_P (insn))
10201 {
10202 annotate_constant_pool_refs (&PATTERN (insn));
10203 df_insn_rescan (insn);
10204 }
10205 return 0;
10206 }
10207
10208 } // anon namespace
10209
10210 /* Expand the prologue into a bunch of separate insns. */
10211
10212 void
10213 s390_emit_prologue (void)
10214 {
10215 rtx insn, addr;
10216 rtx temp_reg;
10217 int i;
10218 int offset;
10219 int next_fpr = 0;
10220
10221 /* Choose the best register to use as a temporary within the prologue.
10222 See below for why TPF must use register 1. */
10223
10224 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10225 && !crtl->is_leaf
10226 && !TARGET_TPF_PROFILING)
10227 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10228 else
10229 temp_reg = gen_rtx_REG (Pmode, 1);
10230
10231 s390_save_gprs_to_fprs ();
10232
10233 /* Save call saved gprs. */
10234 if (cfun_frame_layout.first_save_gpr != -1)
10235 {
10236 insn = save_gprs (stack_pointer_rtx,
10237 cfun_frame_layout.gprs_offset +
10238 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
10239 - cfun_frame_layout.first_save_gpr_slot),
10240 cfun_frame_layout.first_save_gpr,
10241 cfun_frame_layout.last_save_gpr);
10242 emit_insn (insn);
10243 }
10244
10245 /* Dummy insn to mark literal pool slot. */
10246
10247 if (cfun->machine->base_reg)
10248 emit_insn (gen_main_pool (cfun->machine->base_reg));
10249
10250 offset = cfun_frame_layout.f0_offset;
10251
10252 /* Save f0 and f2. */
10253 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
10254 {
10255 if (cfun_fpr_save_p (i))
10256 {
10257 save_fpr (stack_pointer_rtx, offset, i);
10258 offset += 8;
10259 }
10260 else if (!TARGET_PACKED_STACK || cfun->stdarg)
10261 offset += 8;
10262 }
10263
10264 /* Save f4 and f6. */
10265 offset = cfun_frame_layout.f4_offset;
10266 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10267 {
10268 if (cfun_fpr_save_p (i))
10269 {
10270 insn = save_fpr (stack_pointer_rtx, offset, i);
10271 offset += 8;
10272
10273 /* If f4 and f6 are call clobbered, they are saved because of
10274 stdarg and therefore are not frame related. */
10275 if (!call_really_used_regs[i])
10276 RTX_FRAME_RELATED_P (insn) = 1;
10277 }
10278 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
10279 offset += 8;
10280 }
10281
10282 if (TARGET_PACKED_STACK
10283 && cfun_save_high_fprs_p
10284 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
10285 {
10286 offset = (cfun_frame_layout.f8_offset
10287 + (cfun_frame_layout.high_fprs - 1) * 8);
10288
10289 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
10290 if (cfun_fpr_save_p (i))
10291 {
10292 insn = save_fpr (stack_pointer_rtx, offset, i);
10293
10294 RTX_FRAME_RELATED_P (insn) = 1;
10295 offset -= 8;
10296 }
10297 if (offset >= cfun_frame_layout.f8_offset)
10298 next_fpr = i;
10299 }
10300
10301 if (!TARGET_PACKED_STACK)
10302 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
10303
10304 if (flag_stack_usage_info)
10305 current_function_static_stack_size = cfun_frame_layout.frame_size;
10306
10307 /* Decrement stack pointer. */
10308
10309 if (cfun_frame_layout.frame_size > 0)
10310 {
10311 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10312 rtx real_frame_off;
10313
10314 if (s390_stack_size)
10315 {
10316 HOST_WIDE_INT stack_guard;
10317
10318 if (s390_stack_guard)
10319 stack_guard = s390_stack_guard;
10320 else
10321 {
10322 /* If no value for the stack guard is provided, the smallest power
10323 of 2 larger than the current frame size is chosen. */
10324 stack_guard = 1;
10325 while (stack_guard < cfun_frame_layout.frame_size)
10326 stack_guard <<= 1;
10327 }
10328
10329 if (cfun_frame_layout.frame_size >= s390_stack_size)
10330 {
10331 warning (0, "frame size of function %qs is %wd"
10332 " bytes exceeding user provided stack limit of "
10333 "%d bytes. "
10334 "An unconditional trap is added.",
10335 current_function_name(), cfun_frame_layout.frame_size,
10336 s390_stack_size);
10337 emit_insn (gen_trap ());
10338 }
10339 else
10340 {
10341 /* stack_guard has to be smaller than s390_stack_size.
10342 Otherwise we would emit an AND with zero which would
10343 not match the test under mask pattern. */
10344 if (stack_guard >= s390_stack_size)
10345 {
10346 warning (0, "frame size of function %qs is %wd"
10347 " bytes which is more than half the stack size. "
10348 "The dynamic check would not be reliable. "
10349 "No check emitted for this function.",
10350 current_function_name(),
10351 cfun_frame_layout.frame_size);
10352 }
10353 else
10354 {
10355 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
10356 & ~(stack_guard - 1));
10357
10358 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
10359 GEN_INT (stack_check_mask));
10360 if (TARGET_64BIT)
10361 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
10362 t, const0_rtx),
10363 t, const0_rtx, const0_rtx));
10364 else
10365 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
10366 t, const0_rtx),
10367 t, const0_rtx, const0_rtx));
10368 }
10369 }
10370 }
10371
10372 if (s390_warn_framesize > 0
10373 && cfun_frame_layout.frame_size >= s390_warn_framesize)
10374 warning (0, "frame size of %qs is %wd bytes",
10375 current_function_name (), cfun_frame_layout.frame_size);
10376
10377 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
10378 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
10379
10380 /* Save incoming stack pointer into temp reg. */
10381 if (TARGET_BACKCHAIN || next_fpr)
10382 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
10383
10384 /* Subtract frame size from stack pointer. */
10385
10386 if (DISP_IN_RANGE (INTVAL (frame_off)))
10387 {
10388 insn = gen_rtx_SET (stack_pointer_rtx,
10389 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10390 frame_off));
10391 insn = emit_insn (insn);
10392 }
10393 else
10394 {
10395 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10396 frame_off = force_const_mem (Pmode, frame_off);
10397
10398 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
10399 annotate_constant_pool_refs (&PATTERN (insn));
10400 }
10401
10402 RTX_FRAME_RELATED_P (insn) = 1;
10403 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10404 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10405 gen_rtx_SET (stack_pointer_rtx,
10406 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10407 real_frame_off)));
10408
10409 /* Set backchain. */
10410
10411 if (TARGET_BACKCHAIN)
10412 {
10413 if (cfun_frame_layout.backchain_offset)
10414 addr = gen_rtx_MEM (Pmode,
10415 plus_constant (Pmode, stack_pointer_rtx,
10416 cfun_frame_layout.backchain_offset));
10417 else
10418 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10419 set_mem_alias_set (addr, get_frame_alias_set ());
10420 insn = emit_insn (gen_move_insn (addr, temp_reg));
10421 }
10422
10423 /* If we support non-call exceptions (e.g. for Java),
10424 we need to make sure the backchain pointer is set up
10425 before any possibly trapping memory access. */
10426 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
10427 {
10428 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
10429 emit_clobber (addr);
10430 }
10431 }
10432
10433 /* Save fprs 8 - 15 (64 bit ABI). */
10434
10435 if (cfun_save_high_fprs_p && next_fpr)
10436 {
10437 /* If the stack might be accessed through a different register
10438 we have to make sure that the stack pointer decrement is not
10439 moved below the use of the stack slots. */
10440 s390_emit_stack_tie ();
10441
10442 insn = emit_insn (gen_add2_insn (temp_reg,
10443 GEN_INT (cfun_frame_layout.f8_offset)));
10444
10445 offset = 0;
10446
10447 for (i = FPR8_REGNUM; i <= next_fpr; i++)
10448 if (cfun_fpr_save_p (i))
10449 {
10450 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
10451 cfun_frame_layout.frame_size
10452 + cfun_frame_layout.f8_offset
10453 + offset);
10454
10455 insn = save_fpr (temp_reg, offset, i);
10456 offset += 8;
10457 RTX_FRAME_RELATED_P (insn) = 1;
10458 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10459 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
10460 gen_rtx_REG (DFmode, i)));
10461 }
10462 }
10463
10464 /* Set frame pointer, if needed. */
10465
10466 if (frame_pointer_needed)
10467 {
10468 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10469 RTX_FRAME_RELATED_P (insn) = 1;
10470 }
10471
10472 /* Set up got pointer, if needed. */
10473
10474 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10475 {
10476 rtx_insn *insns = s390_load_got ();
10477
10478 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
10479 annotate_constant_pool_refs (&PATTERN (insn));
10480
10481 emit_insn (insns);
10482 }
10483
10484 if (TARGET_TPF_PROFILING)
10485 {
10486 /* Generate a BAS instruction to serve as a function
10487 entry intercept to facilitate the use of tracing
10488 algorithms located at the branch target. */
10489 emit_insn (gen_prologue_tpf ());
10490
10491 /* Emit a blockage here so that all code
10492 lies between the profiling mechanisms. */
10493 emit_insn (gen_blockage ());
10494 }
10495 }
10496
10497 /* Expand the epilogue into a bunch of separate insns. */
10498
10499 void
10500 s390_emit_epilogue (bool sibcall)
10501 {
10502 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
10503 int area_bottom, area_top, offset = 0;
10504 int next_offset;
10505 rtvec p;
10506 int i;
10507
10508 if (TARGET_TPF_PROFILING)
10509 {
10510
10511 /* Generate a BAS instruction to serve as a function
10512 entry intercept to facilitate the use of tracing
10513 algorithms located at the branch target. */
10514
10515 /* Emit a blockage here so that all code
10516 lies between the profiling mechanisms. */
10517 emit_insn (gen_blockage ());
10518
10519 emit_insn (gen_epilogue_tpf ());
10520 }
10521
10522 /* Check whether to use frame or stack pointer for restore. */
10523
10524 frame_pointer = (frame_pointer_needed
10525 ? hard_frame_pointer_rtx : stack_pointer_rtx);
10526
10527 s390_frame_area (&area_bottom, &area_top);
10528
10529 /* Check whether we can access the register save area.
10530 If not, increment the frame pointer as required. */
10531
10532 if (area_top <= area_bottom)
10533 {
10534 /* Nothing to restore. */
10535 }
10536 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
10537 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
10538 {
10539 /* Area is in range. */
10540 offset = cfun_frame_layout.frame_size;
10541 }
10542 else
10543 {
10544 rtx insn, frame_off, cfa;
10545
10546 offset = area_bottom < 0 ? -area_bottom : 0;
10547 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
10548
10549 cfa = gen_rtx_SET (frame_pointer,
10550 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
10551 if (DISP_IN_RANGE (INTVAL (frame_off)))
10552 {
10553 insn = gen_rtx_SET (frame_pointer,
10554 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
10555 insn = emit_insn (insn);
10556 }
10557 else
10558 {
10559 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10560 frame_off = force_const_mem (Pmode, frame_off);
10561
10562 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
10563 annotate_constant_pool_refs (&PATTERN (insn));
10564 }
10565 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
10566 RTX_FRAME_RELATED_P (insn) = 1;
10567 }
10568
10569 /* Restore call saved fprs. */
10570
10571 if (TARGET_64BIT)
10572 {
10573 if (cfun_save_high_fprs_p)
10574 {
10575 next_offset = cfun_frame_layout.f8_offset;
10576 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
10577 {
10578 if (cfun_fpr_save_p (i))
10579 {
10580 restore_fpr (frame_pointer,
10581 offset + next_offset, i);
10582 cfa_restores
10583 = alloc_reg_note (REG_CFA_RESTORE,
10584 gen_rtx_REG (DFmode, i), cfa_restores);
10585 next_offset += 8;
10586 }
10587 }
10588 }
10589
10590 }
10591 else
10592 {
10593 next_offset = cfun_frame_layout.f4_offset;
10594 /* f4, f6 */
10595 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10596 {
10597 if (cfun_fpr_save_p (i))
10598 {
10599 restore_fpr (frame_pointer,
10600 offset + next_offset, i);
10601 cfa_restores
10602 = alloc_reg_note (REG_CFA_RESTORE,
10603 gen_rtx_REG (DFmode, i), cfa_restores);
10604 next_offset += 8;
10605 }
10606 else if (!TARGET_PACKED_STACK)
10607 next_offset += 8;
10608 }
10609
10610 }
10611
10612 /* Return register. */
10613
10614 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10615
10616 /* Restore call saved gprs. */
10617
10618 if (cfun_frame_layout.first_restore_gpr != -1)
10619 {
10620 rtx insn, addr;
10621 int i;
10622
10623 /* Check for global registers and save them
10624 to the stack location from which they get restored. */
10625
10626 for (i = cfun_frame_layout.first_restore_gpr;
10627 i <= cfun_frame_layout.last_restore_gpr;
10628 i++)
10629 {
10630 if (global_not_special_regno_p (i))
10631 {
10632 addr = plus_constant (Pmode, frame_pointer,
10633 offset + cfun_frame_layout.gprs_offset
10634 + (i - cfun_frame_layout.first_save_gpr_slot)
10635 * UNITS_PER_LONG);
10636 addr = gen_rtx_MEM (Pmode, addr);
10637 set_mem_alias_set (addr, get_frame_alias_set ());
10638 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
10639 }
10640 else
10641 cfa_restores
10642 = alloc_reg_note (REG_CFA_RESTORE,
10643 gen_rtx_REG (Pmode, i), cfa_restores);
10644 }
10645
10646 if (! sibcall)
10647 {
10648 /* Fetch the return address from the stack before the load multiple;
10649 this helps scheduling.
10650
10651 Only do this if we already decided that r14 needs to be
10652 saved to a stack slot. (And not just because r14 happens to
10653 be in between two GPRs which need saving.) Otherwise it
10654 would be difficult to take that decision back in
10655 s390_optimize_prologue. */
10656 if (cfun_gpr_save_slot (RETURN_REGNUM) == -1)
10657 {
10658 int return_regnum = find_unused_clobbered_reg();
10659 if (!return_regnum)
10660 return_regnum = 4;
10661 return_reg = gen_rtx_REG (Pmode, return_regnum);
10662
10663 addr = plus_constant (Pmode, frame_pointer,
10664 offset + cfun_frame_layout.gprs_offset
10665 + (RETURN_REGNUM
10666 - cfun_frame_layout.first_save_gpr_slot)
10667 * UNITS_PER_LONG);
10668 addr = gen_rtx_MEM (Pmode, addr);
10669 set_mem_alias_set (addr, get_frame_alias_set ());
10670 emit_move_insn (return_reg, addr);
10671
10672 /* Once we did that optimization we have to make sure
10673 s390_optimize_prologue does not try to remove the
10674 store of r14 since we will not be able to find the
10675 load issued here. */
10676 cfun_frame_layout.save_return_addr_p = true;
10677 }
10678 }
10679
10680 insn = restore_gprs (frame_pointer,
10681 offset + cfun_frame_layout.gprs_offset
10682 + (cfun_frame_layout.first_restore_gpr
10683 - cfun_frame_layout.first_save_gpr_slot)
10684 * UNITS_PER_LONG,
10685 cfun_frame_layout.first_restore_gpr,
10686 cfun_frame_layout.last_restore_gpr);
10687 insn = emit_insn (insn);
10688 REG_NOTES (insn) = cfa_restores;
10689 add_reg_note (insn, REG_CFA_DEF_CFA,
10690 plus_constant (Pmode, stack_pointer_rtx,
10691 STACK_POINTER_OFFSET));
10692 RTX_FRAME_RELATED_P (insn) = 1;
10693 }
10694
10695 s390_restore_gprs_from_fprs ();
10696
10697 if (! sibcall)
10698 {
10699
10700 /* Return to caller. */
10701
10702 p = rtvec_alloc (2);
10703
10704 RTVEC_ELT (p, 0) = ret_rtx;
10705 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
10706 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
10707 }
10708 }
10709
10710 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
10711
10712 static void
10713 s300_set_up_by_prologue (hard_reg_set_container *regs)
10714 {
10715 if (cfun->machine->base_reg
10716 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
10717 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
10718 }
10719
10720 /* Return true if the function can use simple_return to return outside
10721 of a shrink-wrapped region. At present shrink-wrapping is supported
10722 in all cases. */
10723
10724 bool
10725 s390_can_use_simple_return_insn (void)
10726 {
10727 return true;
10728 }
10729
10730 /* Return true if the epilogue is guaranteed to contain only a return
10731 instruction and if a direct return can therefore be used instead.
10732 One of the main advantages of using direct return instructions
10733 is that we can then use conditional returns. */
10734
10735 bool
10736 s390_can_use_return_insn (void)
10737 {
10738 int i;
10739
10740 if (!reload_completed)
10741 return false;
10742
10743 if (crtl->profile)
10744 return false;
10745
10746 if (TARGET_TPF_PROFILING)
10747 return false;
10748
10749 for (i = 0; i < 16; i++)
10750 if (cfun_gpr_save_slot (i))
10751 return false;
10752
10753 /* For 31 bit this is not covered by the frame_size check below
10754 since f4, f6 are saved in the register save area without needing
10755 additional stack space. */
10756 if (!TARGET_64BIT
10757 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
10758 return false;
10759
10760 if (cfun->machine->base_reg
10761 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
10762 return false;
10763
10764 return cfun_frame_layout.frame_size == 0;
10765 }
10766
10767 /* The VX ABI differs for vararg functions. Therefore we need the
10768 prototype of the callee to be available when passing vector type
10769 values. */
10770 static const char *
10771 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
10772 {
10773 return ((TARGET_VX_ABI
10774 && typelist == 0
10775 && VECTOR_TYPE_P (TREE_TYPE (val))
10776 && (funcdecl == NULL_TREE
10777 || (TREE_CODE (funcdecl) == FUNCTION_DECL
10778 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
10779 ? N_("Vector argument passed to unprototyped function")
10780 : NULL);
10781 }
10782
10783
10784 /* Return the size in bytes of a function argument of
10785 type TYPE and/or mode MODE. At least one of TYPE or
10786 MODE must be specified. */
10787
10788 static int
10789 s390_function_arg_size (machine_mode mode, const_tree type)
10790 {
10791 if (type)
10792 return int_size_in_bytes (type);
10793
10794 /* No type info available for some library calls ... */
10795 if (mode != BLKmode)
10796 return GET_MODE_SIZE (mode);
10797
10798 /* If we have neither type nor mode, abort */
10799 gcc_unreachable ();
10800 }
10801
10802 /* Return true if a function argument of type TYPE and mode MODE
10803 is to be passed in a vector register, if available. */
10804
10805 bool
10806 s390_function_arg_vector (machine_mode mode, const_tree type)
10807 {
10808 if (!TARGET_VX_ABI)
10809 return false;
10810
10811 if (s390_function_arg_size (mode, type) > 16)
10812 return false;
10813
10814 /* No type info available for some library calls ... */
10815 if (!type)
10816 return VECTOR_MODE_P (mode);
10817
10818 /* The ABI says that record types with a single member are treated
10819 just like that member would be. */
10820 while (TREE_CODE (type) == RECORD_TYPE)
10821 {
10822 tree field, single = NULL_TREE;
10823
10824 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
10825 {
10826 if (TREE_CODE (field) != FIELD_DECL)
10827 continue;
10828
10829 if (single == NULL_TREE)
10830 single = TREE_TYPE (field);
10831 else
10832 return false;
10833 }
10834
10835 if (single == NULL_TREE)
10836 return false;
10837 else
10838 {
10839 /* If the field declaration adds extra bytes due to
10840 e.g. padding, the struct is not accepted as a vector type. */
10841 if (int_size_in_bytes (single) <= 0
10842 || int_size_in_bytes (single) != int_size_in_bytes (type))
10843 return false;
10844 type = single;
10845 }
10846 }
10847
10848 return VECTOR_TYPE_P (type);
10849 }
10850
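/* Examples for s390_function_arg_vector above (TARGET_VX_ABI assumed):

     vector int                       -> vector register candidate.
     struct { vector int v; }         -> treated like its single
                                         member, also a VR candidate.
     struct { vector int v; int i; }  -> more than one member, not a
                                         vector argument.
     any argument larger than 16 bytes is never passed in a VR.  */
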
10851 /* Return true if a function argument of type TYPE and mode MODE
10852 is to be passed in a floating-point register, if available. */
10853
10854 static bool
10855 s390_function_arg_float (machine_mode mode, const_tree type)
10856 {
10857 if (s390_function_arg_size (mode, type) > 8)
10858 return false;
10859
10860 /* Soft-float changes the ABI: no floating-point registers are used. */
10861 if (TARGET_SOFT_FLOAT)
10862 return false;
10863
10864 /* No type info available for some library calls ... */
10865 if (!type)
10866 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
10867
10868 /* The ABI says that record types with a single member are treated
10869 just like that member would be. */
10870 while (TREE_CODE (type) == RECORD_TYPE)
10871 {
10872 tree field, single = NULL_TREE;
10873
10874 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
10875 {
10876 if (TREE_CODE (field) != FIELD_DECL)
10877 continue;
10878
10879 if (single == NULL_TREE)
10880 single = TREE_TYPE (field);
10881 else
10882 return false;
10883 }
10884
10885 if (single == NULL_TREE)
10886 return false;
10887 else
10888 type = single;
10889 }
10890
10891 return TREE_CODE (type) == REAL_TYPE;
10892 }
10893
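/* Examples for s390_function_arg_float above:

     double                      -> FPR candidate.
     struct { double d; }        -> treated like its single member,
                                    also an FPR candidate.
     struct { float f; int i; }  -> two members, not an FP argument.
     values larger than 8 bytes (e.g. TFmode) never go into a single
     FPR, and with TARGET_SOFT_FLOAT no FPRs are used at all.  */
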
10894 /* Return true if a function argument of type TYPE and mode MODE
10895 is to be passed in an integer register, or a pair of integer
10896 registers, if available. */
10897
10898 static bool
10899 s390_function_arg_integer (machine_mode mode, const_tree type)
10900 {
10901 int size = s390_function_arg_size (mode, type);
10902 if (size > 8)
10903 return false;
10904
10905 /* No type info available for some library calls ... */
10906 if (!type)
10907 return GET_MODE_CLASS (mode) == MODE_INT
10908 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
10909
10910 /* We accept small integral (and similar) types. */
10911 if (INTEGRAL_TYPE_P (type)
10912 || POINTER_TYPE_P (type)
10913 || TREE_CODE (type) == NULLPTR_TYPE
10914 || TREE_CODE (type) == OFFSET_TYPE
10915 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
10916 return true;
10917
10918 /* We also accept structs of size 1, 2, 4, 8 that are not
10919 passed in floating-point registers. */
10920 if (AGGREGATE_TYPE_P (type)
10921 && exact_log2 (size) >= 0
10922 && !s390_function_arg_float (mode, type))
10923 return true;
10924
10925 return false;
10926 }
10927
10928 /* Return 1 if a function argument of type TYPE and mode MODE
10929 is to be passed by reference. The ABI specifies that only
10930 structures of size 1, 2, 4, or 8 bytes are passed by value,
10931 all other structures (and complex numbers) are passed by
10932 reference. */
10933
10934 static bool
10935 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
10936 machine_mode mode, const_tree type,
10937 bool named ATTRIBUTE_UNUSED)
10938 {
10939 int size = s390_function_arg_size (mode, type);
10940
10941 if (s390_function_arg_vector (mode, type))
10942 return false;
10943
10944 if (size > 8)
10945 return true;
10946
10947 if (type)
10948 {
10949 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
10950 return true;
10951
10952 if (TREE_CODE (type) == COMPLEX_TYPE
10953 || TREE_CODE (type) == VECTOR_TYPE)
10954 return true;
10955 }
10956
10957 return false;
10958 }
10959
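/* Examples for s390_pass_by_reference above:

     struct { char c[12]; }  -> 12 bytes, passed by reference.
     struct { char c[8]; }   -> 8 bytes (a power of two up to 8),
                                passed by value.
     _Complex double         -> passed by reference.
     vector types            -> passed by reference unless the vector
                                ABI passes them in a VR (checked first
                                via s390_function_arg_vector).  */
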
10960 /* Update the data in CUM to advance over an argument of mode MODE and
10961 data type TYPE. (TYPE is null for libcalls where that information
10962 may not be available.). The boolean NAMED specifies whether the
10963 argument is a named argument (as opposed to an unnamed argument
10964 matching an ellipsis). */
10965
10966 static void
10967 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
10968 const_tree type, bool named)
10969 {
10970 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10971
10972 if (s390_function_arg_vector (mode, type))
10973 {
10974 /* We are called for unnamed vector stdarg arguments which are
10975 passed on the stack. In this case this hook does not have to
10976 do anything since stack arguments are tracked by common
10977 code. */
10978 if (!named)
10979 return;
10980 cum->vrs += 1;
10981 }
10982 else if (s390_function_arg_float (mode, type))
10983 {
10984 cum->fprs += 1;
10985 }
10986 else if (s390_function_arg_integer (mode, type))
10987 {
10988 int size = s390_function_arg_size (mode, type);
10989 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
10990 }
10991 else
10992 gcc_unreachable ();
10993 }
10994
10995 /* Define where to put the arguments to a function.
10996 Value is zero to push the argument on the stack,
10997 or a hard register in which to store the argument.
10998
10999 MODE is the argument's machine mode.
11000 TYPE is the data type of the argument (as a tree).
11001 This is null for libcalls where that information may
11002 not be available.
11003 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11004 the preceding args and about the function being called.
11005 NAMED is nonzero if this argument is a named parameter
11006 (otherwise it is an extra parameter matching an ellipsis).
11007
11008 On S/390, we use general purpose registers 2 through 6 to
11009 pass integer, pointer, and certain structure arguments, and
11010 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11011 to pass floating point arguments. All remaining arguments
11012 are pushed to the stack. */
11013
11014 static rtx
11015 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11016 const_tree type, bool named)
11017 {
11018 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11019
11020 if (!named)
11021 s390_check_type_for_vector_abi (type, true, false);
11022
11023 if (s390_function_arg_vector (mode, type))
11024 {
11025 /* Vector arguments that are part of the ellipsis are passed on the
11026 stack. */
11027 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11028 return NULL_RTX;
11029
11030 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11031 }
11032 else if (s390_function_arg_float (mode, type))
11033 {
11034 if (cum->fprs + 1 > FP_ARG_NUM_REG)
11035 return NULL_RTX;
11036 else
11037 return gen_rtx_REG (mode, cum->fprs + 16);
11038 }
11039 else if (s390_function_arg_integer (mode, type))
11040 {
11041 int size = s390_function_arg_size (mode, type);
11042 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11043
11044 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
11045 return NULL_RTX;
11046 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
11047 return gen_rtx_REG (mode, cum->gprs + 2);
11048 else if (n_gprs == 2)
11049 {
11050 rtvec p = rtvec_alloc (2);
11051
11052 RTVEC_ELT (p, 0)
11053 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
11054 const0_rtx);
11055 RTVEC_ELT (p, 1)
11056 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
11057 GEN_INT (4));
11058
11059 return gen_rtx_PARALLEL (mode, p);
11060 }
11061 }
11062
11063 /* After the real arguments, expand_call calls us once again
11064 with a void_type_node type. Whatever we return here is
11065 passed as operand 2 to the call expanders.
11066
11067 We don't need this feature ... */
11068 else if (type == void_type_node)
11069 return const0_rtx;
11070
11071 gcc_unreachable ();
11072 }
11073
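/* The PARALLEL built in s390_function_arg above mirrors the return
   value handling further down: it is only needed for -m31 -mzarch,
   where a DImode argument has to be forced into a pair of 32-bit GPRs
   (pieces at offsets 0 and 4) even though it would fit into a single
   64-bit register.  */
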
11074 /* Return true if return values of type TYPE should be returned
11075 in a memory buffer whose address is passed by the caller as
11076 hidden first argument. */
11077
11078 static bool
11079 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
11080 {
11081 /* We accept small integral (and similar) types. */
11082 if (INTEGRAL_TYPE_P (type)
11083 || POINTER_TYPE_P (type)
11084 || TREE_CODE (type) == OFFSET_TYPE
11085 || TREE_CODE (type) == REAL_TYPE)
11086 return int_size_in_bytes (type) > 8;
11087
11088 /* vector types which fit into a VR. */
11089 if (TARGET_VX_ABI
11090 && VECTOR_TYPE_P (type)
11091 && int_size_in_bytes (type) <= 16)
11092 return false;
11093
11094 /* Aggregates and similar constructs are always returned
11095 in memory. */
11096 if (AGGREGATE_TYPE_P (type)
11097 || TREE_CODE (type) == COMPLEX_TYPE
11098 || VECTOR_TYPE_P (type))
11099 return true;
11100
11101 /* ??? We get called on all sorts of random stuff from
11102 aggregate_value_p. We can't abort, but it's not clear
11103 what's safe to return. Pretend it's a struct I guess. */
11104 return true;
11105 }
11106
11107 /* Function arguments and return values are promoted to word size. */
11108
11109 static machine_mode
11110 s390_promote_function_mode (const_tree type, machine_mode mode,
11111 int *punsignedp,
11112 const_tree fntype ATTRIBUTE_UNUSED,
11113 int for_return ATTRIBUTE_UNUSED)
11114 {
11115 if (INTEGRAL_MODE_P (mode)
11116 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
11117 {
11118 if (type != NULL_TREE && POINTER_TYPE_P (type))
11119 *punsignedp = POINTERS_EXTEND_UNSIGNED;
11120 return Pmode;
11121 }
11122
11123 return mode;
11124 }
11125
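/* Example for s390_promote_function_mode above: integral values
   narrower than UNITS_PER_LONG (e.g. `short', or `int' on a 64-bit
   target) are widened to Pmode; pointer types are extended with
   POINTERS_EXTEND_UNSIGNED, everything else keeps the signedness
   passed in via *punsignedp.  */
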
11126 /* Define where to return a (scalar) value of type RET_TYPE.
11127 If RET_TYPE is null, define where to return a (scalar)
11128 value of mode MODE from a libcall. */
11129
11130 static rtx
11131 s390_function_and_libcall_value (machine_mode mode,
11132 const_tree ret_type,
11133 const_tree fntype_or_decl,
11134 bool outgoing ATTRIBUTE_UNUSED)
11135 {
11136 /* For vector return types it is important to use the RET_TYPE
11137 argument whenever available since the middle-end might have
11138 changed the mode to a scalar mode. */
11139 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
11140 || (!ret_type && VECTOR_MODE_P (mode)));
11141
11142 /* For normal functions perform the promotion as
11143 promote_function_mode would do. */
11144 if (ret_type)
11145 {
11146 int unsignedp = TYPE_UNSIGNED (ret_type);
11147 mode = promote_function_mode (ret_type, mode, &unsignedp,
11148 fntype_or_decl, 1);
11149 }
11150
11151 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
11152 || SCALAR_FLOAT_MODE_P (mode)
11153 || (TARGET_VX_ABI && vector_ret_type_p));
11154 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
11155
11156 if (TARGET_VX_ABI && vector_ret_type_p)
11157 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
11158 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
11159 return gen_rtx_REG (mode, 16);
11160 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
11161 || UNITS_PER_LONG == UNITS_PER_WORD)
11162 return gen_rtx_REG (mode, 2);
11163 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
11164 {
11165 /* This case is triggered when returning a 64 bit value with
11166 -m31 -mzarch. Although the value would fit into a single
11167 register it has to be forced into a 32 bit register pair in
11168 order to match the ABI. */
11169 rtvec p = rtvec_alloc (2);
11170
11171 RTVEC_ELT (p, 0)
11172 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
11173 RTVEC_ELT (p, 1)
11174 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
11175
11176 return gen_rtx_PARALLEL (mode, p);
11177 }
11178
11179 gcc_unreachable ();
11180 }
11181
11182 /* Define where to return a scalar return value of type RET_TYPE. */
11183
11184 static rtx
11185 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
11186 bool outgoing)
11187 {
11188 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
11189 fn_decl_or_type, outgoing);
11190 }
11191
11192 /* Define where to return a scalar libcall return value of mode
11193 MODE. */
11194
11195 static rtx
11196 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
11197 {
11198 return s390_function_and_libcall_value (mode, NULL_TREE,
11199 NULL_TREE, true);
11200 }
11201
11202
11203 /* Create and return the va_list datatype.
11204
11205 On S/390, va_list is an array type equivalent to
11206
11207 typedef struct __va_list_tag
11208 {
11209 long __gpr;
11210 long __fpr;
11211 void *__overflow_arg_area;
11212 void *__reg_save_area;
11213 } va_list[1];
11214
11215 where __gpr and __fpr hold the number of general purpose
11216 or floating point arguments used up to now, respectively,
11217 __overflow_arg_area points to the stack location of the
11218 next argument passed on the stack, and __reg_save_area
11219 always points to the start of the register area in the
11220 call frame of the current function. The function prologue
11221 saves all registers used for argument passing into this
11222 area if the function uses variable arguments. */
11223
11224 static tree
11225 s390_build_builtin_va_list (void)
11226 {
11227 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
11228
11229 record = lang_hooks.types.make_type (RECORD_TYPE);
11230
11231 type_decl =
11232 build_decl (BUILTINS_LOCATION,
11233 TYPE_DECL, get_identifier ("__va_list_tag"), record);
11234
11235 f_gpr = build_decl (BUILTINS_LOCATION,
11236 FIELD_DECL, get_identifier ("__gpr"),
11237 long_integer_type_node);
11238 f_fpr = build_decl (BUILTINS_LOCATION,
11239 FIELD_DECL, get_identifier ("__fpr"),
11240 long_integer_type_node);
11241 f_ovf = build_decl (BUILTINS_LOCATION,
11242 FIELD_DECL, get_identifier ("__overflow_arg_area"),
11243 ptr_type_node);
11244 f_sav = build_decl (BUILTINS_LOCATION,
11245 FIELD_DECL, get_identifier ("__reg_save_area"),
11246 ptr_type_node);
11247
11248 va_list_gpr_counter_field = f_gpr;
11249 va_list_fpr_counter_field = f_fpr;
11250
11251 DECL_FIELD_CONTEXT (f_gpr) = record;
11252 DECL_FIELD_CONTEXT (f_fpr) = record;
11253 DECL_FIELD_CONTEXT (f_ovf) = record;
11254 DECL_FIELD_CONTEXT (f_sav) = record;
11255
11256 TYPE_STUB_DECL (record) = type_decl;
11257 TYPE_NAME (record) = type_decl;
11258 TYPE_FIELDS (record) = f_gpr;
11259 DECL_CHAIN (f_gpr) = f_fpr;
11260 DECL_CHAIN (f_fpr) = f_ovf;
11261 DECL_CHAIN (f_ovf) = f_sav;
11262
11263 layout_type (record);
11264
11265 /* The correct type is an array type of one element. */
11266 return build_array_type (record, build_index_type (size_zero_node));
11267 }
11268
11269 /* Implement va_start by filling the va_list structure VALIST.
11270 STDARG_P is always true, and ignored.
11271 NEXTARG points to the first anonymous stack argument.
11272
11273 The following global variables are used to initialize
11274 the va_list structure:
11275
11276 crtl->args.info:
11277 holds number of gprs and fprs used for named arguments.
11278 crtl->args.arg_offset_rtx:
11279 holds the offset of the first anonymous stack argument
11280 (relative to the virtual arg pointer). */
11281
11282 static void
11283 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
11284 {
11285 HOST_WIDE_INT n_gpr, n_fpr;
11286 int off;
11287 tree f_gpr, f_fpr, f_ovf, f_sav;
11288 tree gpr, fpr, ovf, sav, t;
11289
11290 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11291 f_fpr = DECL_CHAIN (f_gpr);
11292 f_ovf = DECL_CHAIN (f_fpr);
11293 f_sav = DECL_CHAIN (f_ovf);
11294
11295 valist = build_simple_mem_ref (valist);
11296 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11297 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11298 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11299 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11300
11301 /* Count number of gp and fp argument registers used. */
11302
11303 n_gpr = crtl->args.info.gprs;
11304 n_fpr = crtl->args.info.fprs;
11305
11306 if (cfun->va_list_gpr_size)
11307 {
11308 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11309 build_int_cst (NULL_TREE, n_gpr));
11310 TREE_SIDE_EFFECTS (t) = 1;
11311 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11312 }
11313
11314 if (cfun->va_list_fpr_size)
11315 {
11316 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11317 build_int_cst (NULL_TREE, n_fpr));
11318 TREE_SIDE_EFFECTS (t) = 1;
11319 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11320 }
11321
11322 /* Find the overflow area.
11323 FIXME: This currently is too pessimistic when the vector ABI is
11324 enabled. In that case we *always* set up the overflow area
11325 pointer. */
11326 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
11327 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
11328 || TARGET_VX_ABI)
11329 {
11330 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11331
11332 off = INTVAL (crtl->args.arg_offset_rtx);
11333 off = off < 0 ? 0 : off;
11334 if (TARGET_DEBUG_ARG)
11335 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
11336 (int)n_gpr, (int)n_fpr, off);
11337
11338 t = fold_build_pointer_plus_hwi (t, off);
11339
11340 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11341 TREE_SIDE_EFFECTS (t) = 1;
11342 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11343 }
11344
11345 /* Find the register save area. */
11346 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
11347 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
11348 {
11349 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
11350 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
11351
11352 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11353 TREE_SIDE_EFFECTS (t) = 1;
11354 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11355 }
11356 }
11357
11358 /* Implement va_arg by updating the va_list structure
11359 VALIST as required to retrieve an argument of type
11360 TYPE, and returning that argument.
11361
11362 Generates code equivalent to:
11363
11364 if (integral value) {
11365 if (size <= 4 && args.gpr < 5 ||
11366 size > 4 && args.gpr < 4 )
11367 ret = args.reg_save_area[args.gpr+8]
11368 else
11369 ret = *args.overflow_arg_area++;
11370 } else if (vector value) {
11371 ret = *args.overflow_arg_area;
11372 args.overflow_arg_area += size / 8;
11373 } else if (float value) {
11374 if (args.fpr < 2)
11375 ret = args.reg_save_area[args.fpr+64]
11376 else
11377 ret = *args.overflow_arg_area++;
11378 } else if (aggregate value) {
11379 if (args.gpr < 5)
11380 ret = *args.reg_save_area[args.gpr]
11381 else
11382 ret = **args.overflow_arg_area++;
11383 } */
11384
11385 static tree
11386 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11387 gimple_seq *post_p ATTRIBUTE_UNUSED)
11388 {
11389 tree f_gpr, f_fpr, f_ovf, f_sav;
11390 tree gpr, fpr, ovf, sav, reg, t, u;
11391 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
11392 tree lab_false, lab_over;
11393 tree addr = create_tmp_var (ptr_type_node, "addr");
11394 bool left_align_p; /* Whether a value < UNITS_PER_LONG is left-aligned
11395 within its stack slot. */
11396
11397 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11398 f_fpr = DECL_CHAIN (f_gpr);
11399 f_ovf = DECL_CHAIN (f_fpr);
11400 f_sav = DECL_CHAIN (f_ovf);
11401
11402 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11403 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11404 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11405
11406 /* The tree for args* cannot be shared between gpr/fpr and ovf since
11407 both appear on a lhs. */
11408 valist = unshare_expr (valist);
11409 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11410
11411 size = int_size_in_bytes (type);
11412
11413 s390_check_type_for_vector_abi (type, true, false);
11414
11415 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11416 {
11417 if (TARGET_DEBUG_ARG)
11418 {
11419 fprintf (stderr, "va_arg: aggregate type");
11420 debug_tree (type);
11421 }
11422
11423 /* Aggregates are passed by reference. */
11424 indirect_p = 1;
11425 reg = gpr;
11426 n_reg = 1;
11427
11428 /* kernel stack layout on 31 bit: It is assumed here that no padding
11429 will be added by s390_frame_info because, for va_args, an even
11430 number of GPRs always has to be saved (r15-r2 = 14 regs). */
11431 sav_ofs = 2 * UNITS_PER_LONG;
11432 sav_scale = UNITS_PER_LONG;
11433 size = UNITS_PER_LONG;
11434 max_reg = GP_ARG_NUM_REG - n_reg;
11435 left_align_p = false;
11436 }
11437 else if (s390_function_arg_vector (TYPE_MODE (type), type))
11438 {
11439 if (TARGET_DEBUG_ARG)
11440 {
11441 fprintf (stderr, "va_arg: vector type");
11442 debug_tree (type);
11443 }
11444
11445 indirect_p = 0;
11446 reg = NULL_TREE;
11447 n_reg = 0;
11448 sav_ofs = 0;
11449 sav_scale = 8;
11450 max_reg = 0;
11451 left_align_p = true;
11452 }
11453 else if (s390_function_arg_float (TYPE_MODE (type), type))
11454 {
11455 if (TARGET_DEBUG_ARG)
11456 {
11457 fprintf (stderr, "va_arg: float type");
11458 debug_tree (type);
11459 }
11460
11461 /* FP args go in FP registers, if present. */
11462 indirect_p = 0;
11463 reg = fpr;
11464 n_reg = 1;
11465 sav_ofs = 16 * UNITS_PER_LONG;
11466 sav_scale = 8;
11467 max_reg = FP_ARG_NUM_REG - n_reg;
11468 left_align_p = false;
11469 }
11470 else
11471 {
11472 if (TARGET_DEBUG_ARG)
11473 {
11474 fprintf (stderr, "va_arg: other type");
11475 debug_tree (type);
11476 }
11477
11478 /* Otherwise into GP registers. */
11479 indirect_p = 0;
11480 reg = gpr;
11481 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11482
11483 /* kernel stack layout on 31 bit: It is assumed here that no padding
11484 will be added by s390_frame_info because, for va_args, an even
11485 number of GPRs always has to be saved (r15-r2 = 14 regs). */
11486 sav_ofs = 2 * UNITS_PER_LONG;
11487
11488 if (size < UNITS_PER_LONG)
11489 sav_ofs += UNITS_PER_LONG - size;
11490
11491 sav_scale = UNITS_PER_LONG;
11492 max_reg = GP_ARG_NUM_REG - n_reg;
11493 left_align_p = false;
11494 }
11495
11496 /* Pull the value out of the saved registers ... */
11497
11498 if (reg != NULL_TREE)
11499 {
11500 /*
11501 if (reg > ((typeof (reg))max_reg))
11502 goto lab_false;
11503
11504 addr = sav + sav_ofs + reg * sav_scale;
11505
11506 goto lab_over;
11507
11508 lab_false:
11509 */
11510
11511 lab_false = create_artificial_label (UNKNOWN_LOCATION);
11512 lab_over = create_artificial_label (UNKNOWN_LOCATION);
11513
11514 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
11515 t = build2 (GT_EXPR, boolean_type_node, reg, t);
11516 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11517 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11518 gimplify_and_add (t, pre_p);
11519
11520 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11521 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
11522 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
11523 t = fold_build_pointer_plus (t, u);
11524
11525 gimplify_assign (addr, t, pre_p);
11526
11527 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11528
11529 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
11530 }
11531
11532 /* ... Otherwise out of the overflow area. */
11533
11534 t = ovf;
11535 if (size < UNITS_PER_LONG && !left_align_p)
11536 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
11537
11538 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11539
11540 gimplify_assign (addr, t, pre_p);
11541
11542 if (size < UNITS_PER_LONG && left_align_p)
11543 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
11544 else
11545 t = fold_build_pointer_plus_hwi (t, size);
11546
11547 gimplify_assign (ovf, t, pre_p);
11548
11549 if (reg != NULL_TREE)
11550 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
11551
11552
11553 /* Increment register save count. */
11554
11555 if (n_reg > 0)
11556 {
11557 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
11558 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
11559 gimplify_and_add (u, pre_p);
11560 }
11561
11562 if (indirect_p)
11563 {
11564 t = build_pointer_type_for_mode (build_pointer_type (type),
11565 ptr_mode, true);
11566 addr = fold_convert (t, addr);
11567 addr = build_va_arg_indirect_ref (addr);
11568 }
11569 else
11570 {
11571 t = build_pointer_type_for_mode (type, ptr_mode, true);
11572 addr = fold_convert (t, addr);
11573 }
11574
11575 return build_va_arg_indirect_ref (addr);
11576 }
11577
11578 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
11579 expanders.
11580 DEST - Register location where CC will be stored.
11581 TDB - Pointer to a 256 byte area in which to store the transaction
11582 diagnostic block. NULL if TDB is not needed.
11583 RETRY - Retry count value. If non-NULL, a retry loop for CC2
11584 is emitted.
11585 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
11586 of the tbegin instruction pattern. */
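/* Illustrative sketch of the sequence emitted below when RETRY is
   non-NULL (pseudo code; the names refer to the temporaries created in
   this function):

       retry_reg = RETRY + 1;  retry_plus_two = RETRY + 2;
     retry_label:
       tbegin (clobbering FPRs if CLOBBER_FPRS_P)
       DEST = CC;
       if (CC is 0, 1 or 3)             // success or permanent failure
         goto leave_label;
       count = retry_plus_two - retry_reg;
       tx_assist (count);               // PPA instruction
       if (--retry_reg != 0)            // doloop on the retry counter
         goto retry_label;
     leave_label:                                                        */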
11587
11588 void
11589 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
11590 {
11591 rtx retry_plus_two = gen_reg_rtx (SImode);
11592 rtx retry_reg = gen_reg_rtx (SImode);
11593 rtx_code_label *retry_label = NULL;
11594
11595 if (retry != NULL_RTX)
11596 {
11597 emit_move_insn (retry_reg, retry);
11598 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
11599 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
11600 retry_label = gen_label_rtx ();
11601 emit_label (retry_label);
11602 }
11603
11604 if (clobber_fprs_p)
11605 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), tdb));
11606 else
11607 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
11608 tdb));
11609
11610 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
11611 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
11612 CC_REGNUM)),
11613 UNSPEC_CC_TO_INT));
11614 if (retry != NULL_RTX)
11615 {
11616 const int CC0 = 1 << 3;
11617 const int CC1 = 1 << 2;
11618 const int CC3 = 1 << 0;
11619 rtx jump;
11620 rtx count = gen_reg_rtx (SImode);
11621 rtx_code_label *leave_label = gen_label_rtx ();
11622
11623 /* Exit for success and permanent failures. */
11624 jump = s390_emit_jump (leave_label,
11625 gen_rtx_EQ (VOIDmode,
11626 gen_rtx_REG (CCRAWmode, CC_REGNUM),
11627 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
11628 LABEL_NUSES (leave_label) = 1;
11629
11630 /* CC2 - transient failure. Perform retry with ppa. */
11631 emit_move_insn (count, retry_plus_two);
11632 emit_insn (gen_subsi3 (count, count, retry_reg));
11633 emit_insn (gen_tx_assist (count));
11634 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
11635 retry_reg,
11636 retry_reg));
11637 JUMP_LABEL (jump) = retry_label;
11638 LABEL_NUSES (retry_label) = 1;
11639 emit_label (leave_label);
11640 }
11641 }
11642
11643
11644 /* Return the decl for the target specific builtin with the function
11645 code FCODE. */
11646
11647 static tree
11648 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
11649 {
11650 if (fcode >= S390_BUILTIN_MAX)
11651 return error_mark_node;
11652
11653 return s390_builtin_decls[fcode];
11654 }
11655
11656 /* We call mcount before the function prologue. So a profiled leaf
11657 function should stay a leaf function. */
11658
11659 static bool
11660 s390_keep_leaf_when_profiled ()
11661 {
11662 return true;
11663 }
11664
11665 /* Output assembly code for the trampoline template to
11666 stdio stream FILE.
11667
11668 On S/390, we use gpr 1 internally in the trampoline code;
11669 gpr 0 is used to hold the static chain. */
11670
11671 static void
11672 s390_asm_trampoline_template (FILE *file)
11673 {
11674 rtx op[2];
11675 op[0] = gen_rtx_REG (Pmode, 0);
11676 op[1] = gen_rtx_REG (Pmode, 1);
11677
11678 if (TARGET_64BIT)
11679 {
11680 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
11681 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
11682 output_asm_insn ("br\t%1", op); /* 2 byte */
11683 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
11684 }
11685 else
11686 {
11687 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
11688 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
11689 output_asm_insn ("br\t%1", op); /* 2 byte */
11690 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
11691 }
11692 }
11693
11694 /* Emit RTL insns to initialize the variable parts of a trampoline.
11695 FNADDR is an RTX for the address of the function's pure code.
11696 CXT is an RTX for the static chain value for the function. */
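/* Resulting trampoline layout (illustrative; offsets in multiples of
   UNITS_PER_LONG):
       0..1  code copied from the template emitted above
       2     static chain value, loaded into gpr 0
       3     target function address, loaded into gpr 1 and branched to  */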
11697
11698 static void
11699 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
11700 {
11701 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11702 rtx mem;
11703
11704 emit_block_move (m_tramp, assemble_trampoline_template (),
11705 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
11706
11707 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
11708 emit_move_insn (mem, cxt);
11709 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
11710 emit_move_insn (mem, fnaddr);
11711 }
11712
11713 /* Output assembler code to FILE to increment profiler label # LABELNO
11714 for profiling a function entry. */
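/* All variants below follow the same pattern: save the return address
   register into the stack slot at SP + UNITS_PER_LONG, load the address
   of the profiler label (LP<labelno>) into gpr 1, call _mcount (through
   the PLT when generating PIC code), and restore the return address
   register afterwards.  */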
11715
11716 void
11717 s390_function_profiler (FILE *file, int labelno)
11718 {
11719 rtx op[7];
11720
11721 char label[128];
11722 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
11723
11724 fprintf (file, "# function profiler \n");
11725
11726 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
11727 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
11728 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
11729
11730 op[2] = gen_rtx_REG (Pmode, 1);
11731 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
11732 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
11733
11734 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
11735 if (flag_pic)
11736 {
11737 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
11738 op[4] = gen_rtx_CONST (Pmode, op[4]);
11739 }
11740
11741 if (TARGET_64BIT)
11742 {
11743 output_asm_insn ("stg\t%0,%1", op);
11744 output_asm_insn ("larl\t%2,%3", op);
11745 output_asm_insn ("brasl\t%0,%4", op);
11746 output_asm_insn ("lg\t%0,%1", op);
11747 }
11748 else if (!flag_pic)
11749 {
11750 op[6] = gen_label_rtx ();
11751
11752 output_asm_insn ("st\t%0,%1", op);
11753 output_asm_insn ("bras\t%2,%l6", op);
11754 output_asm_insn (".long\t%4", op);
11755 output_asm_insn (".long\t%3", op);
11756 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
11757 output_asm_insn ("l\t%0,0(%2)", op);
11758 output_asm_insn ("l\t%2,4(%2)", op);
11759 output_asm_insn ("basr\t%0,%0", op);
11760 output_asm_insn ("l\t%0,%1", op);
11761 }
11762 else
11763 {
11764 op[5] = gen_label_rtx ();
11765 op[6] = gen_label_rtx ();
11766
11767 output_asm_insn ("st\t%0,%1", op);
11768 output_asm_insn ("bras\t%2,%l6", op);
11769 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
11770 output_asm_insn (".long\t%4-%l5", op);
11771 output_asm_insn (".long\t%3-%l5", op);
11772 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
11773 output_asm_insn ("lr\t%0,%2", op);
11774 output_asm_insn ("a\t%0,0(%2)", op);
11775 output_asm_insn ("a\t%2,4(%2)", op);
11776 output_asm_insn ("basr\t%0,%0", op);
11777 output_asm_insn ("l\t%0,%1", op);
11778 }
11779 }
11780
11781 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
11782 into its SYMBOL_REF_FLAGS. */
11783
11784 static void
11785 s390_encode_section_info (tree decl, rtx rtl, int first)
11786 {
11787 default_encode_section_info (decl, rtl, first);
11788
11789 if (TREE_CODE (decl) == VAR_DECL)
11790 {
11791 /* If a variable has a forced alignment to < 2 bytes, mark it
11792 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as LARL
11793 operand. */
11794 if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
11795 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
11796 if (!DECL_SIZE (decl)
11797 || !DECL_ALIGN (decl)
11798 || !tree_fits_shwi_p (DECL_SIZE (decl))
11799 || (DECL_ALIGN (decl) <= 64
11800 && DECL_ALIGN (decl) != tree_to_shwi (DECL_SIZE (decl))))
11801 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
11802 }
11803
11804 /* Literal pool references don't have a decl so they are handled
11805 differently here. We rely on the information in the MEM_ALIGN
11806 entry to decide upon natural alignment. */
11807 if (MEM_P (rtl)
11808 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
11809 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
11810 && (MEM_ALIGN (rtl) == 0
11811 || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
11812 || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
11813 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
11814 }
11815
11816 /* Output thunk to FILE that implements a C++ virtual function call (with
11817 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
11818 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
11819 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
11820 relative to the resulting this pointer. */
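/* In effect the thunk performs (illustrative pseudo code):

       this += DELTA;
       if (VCALL_OFFSET != 0)
         this += *(long *) (*(char **) this + VCALL_OFFSET);
       goto FUNCTION;

   where 'this' is in gpr 2, or in gpr 3 when the function returns its
   value in memory.  */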
11821
11822 static void
11823 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
11824 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11825 tree function)
11826 {
11827 rtx op[10];
11828 int nonlocal = 0;
11829
11830 /* Make sure unwind info is emitted for the thunk if needed. */
11831 final_start_function (emit_barrier (), file, 1);
11832
11833 /* Operand 0 is the target function. */
11834 op[0] = XEXP (DECL_RTL (function), 0);
11835 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
11836 {
11837 nonlocal = 1;
11838 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
11839 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
11840 op[0] = gen_rtx_CONST (Pmode, op[0]);
11841 }
11842
11843 /* Operand 1 is the 'this' pointer. */
11844 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11845 op[1] = gen_rtx_REG (Pmode, 3);
11846 else
11847 op[1] = gen_rtx_REG (Pmode, 2);
11848
11849 /* Operand 2 is the delta. */
11850 op[2] = GEN_INT (delta);
11851
11852 /* Operand 3 is the vcall_offset. */
11853 op[3] = GEN_INT (vcall_offset);
11854
11855 /* Operand 4 is the temporary register. */
11856 op[4] = gen_rtx_REG (Pmode, 1);
11857
11858 /* Operands 5 to 8 can be used as labels. */
11859 op[5] = NULL_RTX;
11860 op[6] = NULL_RTX;
11861 op[7] = NULL_RTX;
11862 op[8] = NULL_RTX;
11863
11864 /* Operand 9 can be used as a temporary register. */
11865 op[9] = NULL_RTX;
11866
11867 /* Generate code. */
11868 if (TARGET_64BIT)
11869 {
11870 /* Setup literal pool pointer if required. */
11871 if ((!DISP_IN_RANGE (delta)
11872 && !CONST_OK_FOR_K (delta)
11873 && !CONST_OK_FOR_Os (delta))
11874 || (!DISP_IN_RANGE (vcall_offset)
11875 && !CONST_OK_FOR_K (vcall_offset)
11876 && !CONST_OK_FOR_Os (vcall_offset)))
11877 {
11878 op[5] = gen_label_rtx ();
11879 output_asm_insn ("larl\t%4,%5", op);
11880 }
11881
11882 /* Add DELTA to this pointer. */
11883 if (delta)
11884 {
11885 if (CONST_OK_FOR_J (delta))
11886 output_asm_insn ("la\t%1,%2(%1)", op);
11887 else if (DISP_IN_RANGE (delta))
11888 output_asm_insn ("lay\t%1,%2(%1)", op);
11889 else if (CONST_OK_FOR_K (delta))
11890 output_asm_insn ("aghi\t%1,%2", op);
11891 else if (CONST_OK_FOR_Os (delta))
11892 output_asm_insn ("agfi\t%1,%2", op);
11893 else
11894 {
11895 op[6] = gen_label_rtx ();
11896 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
11897 }
11898 }
11899
11900 /* Perform vcall adjustment. */
11901 if (vcall_offset)
11902 {
11903 if (DISP_IN_RANGE (vcall_offset))
11904 {
11905 output_asm_insn ("lg\t%4,0(%1)", op);
11906 output_asm_insn ("ag\t%1,%3(%4)", op);
11907 }
11908 else if (CONST_OK_FOR_K (vcall_offset))
11909 {
11910 output_asm_insn ("lghi\t%4,%3", op);
11911 output_asm_insn ("ag\t%4,0(%1)", op);
11912 output_asm_insn ("ag\t%1,0(%4)", op);
11913 }
11914 else if (CONST_OK_FOR_Os (vcall_offset))
11915 {
11916 output_asm_insn ("lgfi\t%4,%3", op);
11917 output_asm_insn ("ag\t%4,0(%1)", op);
11918 output_asm_insn ("ag\t%1,0(%4)", op);
11919 }
11920 else
11921 {
11922 op[7] = gen_label_rtx ();
11923 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
11924 output_asm_insn ("ag\t%4,0(%1)", op);
11925 output_asm_insn ("ag\t%1,0(%4)", op);
11926 }
11927 }
11928
11929 /* Jump to target. */
11930 output_asm_insn ("jg\t%0", op);
11931
11932 /* Output literal pool if required. */
11933 if (op[5])
11934 {
11935 output_asm_insn (".align\t4", op);
11936 targetm.asm_out.internal_label (file, "L",
11937 CODE_LABEL_NUMBER (op[5]));
11938 }
11939 if (op[6])
11940 {
11941 targetm.asm_out.internal_label (file, "L",
11942 CODE_LABEL_NUMBER (op[6]));
11943 output_asm_insn (".long\t%2", op);
11944 }
11945 if (op[7])
11946 {
11947 targetm.asm_out.internal_label (file, "L",
11948 CODE_LABEL_NUMBER (op[7]));
11949 output_asm_insn (".long\t%3", op);
11950 }
11951 }
11952 else
11953 {
11954 /* Setup base pointer if required. */
11955 if (!vcall_offset
11956 || (!DISP_IN_RANGE (delta)
11957 && !CONST_OK_FOR_K (delta)
11958 && !CONST_OK_FOR_Os (delta))
11959 || (!DISP_IN_RANGE (vcall_offset)
11960 && !CONST_OK_FOR_K (vcall_offset)
11961 && !CONST_OK_FOR_Os (vcall_offset)))
11962 {
11963 op[5] = gen_label_rtx ();
11964 output_asm_insn ("basr\t%4,0", op);
11965 targetm.asm_out.internal_label (file, "L",
11966 CODE_LABEL_NUMBER (op[5]));
11967 }
11968
11969 /* Add DELTA to this pointer. */
11970 if (delta)
11971 {
11972 if (CONST_OK_FOR_J (delta))
11973 output_asm_insn ("la\t%1,%2(%1)", op);
11974 else if (DISP_IN_RANGE (delta))
11975 output_asm_insn ("lay\t%1,%2(%1)", op);
11976 else if (CONST_OK_FOR_K (delta))
11977 output_asm_insn ("ahi\t%1,%2", op);
11978 else if (CONST_OK_FOR_Os (delta))
11979 output_asm_insn ("afi\t%1,%2", op);
11980 else
11981 {
11982 op[6] = gen_label_rtx ();
11983 output_asm_insn ("a\t%1,%6-%5(%4)", op);
11984 }
11985 }
11986
11987 /* Perform vcall adjustment. */
11988 if (vcall_offset)
11989 {
11990 if (CONST_OK_FOR_J (vcall_offset))
11991 {
11992 output_asm_insn ("l\t%4,0(%1)", op);
11993 output_asm_insn ("a\t%1,%3(%4)", op);
11994 }
11995 else if (DISP_IN_RANGE (vcall_offset))
11996 {
11997 output_asm_insn ("l\t%4,0(%1)", op);
11998 output_asm_insn ("ay\t%1,%3(%4)", op);
11999 }
12000 else if (CONST_OK_FOR_K (vcall_offset))
12001 {
12002 output_asm_insn ("lhi\t%4,%3", op);
12003 output_asm_insn ("a\t%4,0(%1)", op);
12004 output_asm_insn ("a\t%1,0(%4)", op);
12005 }
12006 else if (CONST_OK_FOR_Os (vcall_offset))
12007 {
12008 output_asm_insn ("iilf\t%4,%3", op);
12009 output_asm_insn ("a\t%4,0(%1)", op);
12010 output_asm_insn ("a\t%1,0(%4)", op);
12011 }
12012 else
12013 {
12014 op[7] = gen_label_rtx ();
12015 output_asm_insn ("l\t%4,%7-%5(%4)", op);
12016 output_asm_insn ("a\t%4,0(%1)", op);
12017 output_asm_insn ("a\t%1,0(%4)", op);
12018 }
12019
12020 /* We had to clobber the base pointer register.
12021 Re-setup the base pointer (with a different base). */
12022 op[5] = gen_label_rtx ();
12023 output_asm_insn ("basr\t%4,0", op);
12024 targetm.asm_out.internal_label (file, "L",
12025 CODE_LABEL_NUMBER (op[5]));
12026 }
12027
12028 /* Jump to target. */
12029 op[8] = gen_label_rtx ();
12030
12031 if (!flag_pic)
12032 output_asm_insn ("l\t%4,%8-%5(%4)", op);
12033 else if (!nonlocal)
12034 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12035 /* We cannot call through .plt, since .plt requires %r12 loaded. */
12036 else if (flag_pic == 1)
12037 {
12038 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12039 output_asm_insn ("l\t%4,%0(%4)", op);
12040 }
12041 else if (flag_pic == 2)
12042 {
12043 op[9] = gen_rtx_REG (Pmode, 0);
12044 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
12045 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12046 output_asm_insn ("ar\t%4,%9", op);
12047 output_asm_insn ("l\t%4,0(%4)", op);
12048 }
12049
12050 output_asm_insn ("br\t%4", op);
12051
12052 /* Output literal pool. */
12053 output_asm_insn (".align\t4", op);
12054
12055 if (nonlocal && flag_pic == 2)
12056 output_asm_insn (".long\t%0", op);
12057 if (nonlocal)
12058 {
12059 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
12060 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
12061 }
12062
12063 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
12064 if (!flag_pic)
12065 output_asm_insn (".long\t%0", op);
12066 else
12067 output_asm_insn (".long\t%0-%5", op);
12068
12069 if (op[6])
12070 {
12071 targetm.asm_out.internal_label (file, "L",
12072 CODE_LABEL_NUMBER (op[6]));
12073 output_asm_insn (".long\t%2", op);
12074 }
12075 if (op[7])
12076 {
12077 targetm.asm_out.internal_label (file, "L",
12078 CODE_LABEL_NUMBER (op[7]));
12079 output_asm_insn (".long\t%3", op);
12080 }
12081 }
12082 final_end_function ();
12083 }
12084
12085 static bool
12086 s390_valid_pointer_mode (machine_mode mode)
12087 {
12088 return (mode == SImode || (TARGET_64BIT && mode == DImode));
12089 }
12090
12091 /* Checks whether the given CALL_EXPR would use a call-saved
12092 register. This is used to decide whether sibling call
12093 optimization could be performed on the respective function
12094 call. */
12095
12096 static bool
12097 s390_call_saved_register_used (tree call_expr)
12098 {
12099 CUMULATIVE_ARGS cum_v;
12100 cumulative_args_t cum;
12101 tree parameter;
12102 machine_mode mode;
12103 tree type;
12104 rtx parm_rtx;
12105 int reg, i;
12106
12107 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
12108 cum = pack_cumulative_args (&cum_v);
12109
12110 for (i = 0; i < call_expr_nargs (call_expr); i++)
12111 {
12112 parameter = CALL_EXPR_ARG (call_expr, i);
12113 gcc_assert (parameter);
12114
12115 /* For an undeclared variable passed as parameter we will get
12116 an ERROR_MARK node here. */
12117 if (TREE_CODE (parameter) == ERROR_MARK)
12118 return true;
12119
12120 type = TREE_TYPE (parameter);
12121 gcc_assert (type);
12122
12123 mode = TYPE_MODE (type);
12124 gcc_assert (mode);
12125
12126 /* We assume that in the target function all parameters are
12127 named. This only has an impact on the vector argument registers,
12128 none of which are call-saved. */
12129 if (pass_by_reference (&cum_v, mode, type, true))
12130 {
12131 mode = Pmode;
12132 type = build_pointer_type (type);
12133 }
12134
12135 parm_rtx = s390_function_arg (cum, mode, type, true);
12136
12137 s390_function_arg_advance (cum, mode, type, true);
12138
12139 if (!parm_rtx)
12140 continue;
12141
12142 if (REG_P (parm_rtx))
12143 {
12144 for (reg = 0;
12145 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
12146 reg++)
12147 if (!call_used_regs[reg + REGNO (parm_rtx)])
12148 return true;
12149 }
12150
12151 if (GET_CODE (parm_rtx) == PARALLEL)
12152 {
12153 int i;
12154
12155 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
12156 {
12157 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
12158
12159 gcc_assert (REG_P (r));
12160
12161 for (reg = 0;
12162 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
12163 reg++)
12164 if (!call_used_regs[reg + REGNO (r)])
12165 return true;
12166 }
12167 }
12168
12169 }
12170 return false;
12171 }
12172
12173 /* Return true if the given call expression can be
12174 turned into a sibling call.
12175 DECL holds the declaration of the function to be called whereas
12176 EXP is the call expression itself. */
12177
12178 static bool
12179 s390_function_ok_for_sibcall (tree decl, tree exp)
12180 {
12181 /* The TPF epilogue uses register 1. */
12182 if (TARGET_TPF_PROFILING)
12183 return false;
12184
12185 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
12186 which would have to be restored before the sibcall. */
12187 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
12188 return false;
12189
12190 /* Register 6 on s390 is available as an argument register but unfortunately
12191 "caller saved". This makes functions needing this register for arguments
12192 not suitable for sibcalls. */
12193 return !s390_call_saved_register_used (exp);
12194 }
12195
12196 /* Return the fixed registers used for condition codes. */
12197
12198 static bool
12199 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
12200 {
12201 *p1 = CC_REGNUM;
12202 *p2 = INVALID_REGNUM;
12203
12204 return true;
12205 }
12206
12207 /* This function is used by the call expanders of the machine description.
12208 It emits the call insn itself together with the necessary operations
12209 to adjust the target address and returns the emitted insn.
12210 ADDR_LOCATION is the target address rtx.
12211 TLS_CALL is the location of the thread-local symbol.
12212 RESULT_REG is the register where the result of the call should be stored.
12213 RETADDR_REG is the register where the return address should be stored.
12214 If this parameter is NULL_RTX the call is considered
12215 to be a sibling call. */
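/* Illustrative shape of the insn emitted below for a normal call with
   both RESULT_REG and RETADDR_REG given (the USE is present only for
   TLS calls):

       (parallel [(set RESULT_REG (call (mem:QI ADDR) (const_int 0)))
                  (clobber RETADDR_REG)
                  (use TLS_CALL)])                                        */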
12216
12217 rtx_insn *
12218 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
12219 rtx retaddr_reg)
12220 {
12221 bool plt_call = false;
12222 rtx_insn *insn;
12223 rtx call;
12224 rtx clobber;
12225 rtvec vec;
12226
12227 /* Direct function calls need special treatment. */
12228 if (GET_CODE (addr_location) == SYMBOL_REF)
12229 {
12230 /* When calling a global routine in PIC mode, we must
12231 replace the symbol itself with the PLT stub. */
12232 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
12233 {
12234 if (retaddr_reg != NULL_RTX)
12235 {
12236 addr_location = gen_rtx_UNSPEC (Pmode,
12237 gen_rtvec (1, addr_location),
12238 UNSPEC_PLT);
12239 addr_location = gen_rtx_CONST (Pmode, addr_location);
12240 plt_call = true;
12241 }
12242 else
12243 /* For -fpic code the PLT entries might use r12 which is
12244 call-saved. Therefore we cannot do a sibcall when
12245 calling directly using a symbol ref. When reaching
12246 this point we decided (in s390_function_ok_for_sibcall)
12247 to do a sibcall for a function pointer but one of the
12248 optimizers was able to get rid of the function pointer
12249 by propagating the symbol ref into the call. This
12250 optimization is illegal for S/390, so we turn the direct
12251 call into an indirect call again. */
12252 addr_location = force_reg (Pmode, addr_location);
12253 }
12254
12255 /* Unless we can use the bras(l) insn, force the
12256 routine address into a register. */
12257 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
12258 {
12259 if (flag_pic)
12260 addr_location = legitimize_pic_address (addr_location, 0);
12261 else
12262 addr_location = force_reg (Pmode, addr_location);
12263 }
12264 }
12265
12266 /* If it is already an indirect call or the code above moved the
12267 SYMBOL_REF to somewhere else, make sure the address can be found in
12268 register 1. */
12269 if (retaddr_reg == NULL_RTX
12270 && GET_CODE (addr_location) != SYMBOL_REF
12271 && !plt_call)
12272 {
12273 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
12274 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
12275 }
12276
12277 addr_location = gen_rtx_MEM (QImode, addr_location);
12278 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
12279
12280 if (result_reg != NULL_RTX)
12281 call = gen_rtx_SET (result_reg, call);
12282
12283 if (retaddr_reg != NULL_RTX)
12284 {
12285 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
12286
12287 if (tls_call != NULL_RTX)
12288 vec = gen_rtvec (3, call, clobber,
12289 gen_rtx_USE (VOIDmode, tls_call));
12290 else
12291 vec = gen_rtvec (2, call, clobber);
12292
12293 call = gen_rtx_PARALLEL (VOIDmode, vec);
12294 }
12295
12296 insn = emit_call_insn (call);
12297
12298 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
12299 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
12300 {
12301 /* s390_function_ok_for_sibcall should
12302 have denied sibcalls in this case. */
12303 gcc_assert (retaddr_reg != NULL_RTX);
12304 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
12305 }
12306 return insn;
12307 }
12308
12309 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
12310
12311 static void
12312 s390_conditional_register_usage (void)
12313 {
12314 int i;
12315
12316 if (flag_pic)
12317 {
12318 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12319 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12320 }
12321 if (TARGET_CPU_ZARCH)
12322 {
12323 fixed_regs[BASE_REGNUM] = 0;
12324 call_used_regs[BASE_REGNUM] = 0;
12325 fixed_regs[RETURN_REGNUM] = 0;
12326 call_used_regs[RETURN_REGNUM] = 0;
12327 }
12328 if (TARGET_64BIT)
12329 {
12330 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
12331 call_used_regs[i] = call_really_used_regs[i] = 0;
12332 }
12333 else
12334 {
12335 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
12336 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
12337 }
12338
12339 if (TARGET_SOFT_FLOAT)
12340 {
12341 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
12342 call_used_regs[i] = fixed_regs[i] = 1;
12343 }
12344
12345 /* Disable v16 - v31 for non-vector target. */
12346 if (!TARGET_VX)
12347 {
12348 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
12349 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
12350 }
12351 }
12352
12353 /* Corresponding function to eh_return expander. */
12354
12355 static GTY(()) rtx s390_tpf_eh_return_symbol;
12356 void
12357 s390_emit_tpf_eh_return (rtx target)
12358 {
12359 rtx_insn *insn;
12360 rtx reg, orig_ra;
12361
12362 if (!s390_tpf_eh_return_symbol)
12363 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
12364
12365 reg = gen_rtx_REG (Pmode, 2);
12366 orig_ra = gen_rtx_REG (Pmode, 3);
12367
12368 emit_move_insn (reg, target);
12369 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
12370 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
12371 gen_rtx_REG (Pmode, RETURN_REGNUM));
12372 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
12373 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
12374
12375 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
12376 }
12377
12378 /* Rework the prologue/epilogue to avoid saving/restoring
12379 registers unnecessarily. */
12380
12381 static void
12382 s390_optimize_prologue (void)
12383 {
12384 rtx_insn *insn, *new_insn, *next_insn;
12385
12386 /* Do a final recompute of the frame-related data. */
12387 s390_optimize_register_info ();
12388
12389 /* If all special registers are in fact used, there's nothing we
12390 can do, so no point in walking the insn list. */
12391
12392 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
12393 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
12394 && (TARGET_CPU_ZARCH
12395 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
12396 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
12397 return;
12398
12399 /* Search for prologue/epilogue insns and replace them. */
12400
12401 for (insn = get_insns (); insn; insn = next_insn)
12402 {
12403 int first, last, off;
12404 rtx set, base, offset;
12405 rtx pat;
12406
12407 next_insn = NEXT_INSN (insn);
12408
12409 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
12410 continue;
12411
12412 pat = PATTERN (insn);
12413
12414 /* Remove ldgr/lgdr instructions used for saving and restoring
12415 GPRs if possible. */
12416 if (TARGET_Z10
12417 && GET_CODE (pat) == SET
12418 && GET_MODE (SET_SRC (pat)) == DImode
12419 && REG_P (SET_SRC (pat))
12420 && REG_P (SET_DEST (pat)))
12421 {
12422 int src_regno = REGNO (SET_SRC (pat));
12423 int dest_regno = REGNO (SET_DEST (pat));
12424 int gpr_regno;
12425 int fpr_regno;
12426
12427 if (!((GENERAL_REGNO_P (src_regno) && FP_REGNO_P (dest_regno))
12428 || (FP_REGNO_P (src_regno) && GENERAL_REGNO_P (dest_regno))))
12429 continue;
12430
12431 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
12432 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
12433
12434 /* GPR must be call-saved, FPR must be call-clobbered. */
12435 if (!call_really_used_regs[fpr_regno]
12436 || call_really_used_regs[gpr_regno])
12437 continue;
12438
12439 /* It must not happen that what we once saved in an FPR now
12440 needs a stack slot. */
12441 gcc_assert (cfun_gpr_save_slot (gpr_regno) != -1);
12442
12443 if (cfun_gpr_save_slot (gpr_regno) == 0)
12444 {
12445 remove_insn (insn);
12446 continue;
12447 }
12448 }
12449
12450 if (GET_CODE (pat) == PARALLEL
12451 && store_multiple_operation (pat, VOIDmode))
12452 {
12453 set = XVECEXP (pat, 0, 0);
12454 first = REGNO (SET_SRC (set));
12455 last = first + XVECLEN (pat, 0) - 1;
12456 offset = const0_rtx;
12457 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
12458 off = INTVAL (offset);
12459
12460 if (GET_CODE (base) != REG || off < 0)
12461 continue;
12462 if (cfun_frame_layout.first_save_gpr != -1
12463 && (cfun_frame_layout.first_save_gpr < first
12464 || cfun_frame_layout.last_save_gpr > last))
12465 continue;
12466 if (REGNO (base) != STACK_POINTER_REGNUM
12467 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12468 continue;
12469 if (first > BASE_REGNUM || last < BASE_REGNUM)
12470 continue;
12471
12472 if (cfun_frame_layout.first_save_gpr != -1)
12473 {
12474 rtx s_pat = save_gprs (base,
12475 off + (cfun_frame_layout.first_save_gpr
12476 - first) * UNITS_PER_LONG,
12477 cfun_frame_layout.first_save_gpr,
12478 cfun_frame_layout.last_save_gpr);
12479 new_insn = emit_insn_before (s_pat, insn);
12480 INSN_ADDRESSES_NEW (new_insn, -1);
12481 }
12482
12483 remove_insn (insn);
12484 continue;
12485 }
12486
12487 if (cfun_frame_layout.first_save_gpr == -1
12488 && GET_CODE (pat) == SET
12489 && GENERAL_REG_P (SET_SRC (pat))
12490 && GET_CODE (SET_DEST (pat)) == MEM)
12491 {
12492 set = pat;
12493 first = REGNO (SET_SRC (set));
12494 offset = const0_rtx;
12495 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
12496 off = INTVAL (offset);
12497
12498 if (GET_CODE (base) != REG || off < 0)
12499 continue;
12500 if (REGNO (base) != STACK_POINTER_REGNUM
12501 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12502 continue;
12503
12504 remove_insn (insn);
12505 continue;
12506 }
12507
12508 if (GET_CODE (pat) == PARALLEL
12509 && load_multiple_operation (pat, VOIDmode))
12510 {
12511 set = XVECEXP (pat, 0, 0);
12512 first = REGNO (SET_DEST (set));
12513 last = first + XVECLEN (pat, 0) - 1;
12514 offset = const0_rtx;
12515 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
12516 off = INTVAL (offset);
12517
12518 if (GET_CODE (base) != REG || off < 0)
12519 continue;
12520
12521 if (cfun_frame_layout.first_restore_gpr != -1
12522 && (cfun_frame_layout.first_restore_gpr < first
12523 || cfun_frame_layout.last_restore_gpr > last))
12524 continue;
12525 if (REGNO (base) != STACK_POINTER_REGNUM
12526 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12527 continue;
12528 if (first > BASE_REGNUM || last < BASE_REGNUM)
12529 continue;
12530
12531 if (cfun_frame_layout.first_restore_gpr != -1)
12532 {
12533 rtx rpat = restore_gprs (base,
12534 off + (cfun_frame_layout.first_restore_gpr
12535 - first) * UNITS_PER_LONG,
12536 cfun_frame_layout.first_restore_gpr,
12537 cfun_frame_layout.last_restore_gpr);
12538
12539 /* Remove REG_CFA_RESTOREs for registers that we no
12540 longer need to save. */
12541 REG_NOTES (rpat) = REG_NOTES (insn);
12542 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
12543 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
12544 && ((int) REGNO (XEXP (*ptr, 0))
12545 < cfun_frame_layout.first_restore_gpr))
12546 *ptr = XEXP (*ptr, 1);
12547 else
12548 ptr = &XEXP (*ptr, 1);
12549 new_insn = emit_insn_before (rpat, insn);
12550 RTX_FRAME_RELATED_P (new_insn) = 1;
12551 INSN_ADDRESSES_NEW (new_insn, -1);
12552 }
12553
12554 remove_insn (insn);
12555 continue;
12556 }
12557
12558 if (cfun_frame_layout.first_restore_gpr == -1
12559 && GET_CODE (pat) == SET
12560 && GENERAL_REG_P (SET_DEST (pat))
12561 && GET_CODE (SET_SRC (pat)) == MEM)
12562 {
12563 set = pat;
12564 first = REGNO (SET_DEST (set));
12565 offset = const0_rtx;
12566 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
12567 off = INTVAL (offset);
12568
12569 if (GET_CODE (base) != REG || off < 0)
12570 continue;
12571
12572 if (REGNO (base) != STACK_POINTER_REGNUM
12573 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12574 continue;
12575
12576 remove_insn (insn);
12577 continue;
12578 }
12579 }
12580 }
12581
12582 /* On z10 and later the dynamic branch prediction must see the
12583 backward jump within a certain window. If not, it falls back to
12584 the static prediction. This function rearranges the loop backward
12585 branch in a way which makes the static prediction always correct.
12586 The function returns true if it added an instruction. */
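/* Illustrative sketch of the rewrite performed below, assuming the
   common shape of a conditional backward branch:

     before:   insn:        if (cond) goto code_label;
     after:    insn:        if (cond) fall through, else goto new_label;
               uncond_jump: goto code_label;
               new_label:   ...

   The backward branch to code_label is now unconditional, so the static
   prediction for it is always correct.  */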
12587 static bool
12588 s390_fix_long_loop_prediction (rtx_insn *insn)
12589 {
12590 rtx set = single_set (insn);
12591 rtx code_label, label_ref, new_label;
12592 rtx_insn *uncond_jump;
12593 rtx_insn *cur_insn;
12594 rtx tmp;
12595 int distance;
12596
12597 /* This will exclude branch on count and branch on index patterns
12598 since these are correctly statically predicted. */
12599 if (!set
12600 || SET_DEST (set) != pc_rtx
12601 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
12602 return false;
12603
12604 /* Skip conditional returns. */
12605 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
12606 && XEXP (SET_SRC (set), 2) == pc_rtx)
12607 return false;
12608
12609 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
12610 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
12611
12612 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
12613
12614 code_label = XEXP (label_ref, 0);
12615
12616 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
12617 || INSN_ADDRESSES (INSN_UID (insn)) == -1
12618 || (INSN_ADDRESSES (INSN_UID (insn))
12619 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
12620 return false;
12621
12622 for (distance = 0, cur_insn = PREV_INSN (insn);
12623 distance < PREDICT_DISTANCE - 6;
12624 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
12625 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
12626 return false;
12627
12628 new_label = gen_label_rtx ();
12629 uncond_jump = emit_jump_insn_after (
12630 gen_rtx_SET (pc_rtx,
12631 gen_rtx_LABEL_REF (VOIDmode, code_label)),
12632 insn);
12633 emit_label_after (new_label, uncond_jump);
12634
12635 tmp = XEXP (SET_SRC (set), 1);
12636 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
12637 XEXP (SET_SRC (set), 2) = tmp;
12638 INSN_CODE (insn) = -1;
12639
12640 XEXP (label_ref, 0) = new_label;
12641 JUMP_LABEL (insn) = new_label;
12642 JUMP_LABEL (uncond_jump) = code_label;
12643
12644 return true;
12645 }
12646
12647 /* Returns 1 if INSN reads the value of REG for purposes not related
12648 to addressing of memory, and 0 otherwise. */
12649 static int
12650 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
12651 {
12652 return reg_referenced_p (reg, PATTERN (insn))
12653 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
12654 }
12655
12656 /* Starting from INSN find_cond_jump looks downwards in the insn
12657 stream for a single jump insn which is the last user of the
12658 condition code set in INSN. */
12659 static rtx_insn *
12660 find_cond_jump (rtx_insn *insn)
12661 {
12662 for (; insn; insn = NEXT_INSN (insn))
12663 {
12664 rtx ite, cc;
12665
12666 if (LABEL_P (insn))
12667 break;
12668
12669 if (!JUMP_P (insn))
12670 {
12671 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
12672 break;
12673 continue;
12674 }
12675
12676 /* This will be triggered by a return. */
12677 if (GET_CODE (PATTERN (insn)) != SET)
12678 break;
12679
12680 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
12681 ite = SET_SRC (PATTERN (insn));
12682
12683 if (GET_CODE (ite) != IF_THEN_ELSE)
12684 break;
12685
12686 cc = XEXP (XEXP (ite, 0), 0);
12687 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
12688 break;
12689
12690 if (find_reg_note (insn, REG_DEAD, cc))
12691 return insn;
12692 break;
12693 }
12694
12695 return NULL;
12696 }
12697
12698 /* Swap the condition in COND and the operands in OP0 and OP1 so that
12699 the semantics does not change. If NULL_RTX is passed as COND the
12700 function tries to find the conditional jump starting with INSN. */
12701 static void
12702 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
12703 {
12704 rtx tmp = *op0;
12705
12706 if (cond == NULL_RTX)
12707 {
12708 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
12709 rtx set = jump ? single_set (jump) : NULL_RTX;
12710
12711 if (set == NULL_RTX)
12712 return;
12713
12714 cond = XEXP (SET_SRC (set), 0);
12715 }
12716
12717 *op0 = *op1;
12718 *op1 = tmp;
12719 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
12720 }
12721
12722 /* On z10, instructions of the compare-and-branch family have the
12723 property of accessing the register occurring as second operand with
12724 its bits complemented. If such a compare is grouped with a second
12725 instruction that accesses the same register non-complemented, and
12726 if that register's value is delivered via a bypass, then the
12727 pipeline recycles, thereby causing significant performance decline.
12728 This function locates such situations and exchanges the two
12729 operands of the compare. The function returns true whenever it
12730 added an insn. */
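/* For example (illustrative): if a compare-and-branch such as
   CRJ %r1,%r2,... is grouped with a following instruction that reads
   %r2 and %r2 is delivered via a bypass, the penalty described above is
   incurred.  The code below then swaps the compare operands (adjusting
   the condition accordingly) or, if swapping is not possible, emits a
   NOP after the compare to separate the two instructions.  */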
12731 static bool
12732 s390_z10_optimize_cmp (rtx_insn *insn)
12733 {
12734 rtx_insn *prev_insn, *next_insn;
12735 bool insn_added_p = false;
12736 rtx cond, *op0, *op1;
12737
12738 if (GET_CODE (PATTERN (insn)) == PARALLEL)
12739 {
12740 /* Handle compare and branch and branch on count
12741 instructions. */
12742 rtx pattern = single_set (insn);
12743
12744 if (!pattern
12745 || SET_DEST (pattern) != pc_rtx
12746 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
12747 return false;
12748
12749 cond = XEXP (SET_SRC (pattern), 0);
12750 op0 = &XEXP (cond, 0);
12751 op1 = &XEXP (cond, 1);
12752 }
12753 else if (GET_CODE (PATTERN (insn)) == SET)
12754 {
12755 rtx src, dest;
12756
12757 /* Handle normal compare instructions. */
12758 src = SET_SRC (PATTERN (insn));
12759 dest = SET_DEST (PATTERN (insn));
12760
12761 if (!REG_P (dest)
12762 || !CC_REGNO_P (REGNO (dest))
12763 || GET_CODE (src) != COMPARE)
12764 return false;
12765
12766 /* s390_swap_cmp will try to find the conditional
12767 jump when passing NULL_RTX as condition. */
12768 cond = NULL_RTX;
12769 op0 = &XEXP (src, 0);
12770 op1 = &XEXP (src, 1);
12771 }
12772 else
12773 return false;
12774
12775 if (!REG_P (*op0) || !REG_P (*op1))
12776 return false;
12777
12778 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
12779 return false;
12780
12781 /* Swap the COMPARE arguments and its mask if there is a
12782 conflicting access in the previous insn. */
12783 prev_insn = prev_active_insn (insn);
12784 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
12785 && reg_referenced_p (*op1, PATTERN (prev_insn)))
12786 s390_swap_cmp (cond, op0, op1, insn);
12787
12788 /* Check if there is a conflict with the next insn. If there
12789 was no conflict with the previous insn, then swap the
12790 COMPARE arguments and its mask. If we already swapped
12791 the operands, or if swapping them would cause a conflict
12792 with the previous insn, issue a NOP after the COMPARE in
12793 order to separate the two instructions. */
12794 next_insn = next_active_insn (insn);
12795 if (next_insn != NULL_RTX && INSN_P (next_insn)
12796 && s390_non_addr_reg_read_p (*op1, next_insn))
12797 {
12798 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
12799 && s390_non_addr_reg_read_p (*op0, prev_insn))
12800 {
12801 if (REGNO (*op1) == 0)
12802 emit_insn_after (gen_nop1 (), insn);
12803 else
12804 emit_insn_after (gen_nop (), insn);
12805 insn_added_p = true;
12806 }
12807 else
12808 s390_swap_cmp (cond, op0, op1, insn);
12809 }
12810 return insn_added_p;
12811 }
12812
12813 /* Perform machine-dependent processing. */
12814
12815 static void
12816 s390_reorg (void)
12817 {
12818 bool pool_overflow = false;
12819 int hw_before, hw_after;
12820
12821 /* Make sure all splits have been performed; splits after
12822 machine_dependent_reorg might confuse insn length counts. */
12823 split_all_insns_noflow ();
12824
12825 /* Install the main literal pool and the associated base
12826 register load insns.
12827
12828 In addition, there are two problematic situations we need
12829 to correct:
12830
12831 - the literal pool might be > 4096 bytes in size, so that
12832 some of its elements cannot be directly accessed
12833
12834 - a branch target might be > 64K away from the branch, so that
12835 it is not possible to use a PC-relative instruction.
12836
12837 To fix those, we split the single literal pool into multiple
12838 pool chunks, reloading the pool base register at various
12839 points throughout the function to ensure it always points to
12840 the pool chunk the following code expects, and / or replace
12841 PC-relative branches by absolute branches.
12842
12843 However, the two problems are interdependent: splitting the
12844 literal pool can move a branch further away from its target,
12845 causing the 64K limit to overflow, and on the other hand,
12846 replacing a PC-relative branch by an absolute branch means
12847 we need to put the branch target address into the literal
12848 pool, possibly causing it to overflow.
12849
12850 So, we loop trying to fix up both problems until we manage
12851 to satisfy both conditions at the same time. Note that the
12852 loop is guaranteed to terminate as every pass of the loop
12853 strictly decreases the total number of PC-relative branches
12854 in the function. (This is not completely true as there
12855 might be branch-over-pool insns introduced by chunkify_start.
12856 Those never need to be split however.) */
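/* Illustrative outline of the fix-up loop below:

     for (;;)
       {
         collect the main literal pool, or chunkify it if it overflowed;
         if (!TARGET_CPU_ZARCH && branches had to be split)
           cancel the pool (or chunk list) and start over;
         else
           finish the pool (or chunk list) and stop;
       }                                                                  */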
12857
12858 for (;;)
12859 {
12860 struct constant_pool *pool = NULL;
12861
12862 /* Collect the literal pool. */
12863 if (!pool_overflow)
12864 {
12865 pool = s390_mainpool_start ();
12866 if (!pool)
12867 pool_overflow = true;
12868 }
12869
12870 /* If literal pool overflowed, start to chunkify it. */
12871 if (pool_overflow)
12872 pool = s390_chunkify_start ();
12873
12874 /* Split out-of-range branches. If this has created new
12875 literal pool entries, cancel current chunk list and
12876 recompute it. zSeries machines have large branch
12877 instructions, so we never need to split a branch. */
12878 if (!TARGET_CPU_ZARCH && s390_split_branches ())
12879 {
12880 if (pool_overflow)
12881 s390_chunkify_cancel (pool);
12882 else
12883 s390_mainpool_cancel (pool);
12884
12885 continue;
12886 }
12887
12888 /* If we made it up to here, both conditions are satisfied.
12889 Finish up literal pool related changes. */
12890 if (pool_overflow)
12891 s390_chunkify_finish (pool);
12892 else
12893 s390_mainpool_finish (pool);
12894
12895 /* We're done splitting branches. */
12896 cfun->machine->split_branches_pending_p = false;
12897 break;
12898 }
12899
12900 /* Generate out-of-pool execute target insns. */
12901 if (TARGET_CPU_ZARCH)
12902 {
12903 rtx_insn *insn, *target;
12904 rtx label;
12905
12906 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12907 {
12908 label = s390_execute_label (insn);
12909 if (!label)
12910 continue;
12911
12912 gcc_assert (label != const0_rtx);
12913
12914 target = emit_label (XEXP (label, 0));
12915 INSN_ADDRESSES_NEW (target, -1);
12916
12917 target = emit_insn (s390_execute_target (insn));
12918 INSN_ADDRESSES_NEW (target, -1);
12919 }
12920 }
12921
12922 /* Try to optimize prologue and epilogue further. */
12923 s390_optimize_prologue ();
12924
12925 /* Walk over the insns and do some >=z10 specific changes. */
12926 if (s390_tune == PROCESSOR_2097_Z10
12927 || s390_tune == PROCESSOR_2817_Z196
12928 || s390_tune == PROCESSOR_2827_ZEC12
12929 || s390_tune == PROCESSOR_2964_Z13)
12930 {
12931 rtx_insn *insn;
12932 bool insn_added_p = false;
12933
12934 /* The insn lengths and addresses have to be up to date for the
12935 following manipulations. */
12936 shorten_branches (get_insns ());
12937
12938 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12939 {
12940 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
12941 continue;
12942
12943 if (JUMP_P (insn))
12944 insn_added_p |= s390_fix_long_loop_prediction (insn);
12945
12946 if ((GET_CODE (PATTERN (insn)) == PARALLEL
12947 || GET_CODE (PATTERN (insn)) == SET)
12948 && s390_tune == PROCESSOR_2097_Z10)
12949 insn_added_p |= s390_z10_optimize_cmp (insn);
12950 }
12951
12952 /* Adjust branches if we added new instructions. */
12953 if (insn_added_p)
12954 shorten_branches (get_insns ());
12955 }
12956
12957 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
12958 if (hw_after > 0)
12959 {
12960 rtx_insn *insn;
12961
12962 /* Insert NOPs for hotpatching. */
12963 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12964 /* Emit NOPs
12965 1. inside the area covered by debug information to allow setting
12966 breakpoints at the NOPs,
12967 2. before any insn which results in an asm instruction,
12968 3. before in-function labels to avoid jumping to the NOPs, for
12969 example as part of a loop,
12970 4. before any barrier in case the function is completely empty
12971 (__builtin_unreachable ()) and has neither internal labels nor
12972 active insns.
12973 */
12974 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
12975 break;
12976 /* Output a series of NOPs before the first active insn. */
12977 while (insn && hw_after > 0)
12978 {
12979 if (hw_after >= 3 && TARGET_CPU_ZARCH)
12980 {
12981 emit_insn_before (gen_nop_6_byte (), insn);
12982 hw_after -= 3;
12983 }
12984 else if (hw_after >= 2)
12985 {
12986 emit_insn_before (gen_nop_4_byte (), insn);
12987 hw_after -= 2;
12988 }
12989 else
12990 {
12991 emit_insn_before (gen_nop_2_byte (), insn);
12992 hw_after -= 1;
12993 }
12994 }
12995 }
12996 }
12997
12998 /* Return true if INSN is a fp load insn writing register REGNO. */
12999 static inline bool
13000 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
13001 {
13002 rtx set;
13003 enum attr_type flag = s390_safe_attr_type (insn);
13004
13005 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
13006 return false;
13007
13008 set = single_set (insn);
13009
13010 if (set == NULL_RTX)
13011 return false;
13012
13013 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
13014 return false;
13015
13016 if (REGNO (SET_DEST (set)) != regno)
13017 return false;
13018
13019 return true;
13020 }
13021
13022 /* This value describes the distance to be avoided between an
13023 arithmetic fp instruction and an fp load writing the same register.
13024 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
13025 fine but the exact value has to be avoided. Otherwise the FP
13026 pipeline will throw an exception causing a major penalty. */
13027 #define Z10_EARLYLOAD_DISTANCE 7
13028
13029 /* Rearrange the ready list in order to avoid the situation described
13030 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
13031 moved to the very end of the ready list. */
13032 static void
13033 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
13034 {
13035 unsigned int regno;
13036 int nready = *nready_p;
13037 rtx_insn *tmp;
13038 int i;
13039 rtx_insn *insn;
13040 rtx set;
13041 enum attr_type flag;
13042 int distance;
13043
13044 /* Skip DISTANCE - 1 active insns. */
13045 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
13046 distance > 0 && insn != NULL_RTX;
13047 distance--, insn = prev_active_insn (insn))
13048 if (CALL_P (insn) || JUMP_P (insn))
13049 return;
13050
13051 if (insn == NULL_RTX)
13052 return;
13053
13054 set = single_set (insn);
13055
13056 if (set == NULL_RTX || !REG_P (SET_DEST (set))
13057 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
13058 return;
13059
13060 flag = s390_safe_attr_type (insn);
13061
13062 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
13063 return;
13064
13065 regno = REGNO (SET_DEST (set));
13066 i = nready - 1;
13067
13068 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
13069 i--;
13070
13071 if (!i)
13072 return;
13073
13074 tmp = ready[i];
13075 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
13076 ready[0] = tmp;
13077 }
13078
13079
13080 /* The s390_sched_state variable tracks the state of the current or
13081 the last instruction group.
13082
13083 0,1,2 number of instructions scheduled in the current group
13084 3 the last group is complete - normal insns
13085 4 the last group was a cracked/expanded insn */
13086
13087 static int s390_sched_state;
13088
13089 #define S390_OOO_SCHED_STATE_NORMAL 3
13090 #define S390_OOO_SCHED_STATE_CRACKED 4
13091
13092 #define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1
13093 #define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2
13094 #define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4
13095 #define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
13096
13097 static unsigned int
13098 s390_get_sched_attrmask (rtx_insn *insn)
13099 {
13100 unsigned int mask = 0;
13101
13102 if (get_attr_ooo_cracked (insn))
13103 mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
13104 if (get_attr_ooo_expanded (insn))
13105 mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
13106 if (get_attr_ooo_endgroup (insn))
13107 mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
13108 if (get_attr_ooo_groupalone (insn))
13109 mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
13110 return mask;
13111 }
13112
13113 /* Return the scheduling score for INSN. The higher the score the
13114 better. The score is calculated from the OOO scheduling attributes
13115 of INSN and the scheduling state s390_sched_state. */
13116 static int
13117 s390_sched_score (rtx_insn *insn)
13118 {
13119 unsigned int mask = s390_get_sched_attrmask (insn);
13120 int score = 0;
13121
13122 switch (s390_sched_state)
13123 {
13124 case 0:
13125 /* Try to put insns into the first slot which would otherwise
13126 break a group. */
13127 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
13128 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
13129 score += 5;
13130 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
13131 score += 10;
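/* No break here: the general group-building preferences of state 1
   below are applied to the first slot as well (fall through).  */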
13132 case 1:
13133 /* Prefer not cracked insns while trying to put together a
13134 group. */
13135 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13136 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
13137 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
13138 score += 10;
13139 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
13140 score += 5;
13141 break;
13142 case 2:
13143 /* Prefer not cracked insns while trying to put together a
13144 group. */
13145 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13146 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
13147 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
13148 score += 10;
13149 /* Prefer endgroup insns in the last slot. */
13150 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
13151 score += 10;
13152 break;
13153 case S390_OOO_SCHED_STATE_NORMAL:
13154 /* Prefer not cracked insns if the last was not cracked. */
13155 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13156 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
13157 score += 5;
13158 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
13159 score += 10;
13160 break;
13161 case S390_OOO_SCHED_STATE_CRACKED:
13162 /* Try to keep cracked insns together to prevent them from
13163 interrupting groups. */
13164 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
13165 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
13166 score += 5;
13167 break;
13168 }
13169 return score;
13170 }
13171
13172 /* This function is called via hook TARGET_SCHED_REORDER before
13173 issuing one insn from list READY which contains *NREADYP entries.
13174 For target z10 it reorders load instructions to avoid early load
13175 conflicts in the floating point pipeline. */
13176 static int
13177 s390_sched_reorder (FILE *file, int verbose,
13178 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
13179 {
13180 if (s390_tune == PROCESSOR_2097_Z10)
13181 if (reload_completed && *nreadyp > 1)
13182 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
13183
13184 if ((s390_tune == PROCESSOR_2827_ZEC12
13185 || s390_tune == PROCESSOR_2964_Z13)
13186 && reload_completed
13187 && *nreadyp > 1)
13188 {
13189 int i;
13190 int last_index = *nreadyp - 1;
13191 int max_index = -1;
13192 int max_score = -1;
13193 rtx_insn *tmp;
13194
13195 /* Just move the insn with the highest score to the top (the
13196 end) of the list. A full sort is not needed since a conflict
13197 in the hazard recognition cannot happen. So the top insn in
13198 the ready list will always be taken. */
13199 for (i = last_index; i >= 0; i--)
13200 {
13201 int score;
13202
13203 if (recog_memoized (ready[i]) < 0)
13204 continue;
13205
13206 score = s390_sched_score (ready[i]);
13207 if (score > max_score)
13208 {
13209 max_score = score;
13210 max_index = i;
13211 }
13212 }
13213
13214 if (max_index != -1)
13215 {
13216 if (max_index != last_index)
13217 {
13218 tmp = ready[max_index];
13219 ready[max_index] = ready[last_index];
13220 ready[last_index] = tmp;
13221
13222 if (verbose > 5)
13223 fprintf (file,
13224 "move insn %d to the top of list\n",
13225 INSN_UID (ready[last_index]));
13226 }
13227 else if (verbose > 5)
13228 fprintf (file,
13229 "best insn %d already on top\n",
13230 INSN_UID (ready[last_index]));
13231 }
13232
13233 if (verbose > 5)
13234 {
13235 fprintf (file, "ready list ooo attributes - sched state: %d\n",
13236 s390_sched_state);
13237
13238 for (i = last_index; i >= 0; i--)
13239 {
13240 if (recog_memoized (ready[i]) < 0)
13241 continue;
13242 fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
13243 s390_sched_score (ready[i]));
13244 #define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
13245 PRINT_OOO_ATTR (ooo_cracked);
13246 PRINT_OOO_ATTR (ooo_expanded);
13247 PRINT_OOO_ATTR (ooo_endgroup);
13248 PRINT_OOO_ATTR (ooo_groupalone);
13249 #undef PRINT_OOO_ATTR
13250 fprintf (file, "\n");
13251 }
13252 }
13253 }
13254
13255 return s390_issue_rate ();
13256 }
13257
13258
13259 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
13260 the scheduler has issued INSN. It stores the last issued insn into
13261 last_scheduled_insn in order to make it available for
13262 s390_sched_reorder. */
13263 static int
13264 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
13265 {
13266 last_scheduled_insn = insn;
13267
13268 if ((s390_tune == PROCESSOR_2827_ZEC12
13269 || s390_tune == PROCESSOR_2964_Z13)
13270 && reload_completed
13271 && recog_memoized (insn) >= 0)
13272 {
13273 unsigned int mask = s390_get_sched_attrmask (insn);
13274
13275 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
13276 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
13277 s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
13278 else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
13279 || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
13280 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
13281 else
13282 {
13283 /* Only normal insns are left (mask == 0). */
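	      /* A normal insn occupies the next slot of the current
		 group; once a group is complete (state NORMAL) the
		 next normal insn starts a new group.  */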
13284 switch (s390_sched_state)
13285 {
13286 case 0:
13287 case 1:
13288 case 2:
13289 case S390_OOO_SCHED_STATE_NORMAL:
13290 if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
13291 s390_sched_state = 1;
13292 else
13293 s390_sched_state++;
13294
13295 break;
13296 case S390_OOO_SCHED_STATE_CRACKED:
13297 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
13298 break;
13299 }
13300 }
13301 if (verbose > 5)
13302 {
13303 fprintf (file, "insn %d: ", INSN_UID (insn));
13304 #define PRINT_OOO_ATTR(ATTR) \
13305 fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
13306 PRINT_OOO_ATTR (ooo_cracked);
13307 PRINT_OOO_ATTR (ooo_expanded);
13308 PRINT_OOO_ATTR (ooo_endgroup);
13309 PRINT_OOO_ATTR (ooo_groupalone);
13310 #undef PRINT_OOO_ATTR
13311 fprintf (file, "\n");
13312 fprintf (file, "sched state: %d\n", s390_sched_state);
13313 }
13314 }
13315
13316 if (GET_CODE (PATTERN (insn)) != USE
13317 && GET_CODE (PATTERN (insn)) != CLOBBER)
13318 return more - 1;
13319 else
13320 return more;
13321 }
13322
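/* This function is called via hook TARGET_SCHED_INIT.  It resets
   last_scheduled_insn and the group scheduling state before a new
   block is scheduled.  */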
13323 static void
13324 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
13325 int verbose ATTRIBUTE_UNUSED,
13326 int max_ready ATTRIBUTE_UNUSED)
13327 {
13328 last_scheduled_insn = NULL;
13329 s390_sched_state = 0;
13330 }
13331
13332 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
13333    a new unroll count for struct loop *loop when tuning for CPUs with
13334    a built-in stride prefetcher.
13335    The loop body is analyzed for memory accesses by walking all rtxs
13336    of its insns.  Depending on the loop_depth and the number of
13337    memory accesses a new number <= nunroll is returned to improve the
13338    behaviour of the hardware prefetch unit.  */
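/* Illustrative example (not taken from the original sources): with
   nunroll == 8 and a depth-1 loop whose body contains 7 MEM
   references, the hook below returns MIN (8, 28 / 7) == 4.  */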
13339 static unsigned
13340 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
13341 {
13342 basic_block *bbs;
13343 rtx_insn *insn;
13344 unsigned i;
13345 unsigned mem_count = 0;
13346
13347 if (s390_tune != PROCESSOR_2097_Z10
13348 && s390_tune != PROCESSOR_2817_Z196
13349 && s390_tune != PROCESSOR_2827_ZEC12
13350 && s390_tune != PROCESSOR_2964_Z13)
13351 return nunroll;
13352
13353 /* Count the number of memory references within the loop body. */
13354 bbs = get_loop_body (loop);
13355 subrtx_iterator::array_type array;
13356 for (i = 0; i < loop->num_nodes; i++)
13357 FOR_BB_INSNS (bbs[i], insn)
13358 if (INSN_P (insn) && INSN_CODE (insn) != -1)
13359 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13360 if (MEM_P (*iter))
13361 mem_count += 1;
13362 free (bbs);
13363
13364   /* Prevent division by zero; nunroll needs no adjustment in this case anyway.  */
13365 if (mem_count == 0)
13366 return nunroll;
13367
13368   switch (loop_depth (loop))
13369 {
13370 case 1:
13371 return MIN (nunroll, 28 / mem_count);
13372 case 2:
13373 return MIN (nunroll, 22 / mem_count);
13374 default:
13375 return MIN (nunroll, 16 / mem_count);
13376 }
13377 }
13378
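/* Implement TARGET_OPTION_OVERRIDE.  Process deferred options such as
   -mhotpatch, establish architecture and tuning defaults, select the
   cost table, set tuning params, and register the s390 early machine
   pass.  */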
13379 static void
13380 s390_option_override (void)
13381 {
13382 unsigned int i;
13383 cl_deferred_option *opt;
13384 vec<cl_deferred_option> *v =
13385 (vec<cl_deferred_option> *) s390_deferred_options;
13386
13387 if (v)
13388 FOR_EACH_VEC_ELT (*v, i, opt)
13389 {
13390 switch (opt->opt_index)
13391 {
13392 case OPT_mhotpatch_:
13393 {
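	    /* Parse the two comma-separated non-negative integers of an
	       -mhotpatch=n,m argument; they end up in
	       s390_hotpatch_hw_before_label and
	       s390_hotpatch_hw_after_label.  */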
13394 int val1;
13395 int val2;
13396 char s[256];
13397 char *t;
13398
13399 strncpy (s, opt->arg, 256);
13400 s[255] = 0;
13401 t = strchr (s, ',');
13402 if (t != NULL)
13403 {
13404 *t = 0;
13405 t++;
13406 val1 = integral_argument (s);
13407 val2 = integral_argument (t);
13408 }
13409 else
13410 {
13411 val1 = -1;
13412 val2 = -1;
13413 }
13414 if (val1 == -1 || val2 == -1)
13415 {
13416 		/* Argument is not a plain number.  */
13417 error ("arguments to %qs should be non-negative integers",
13418 "-mhotpatch=n,m");
13419 break;
13420 }
13421 else if (val1 > s390_hotpatch_hw_max
13422 || val2 > s390_hotpatch_hw_max)
13423 {
13424 error ("argument to %qs is too large (max. %d)",
13425 "-mhotpatch=n,m", s390_hotpatch_hw_max);
13426 break;
13427 }
13428 s390_hotpatch_hw_before_label = val1;
13429 s390_hotpatch_hw_after_label = val2;
13430 break;
13431 }
13432 default:
13433 gcc_unreachable ();
13434 }
13435 }
13436
13437 /* Set up function hooks. */
13438 init_machine_status = s390_init_machine_status;
13439
13440 /* Architecture mode defaults according to ABI. */
13441 if (!(target_flags_explicit & MASK_ZARCH))
13442 {
13443 if (TARGET_64BIT)
13444 target_flags |= MASK_ZARCH;
13445 else
13446 target_flags &= ~MASK_ZARCH;
13447 }
13448
13449 /* Set the march default in case it hasn't been specified on
13450 cmdline. */
13451 if (s390_arch == PROCESSOR_max)
13452 {
13453       s390_arch_string = TARGET_ZARCH ? "z900" : "g5";
13454 s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
13455 s390_arch_flags = processor_flags_table[(int)s390_arch];
13456 }
13457
13458 /* Determine processor to tune for. */
13459 if (s390_tune == PROCESSOR_max)
13460 {
13461 s390_tune = s390_arch;
13462 s390_tune_flags = s390_arch_flags;
13463 }
13464
13465 /* Sanity checks. */
13466 if (s390_arch == PROCESSOR_NATIVE || s390_tune == PROCESSOR_NATIVE)
13467 gcc_unreachable ();
13468 if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
13469 error ("z/Architecture mode not supported on %s", s390_arch_string);
13470 if (TARGET_64BIT && !TARGET_ZARCH)
13471 error ("64-bit ABI not supported in ESA/390 mode");
13472
13473 /* Use hardware DFP if available and not explicitly disabled by
13474 user. E.g. with -m31 -march=z10 -mzarch */
13475 if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
13476 target_flags |= MASK_HARD_DFP;
13477
13478 /* Enable hardware transactions if available and not explicitly
13479 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
13480 if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
13481 target_flags |= MASK_OPT_HTM;
13482
13483 if (target_flags_explicit & MASK_OPT_VX)
13484 {
13485 if (TARGET_OPT_VX)
13486 {
13487 if (!TARGET_CPU_VX)
13488 error ("hardware vector support not available on %s",
13489 s390_arch_string);
13490 if (TARGET_SOFT_FLOAT)
13491 error ("hardware vector support not available with -msoft-float");
13492 }
13493 }
13494 else if (TARGET_CPU_VX)
13495 /* Enable vector support if available and not explicitly disabled
13496 by user. E.g. with -m31 -march=z13 -mzarch */
13497 target_flags |= MASK_OPT_VX;
13498
13499 if (TARGET_HARD_DFP && !TARGET_DFP)
13500 {
13501 if (target_flags_explicit & MASK_HARD_DFP)
13502 {
13503 if (!TARGET_CPU_DFP)
13504 error ("hardware decimal floating point instructions"
13505 " not available on %s", s390_arch_string);
13506 if (!TARGET_ZARCH)
13507 error ("hardware decimal floating point instructions"
13508 " not available in ESA/390 mode");
13509 }
13510 else
13511 target_flags &= ~MASK_HARD_DFP;
13512 }
13513
13514 if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
13515 {
13516 if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
13517 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
13518
13519 target_flags &= ~MASK_HARD_DFP;
13520 }
13521
13522 /* Set processor cost function. */
13523 switch (s390_tune)
13524 {
13525 case PROCESSOR_2084_Z990:
13526 s390_cost = &z990_cost;
13527 break;
13528 case PROCESSOR_2094_Z9_109:
13529 s390_cost = &z9_109_cost;
13530 break;
13531 case PROCESSOR_2097_Z10:
13532 s390_cost = &z10_cost;
13533 break;
13534 case PROCESSOR_2817_Z196:
13535 s390_cost = &z196_cost;
13536 break;
13537 case PROCESSOR_2827_ZEC12:
13538 case PROCESSOR_2964_Z13:
13539 s390_cost = &zEC12_cost;
13540 break;
13541 default:
13542 s390_cost = &z900_cost;
13543 }
13544
13545 if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
13546 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
13547 "in combination");
13548
13549 if (s390_stack_size)
13550 {
13551 if (s390_stack_guard >= s390_stack_size)
13552 error ("stack size must be greater than the stack guard value");
13553 else if (s390_stack_size > 1 << 16)
13554 error ("stack size must not be greater than 64k");
13555 }
13556 else if (s390_stack_guard)
13557 error ("-mstack-guard implies use of -mstack-size");
13558
13559 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
13560 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
13561 target_flags |= MASK_LONG_DOUBLE_128;
13562 #endif
13563
13564 if (s390_tune == PROCESSOR_2097_Z10
13565 || s390_tune == PROCESSOR_2817_Z196
13566 || s390_tune == PROCESSOR_2827_ZEC12
13567 || s390_tune == PROCESSOR_2964_Z13)
13568 {
13569 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
13570 global_options.x_param_values,
13571 global_options_set.x_param_values);
13572 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
13573 global_options.x_param_values,
13574 global_options_set.x_param_values);
13575 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
13576 global_options.x_param_values,
13577 global_options_set.x_param_values);
13578 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
13579 global_options.x_param_values,
13580 global_options_set.x_param_values);
13581 }
13582
13583 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
13584 global_options.x_param_values,
13585 global_options_set.x_param_values);
13586   /* Values for loop prefetching.  */
13587 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
13588 global_options.x_param_values,
13589 global_options_set.x_param_values);
13590 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
13591 global_options.x_param_values,
13592 global_options_set.x_param_values);
13593   /* s390 has more than 2 cache levels and their sizes are much larger.  Since
13594      we are always running virtualized, assume that we only get a small
13595      part of the caches above L1.  */
13596 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
13597 global_options.x_param_values,
13598 global_options_set.x_param_values);
13599 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
13600 global_options.x_param_values,
13601 global_options_set.x_param_values);
13602 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
13603 global_options.x_param_values,
13604 global_options_set.x_param_values);
13605
13606 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
13607 requires the arch flags to be evaluated already. Since prefetching
13608 is beneficial on s390, we enable it if available. */
13609 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
13610 flag_prefetch_loop_arrays = 1;
13611
13612 /* Use the alternative scheduling-pressure algorithm by default. */
13613 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
13614 global_options.x_param_values,
13615 global_options_set.x_param_values);
13616
13617 if (TARGET_TPF)
13618 {
13619 /* Don't emit DWARF3/4 unless specifically selected. The TPF
13620 debuggers do not yet support DWARF 3/4. */
13621 if (!global_options_set.x_dwarf_strict)
13622 dwarf_strict = 1;
13623 if (!global_options_set.x_dwarf_version)
13624 dwarf_version = 2;
13625 }
13626
13627 /* Register a target-specific optimization-and-lowering pass
13628 to run immediately before prologue and epilogue generation.
13629
13630 Registering the pass must be done at start up. It's
13631 convenient to do it here. */
13632 opt_pass *new_pass = new pass_s390_early_mach (g);
13633 struct register_pass_info insert_pass_s390_early_mach =
13634 {
13635 new_pass, /* pass */
13636 "pro_and_epilogue", /* reference_pass_name */
13637 1, /* ref_pass_instance_number */
13638 PASS_POS_INSERT_BEFORE /* po_op */
13639 };
13640 register_pass (&insert_pass_s390_early_mach);
13641 }
13642
13643 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
13644
13645 static bool
13646 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
13647 unsigned int align ATTRIBUTE_UNUSED,
13648 enum by_pieces_operation op ATTRIBUTE_UNUSED,
13649 bool speed_p ATTRIBUTE_UNUSED)
13650 {
13651 return (size == 1 || size == 2
13652 || size == 4 || (TARGET_ZARCH && size == 8));
13653 }
13654
13655 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13656
13657 static void
13658 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13659 {
13660 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
13661 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
13662 tree call_efpc = build_call_expr (efpc, 0);
13663 tree fenv_var = create_tmp_var (unsigned_type_node);
13664
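  /* Masks and shift counts giving the location of the IEEE exception
     masks, the exception flags and the data exception code (DXC)
     within the FPC (floating point control) register.  */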
13665 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
13666 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
13667 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
13668 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
13669 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
13670 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
13671
13672 /* Generates the equivalent of feholdexcept (&fenv_var)
13673
13674 fenv_var = __builtin_s390_efpc ();
13675 __builtin_s390_sfpc (fenv_var & mask) */
13676 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
13677 tree new_fpc =
13678 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13679 build_int_cst (unsigned_type_node,
13680 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
13681 FPC_EXCEPTION_MASK)));
13682 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
13683 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
13684
13685 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
13686
13687 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
13688 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
13689 build_int_cst (unsigned_type_node,
13690 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
13691 *clear = build_call_expr (sfpc, 1, new_fpc);
13692
13693 /* Generates the equivalent of feupdateenv (fenv_var)
13694
13695 old_fpc = __builtin_s390_efpc ();
13696 __builtin_s390_sfpc (fenv_var);
13697 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
13698
13699 old_fpc = create_tmp_var (unsigned_type_node);
13700 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
13701 old_fpc, call_efpc);
13702
13703 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
13704
13705 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
13706 build_int_cst (unsigned_type_node,
13707 FPC_FLAGS_MASK));
13708 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
13709 build_int_cst (unsigned_type_node,
13710 FPC_FLAGS_SHIFT));
13711 tree atomic_feraiseexcept
13712 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13713 raise_old_except = build_call_expr (atomic_feraiseexcept,
13714 1, raise_old_except);
13715
13716 *update = build2 (COMPOUND_EXPR, void_type_node,
13717 build2 (COMPOUND_EXPR, void_type_node,
13718 store_old_fpc, set_new_fpc),
13719 raise_old_except);
13720
13721 #undef FPC_EXCEPTION_MASK
13722 #undef FPC_FLAGS_MASK
13723 #undef FPC_DXC_MASK
13724 #undef FPC_EXCEPTION_MASK_SHIFT
13725 #undef FPC_FLAGS_SHIFT
13726 #undef FPC_DXC_SHIFT
13727 }
13728
13729 /* Return the vector mode to be used for inner mode MODE when doing
13730 vectorization. */
13731 static machine_mode
13732 s390_preferred_simd_mode (machine_mode mode)
13733 {
13734 if (TARGET_VX)
13735 switch (mode)
13736 {
13737 case DFmode:
13738 return V2DFmode;
13739 case DImode:
13740 return V2DImode;
13741 case SImode:
13742 return V4SImode;
13743 case HImode:
13744 return V8HImode;
13745 case QImode:
13746 return V16QImode;
13747 default:;
13748 }
13749 return word_mode;
13750 }
13751
13752 /* Our hardware does not require vectors to be strictly aligned. */
13753 static bool
13754 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
13755 const_tree type ATTRIBUTE_UNUSED,
13756 int misalignment ATTRIBUTE_UNUSED,
13757 bool is_packed ATTRIBUTE_UNUSED)
13758 {
13759 if (TARGET_VX)
13760 return true;
13761
13762 return default_builtin_support_vector_misalignment (mode, type, misalignment,
13763 is_packed);
13764 }
13765
13766 /* The vector ABI requires vector types to be aligned on an 8 byte
13767 boundary (our stack alignment). However, we allow this to be
13768    overridden by the user, even though this definitely breaks the ABI.  */
13769 static HOST_WIDE_INT
13770 s390_vector_alignment (const_tree type)
13771 {
13772 if (!TARGET_VX_ABI)
13773 return default_vector_alignment (type);
13774
13775 if (TYPE_USER_ALIGN (type))
13776 return TYPE_ALIGN (type);
13777
13778 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
13779 }
13780
13781 /* Implement TARGET_ASM_FILE_END. */
13782 static void
13783 s390_asm_file_end (void)
13784 {
13785 #ifdef HAVE_AS_GNU_ATTRIBUTE
13786 varpool_node *vnode;
13787 cgraph_node *cnode;
13788
13789 FOR_EACH_VARIABLE (vnode)
13790 if (TREE_PUBLIC (vnode->decl))
13791 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
13792
13793 FOR_EACH_FUNCTION (cnode)
13794 if (TREE_PUBLIC (cnode->decl))
13795 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
13796
13797
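  /* Emit a GNU object attribute recording which vector ABI variant
     this object file was compiled for.  */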
13798 if (s390_vector_abi != 0)
13799 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
13800 s390_vector_abi);
13801 #endif
13802 file_end_indicate_exec_stack ();
13803 }
13804
13805 /* Return true if TYPE is a vector bool type. */
13806 static inline bool
13807 s390_vector_bool_type_p (const_tree type)
13808 {
13809 return TYPE_VECTOR_OPAQUE (type);
13810 }
13811
13812 /* Return the diagnostic message string if the binary operation OP is
13813 not permitted on TYPE1 and TYPE2, NULL otherwise. */
13814 static const char*
13815 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
13816 {
13817 bool bool1_p, bool2_p;
13818 bool plusminus_p;
13819 bool muldiv_p;
13820 bool compare_p;
13821 machine_mode mode1, mode2;
13822
13823 if (!TARGET_ZVECTOR)
13824 return NULL;
13825
13826 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
13827 return NULL;
13828
13829 bool1_p = s390_vector_bool_type_p (type1);
13830 bool2_p = s390_vector_bool_type_p (type2);
13831
13832 /* Mixing signed and unsigned types is forbidden for all
13833 operators. */
13834 if (!bool1_p && !bool2_p
13835 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
13836     return N_("types differ in signedness");
13837
13838 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
13839 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
13840 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
13841 || op == ROUND_DIV_EXPR);
13842 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
13843 || op == EQ_EXPR || op == NE_EXPR);
13844
13845 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
13846 return N_("binary operator does not support two vector bool operands");
13847
13848 if (bool1_p != bool2_p && (muldiv_p || compare_p))
13849 return N_("binary operator does not support vector bool operand");
13850
13851 mode1 = TYPE_MODE (type1);
13852 mode2 = TYPE_MODE (type2);
13853
13854 if (bool1_p != bool2_p && plusminus_p
13855 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
13856 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
13857 return N_("binary operator does not support mixing vector "
13858 "bool with floating point vector operands");
13859
13860 return NULL;
13861 }
13862
13863 /* Initialize GCC target structure. */
13864
13865 #undef TARGET_ASM_ALIGNED_HI_OP
13866 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
13867 #undef TARGET_ASM_ALIGNED_DI_OP
13868 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
13869 #undef TARGET_ASM_INTEGER
13870 #define TARGET_ASM_INTEGER s390_assemble_integer
13871
13872 #undef TARGET_ASM_OPEN_PAREN
13873 #define TARGET_ASM_OPEN_PAREN ""
13874
13875 #undef TARGET_ASM_CLOSE_PAREN
13876 #define TARGET_ASM_CLOSE_PAREN ""
13877
13878 #undef TARGET_OPTION_OVERRIDE
13879 #define TARGET_OPTION_OVERRIDE s390_option_override
13880
13881 #undef TARGET_ENCODE_SECTION_INFO
13882 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
13883
13884 #undef TARGET_SCALAR_MODE_SUPPORTED_P
13885 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
13886
13887 #ifdef HAVE_AS_TLS
13888 #undef TARGET_HAVE_TLS
13889 #define TARGET_HAVE_TLS true
13890 #endif
13891 #undef TARGET_CANNOT_FORCE_CONST_MEM
13892 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
13893
13894 #undef TARGET_DELEGITIMIZE_ADDRESS
13895 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
13896
13897 #undef TARGET_LEGITIMIZE_ADDRESS
13898 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
13899
13900 #undef TARGET_RETURN_IN_MEMORY
13901 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
13902
13903 #undef TARGET_INIT_BUILTINS
13904 #define TARGET_INIT_BUILTINS s390_init_builtins
13905 #undef TARGET_EXPAND_BUILTIN
13906 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
13907 #undef TARGET_BUILTIN_DECL
13908 #define TARGET_BUILTIN_DECL s390_builtin_decl
13909
13910 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
13911 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
13912
13913 #undef TARGET_ASM_OUTPUT_MI_THUNK
13914 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
13915 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
13916 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
13917
13918 #undef TARGET_SCHED_ADJUST_PRIORITY
13919 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
13920 #undef TARGET_SCHED_ISSUE_RATE
13921 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
13922 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
13923 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
13924
13925 #undef TARGET_SCHED_VARIABLE_ISSUE
13926 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
13927 #undef TARGET_SCHED_REORDER
13928 #define TARGET_SCHED_REORDER s390_sched_reorder
13929 #undef TARGET_SCHED_INIT
13930 #define TARGET_SCHED_INIT s390_sched_init
13931
13932 #undef TARGET_CANNOT_COPY_INSN_P
13933 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
13934 #undef TARGET_RTX_COSTS
13935 #define TARGET_RTX_COSTS s390_rtx_costs
13936 #undef TARGET_ADDRESS_COST
13937 #define TARGET_ADDRESS_COST s390_address_cost
13938 #undef TARGET_REGISTER_MOVE_COST
13939 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
13940 #undef TARGET_MEMORY_MOVE_COST
13941 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
13942
13943 #undef TARGET_MACHINE_DEPENDENT_REORG
13944 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
13945
13946 #undef TARGET_VALID_POINTER_MODE
13947 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
13948
13949 #undef TARGET_BUILD_BUILTIN_VA_LIST
13950 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
13951 #undef TARGET_EXPAND_BUILTIN_VA_START
13952 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
13953 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
13954 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
13955
13956 #undef TARGET_PROMOTE_FUNCTION_MODE
13957 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
13958 #undef TARGET_PASS_BY_REFERENCE
13959 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
13960
13961 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
13962 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
13963 #undef TARGET_FUNCTION_ARG
13964 #define TARGET_FUNCTION_ARG s390_function_arg
13965 #undef TARGET_FUNCTION_ARG_ADVANCE
13966 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
13967 #undef TARGET_FUNCTION_VALUE
13968 #define TARGET_FUNCTION_VALUE s390_function_value
13969 #undef TARGET_LIBCALL_VALUE
13970 #define TARGET_LIBCALL_VALUE s390_libcall_value
13971 #undef TARGET_STRICT_ARGUMENT_NAMING
13972 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
13973
13974 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
13975 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
13976
13977 #undef TARGET_FIXED_CONDITION_CODE_REGS
13978 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
13979
13980 #undef TARGET_CC_MODES_COMPATIBLE
13981 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
13982
13983 #undef TARGET_INVALID_WITHIN_DOLOOP
13984 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
13985
13986 #ifdef HAVE_AS_TLS
13987 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
13988 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
13989 #endif
13990
13991 #undef TARGET_DWARF_FRAME_REG_MODE
13992 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
13993
13994 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
13995 #undef TARGET_MANGLE_TYPE
13996 #define TARGET_MANGLE_TYPE s390_mangle_type
13997 #endif
13998
13999 #undef TARGET_SCALAR_MODE_SUPPORTED_P
14000 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
14001
14002 #undef TARGET_VECTOR_MODE_SUPPORTED_P
14003 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
14004
14005 #undef TARGET_PREFERRED_RELOAD_CLASS
14006 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
14007
14008 #undef TARGET_SECONDARY_RELOAD
14009 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
14010
14011 #undef TARGET_LIBGCC_CMP_RETURN_MODE
14012 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
14013
14014 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
14015 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
14016
14017 #undef TARGET_LEGITIMATE_ADDRESS_P
14018 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
14019
14020 #undef TARGET_LEGITIMATE_CONSTANT_P
14021 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
14022
14023 #undef TARGET_LRA_P
14024 #define TARGET_LRA_P s390_lra_p
14025
14026 #undef TARGET_CAN_ELIMINATE
14027 #define TARGET_CAN_ELIMINATE s390_can_eliminate
14028
14029 #undef TARGET_CONDITIONAL_REGISTER_USAGE
14030 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
14031
14032 #undef TARGET_LOOP_UNROLL_ADJUST
14033 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
14034
14035 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
14036 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
14037 #undef TARGET_TRAMPOLINE_INIT
14038 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
14039
14040 #undef TARGET_UNWIND_WORD_MODE
14041 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
14042
14043 #undef TARGET_CANONICALIZE_COMPARISON
14044 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
14045
14046 #undef TARGET_HARD_REGNO_SCRATCH_OK
14047 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
14048
14049 #undef TARGET_ATTRIBUTE_TABLE
14050 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
14051
14052 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
14053 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
14054
14055 #undef TARGET_SET_UP_BY_PROLOGUE
14056 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
14057
14058 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
14059 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
14060 s390_use_by_pieces_infrastructure_p
14061
14062 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
14063 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
14064
14065 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
14066 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
14067
14068 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
14069 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
14070
14071 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
14072 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
14073
14074 #undef TARGET_VECTOR_ALIGNMENT
14075 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
14076
14077 #undef TARGET_INVALID_BINARY_OP
14078 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
14079
14080 #undef TARGET_ASM_FILE_END
14081 #define TARGET_ASM_FILE_END s390_asm_file_end
14082
14083 struct gcc_target targetm = TARGET_INITIALIZER;
14084
14085 #include "gt-s390.h"