S/390: Fix PR89775. Stackpointer save/restore instructions removed
[gcc.git] / gcc / config / s390 / s390.c
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2019 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "target-globals.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "gimple.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "expmed.h"
42 #include "optabs.h"
43 #include "regs.h"
44 #include "emit-rtl.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "diagnostic-core.h"
48 #include "diagnostic.h"
49 #include "alias.h"
50 #include "fold-const.h"
51 #include "print-tree.h"
52 #include "stor-layout.h"
53 #include "varasm.h"
54 #include "calls.h"
55 #include "conditions.h"
56 #include "output.h"
57 #include "insn-attr.h"
58 #include "flags.h"
59 #include "except.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "reload.h"
65 #include "cfgrtl.h"
66 #include "cfganal.h"
67 #include "lcm.h"
68 #include "cfgbuild.h"
69 #include "cfgcleanup.h"
70 #include "debug.h"
71 #include "langhooks.h"
72 #include "internal-fn.h"
73 #include "gimple-fold.h"
74 #include "tree-eh.h"
75 #include "gimplify.h"
76 #include "params.h"
77 #include "opts.h"
78 #include "tree-pass.h"
79 #include "context.h"
80 #include "builtins.h"
81 #include "rtl-iter.h"
82 #include "intl.h"
83 #include "tm-constrs.h"
84 #include "tree-vrp.h"
85 #include "symbol-summary.h"
86 #include "ipa-prop.h"
87 #include "ipa-fnsummary.h"
88 #include "sched-int.h"
89
90 /* This file should be included last. */
91 #include "target-def.h"
92
93 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
94
95 /* Remember the last target of s390_set_current_function. */
96 static GTY(()) tree s390_previous_fndecl;
97
98 /* Define the specific costs for a given cpu. */
99
100 struct processor_costs
101 {
102 /* multiplication */
103 const int m; /* cost of an M instruction. */
104 const int mghi; /* cost of an MGHI instruction. */
105 const int mh; /* cost of an MH instruction. */
106 const int mhi; /* cost of an MHI instruction. */
107 const int ml; /* cost of an ML instruction. */
108 const int mr; /* cost of an MR instruction. */
109 const int ms; /* cost of an MS instruction. */
110 const int msg; /* cost of an MSG instruction. */
111 const int msgf; /* cost of an MSGF instruction. */
112 const int msgfr; /* cost of an MSGFR instruction. */
113 const int msgr; /* cost of an MSGR instruction. */
114 const int msr; /* cost of an MSR instruction. */
115 const int mult_df; /* cost of multiplication in DFmode. */
116 const int mxbr;
117 /* square root */
118 const int sqxbr; /* cost of square root in TFmode. */
119 const int sqdbr; /* cost of square root in DFmode. */
120 const int sqebr; /* cost of square root in SFmode. */
121 /* multiply and add */
122 const int madbr; /* cost of multiply and add in DFmode. */
123 const int maebr; /* cost of multiply and add in SFmode. */
124 /* division */
125 const int dxbr;
126 const int ddbr;
127 const int debr;
128 const int dlgr;
129 const int dlr;
130 const int dr;
131 const int dsgfr;
132 const int dsgr;
133 };
134
135 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
136
137 static const
138 struct processor_costs z900_cost =
139 {
140 COSTS_N_INSNS (5), /* M */
141 COSTS_N_INSNS (10), /* MGHI */
142 COSTS_N_INSNS (5), /* MH */
143 COSTS_N_INSNS (4), /* MHI */
144 COSTS_N_INSNS (5), /* ML */
145 COSTS_N_INSNS (5), /* MR */
146 COSTS_N_INSNS (4), /* MS */
147 COSTS_N_INSNS (15), /* MSG */
148 COSTS_N_INSNS (7), /* MSGF */
149 COSTS_N_INSNS (7), /* MSGFR */
150 COSTS_N_INSNS (10), /* MSGR */
151 COSTS_N_INSNS (4), /* MSR */
152 COSTS_N_INSNS (7), /* multiplication in DFmode */
153 COSTS_N_INSNS (13), /* MXBR */
154 COSTS_N_INSNS (136), /* SQXBR */
155 COSTS_N_INSNS (44), /* SQDBR */
156 COSTS_N_INSNS (35), /* SQEBR */
157 COSTS_N_INSNS (18), /* MADBR */
158 COSTS_N_INSNS (13), /* MAEBR */
159 COSTS_N_INSNS (134), /* DXBR */
160 COSTS_N_INSNS (30), /* DDBR */
161 COSTS_N_INSNS (27), /* DEBR */
162 COSTS_N_INSNS (220), /* DLGR */
163 COSTS_N_INSNS (34), /* DLR */
164 COSTS_N_INSNS (34), /* DR */
165 COSTS_N_INSNS (32), /* DSGFR */
166 COSTS_N_INSNS (32), /* DSGR */
167 };
168
169 static const
170 struct processor_costs z990_cost =
171 {
172 COSTS_N_INSNS (4), /* M */
173 COSTS_N_INSNS (2), /* MGHI */
174 COSTS_N_INSNS (2), /* MH */
175 COSTS_N_INSNS (2), /* MHI */
176 COSTS_N_INSNS (4), /* ML */
177 COSTS_N_INSNS (4), /* MR */
178 COSTS_N_INSNS (5), /* MS */
179 COSTS_N_INSNS (6), /* MSG */
180 COSTS_N_INSNS (4), /* MSGF */
181 COSTS_N_INSNS (4), /* MSGFR */
182 COSTS_N_INSNS (4), /* MSGR */
183 COSTS_N_INSNS (4), /* MSR */
184 COSTS_N_INSNS (1), /* multiplication in DFmode */
185 COSTS_N_INSNS (28), /* MXBR */
186 COSTS_N_INSNS (130), /* SQXBR */
187 COSTS_N_INSNS (66), /* SQDBR */
188 COSTS_N_INSNS (38), /* SQEBR */
189 COSTS_N_INSNS (1), /* MADBR */
190 COSTS_N_INSNS (1), /* MAEBR */
191 COSTS_N_INSNS (60), /* DXBR */
192 COSTS_N_INSNS (40), /* DDBR */
193 COSTS_N_INSNS (26), /* DEBR */
194 COSTS_N_INSNS (176), /* DLGR */
195 COSTS_N_INSNS (31), /* DLR */
196 COSTS_N_INSNS (31), /* DR */
197 COSTS_N_INSNS (31), /* DSGFR */
198 COSTS_N_INSNS (31), /* DSGR */
199 };
200
201 static const
202 struct processor_costs z9_109_cost =
203 {
204 COSTS_N_INSNS (4), /* M */
205 COSTS_N_INSNS (2), /* MGHI */
206 COSTS_N_INSNS (2), /* MH */
207 COSTS_N_INSNS (2), /* MHI */
208 COSTS_N_INSNS (4), /* ML */
209 COSTS_N_INSNS (4), /* MR */
210 COSTS_N_INSNS (5), /* MS */
211 COSTS_N_INSNS (6), /* MSG */
212 COSTS_N_INSNS (4), /* MSGF */
213 COSTS_N_INSNS (4), /* MSGFR */
214 COSTS_N_INSNS (4), /* MSGR */
215 COSTS_N_INSNS (4), /* MSR */
216 COSTS_N_INSNS (1), /* multiplication in DFmode */
217 COSTS_N_INSNS (28), /* MXBR */
218 COSTS_N_INSNS (130), /* SQXBR */
219 COSTS_N_INSNS (66), /* SQDBR */
220 COSTS_N_INSNS (38), /* SQEBR */
221 COSTS_N_INSNS (1), /* MADBR */
222 COSTS_N_INSNS (1), /* MAEBR */
223 COSTS_N_INSNS (60), /* DXBR */
224 COSTS_N_INSNS (40), /* DDBR */
225 COSTS_N_INSNS (26), /* DEBR */
226 COSTS_N_INSNS (30), /* DLGR */
227 COSTS_N_INSNS (23), /* DLR */
228 COSTS_N_INSNS (23), /* DR */
229 COSTS_N_INSNS (24), /* DSGFR */
230 COSTS_N_INSNS (24), /* DSGR */
231 };
232
233 static const
234 struct processor_costs z10_cost =
235 {
236 COSTS_N_INSNS (10), /* M */
237 COSTS_N_INSNS (10), /* MGHI */
238 COSTS_N_INSNS (10), /* MH */
239 COSTS_N_INSNS (10), /* MHI */
240 COSTS_N_INSNS (10), /* ML */
241 COSTS_N_INSNS (10), /* MR */
242 COSTS_N_INSNS (10), /* MS */
243 COSTS_N_INSNS (10), /* MSG */
244 COSTS_N_INSNS (10), /* MSGF */
245 COSTS_N_INSNS (10), /* MSGFR */
246 COSTS_N_INSNS (10), /* MSGR */
247 COSTS_N_INSNS (10), /* MSR */
248 COSTS_N_INSNS (1) , /* multiplication in DFmode */
249 COSTS_N_INSNS (50), /* MXBR */
250 COSTS_N_INSNS (120), /* SQXBR */
251 COSTS_N_INSNS (52), /* SQDBR */
252 COSTS_N_INSNS (38), /* SQEBR */
253 COSTS_N_INSNS (1), /* MADBR */
254 COSTS_N_INSNS (1), /* MAEBR */
255 COSTS_N_INSNS (111), /* DXBR */
256 COSTS_N_INSNS (39), /* DDBR */
257 COSTS_N_INSNS (32), /* DEBR */
258 COSTS_N_INSNS (160), /* DLGR */
259 COSTS_N_INSNS (71), /* DLR */
260 COSTS_N_INSNS (71), /* DR */
261 COSTS_N_INSNS (71), /* DSGFR */
262 COSTS_N_INSNS (71), /* DSGR */
263 };
264
265 static const
266 struct processor_costs z196_cost =
267 {
268 COSTS_N_INSNS (7), /* M */
269 COSTS_N_INSNS (5), /* MGHI */
270 COSTS_N_INSNS (5), /* MH */
271 COSTS_N_INSNS (5), /* MHI */
272 COSTS_N_INSNS (7), /* ML */
273 COSTS_N_INSNS (7), /* MR */
274 COSTS_N_INSNS (6), /* MS */
275 COSTS_N_INSNS (8), /* MSG */
276 COSTS_N_INSNS (6), /* MSGF */
277 COSTS_N_INSNS (6), /* MSGFR */
278 COSTS_N_INSNS (8), /* MSGR */
279 COSTS_N_INSNS (6), /* MSR */
280 COSTS_N_INSNS (1) , /* multiplication in DFmode */
281 COSTS_N_INSNS (40), /* MXBR B+40 */
282 COSTS_N_INSNS (100), /* SQXBR B+100 */
283 COSTS_N_INSNS (42), /* SQDBR B+42 */
284 COSTS_N_INSNS (28), /* SQEBR B+28 */
285 COSTS_N_INSNS (1), /* MADBR B */
286 COSTS_N_INSNS (1), /* MAEBR B */
287 COSTS_N_INSNS (101), /* DXBR B+101 */
288 COSTS_N_INSNS (29), /* DDBR */
289 COSTS_N_INSNS (22), /* DEBR */
290 COSTS_N_INSNS (160), /* DLGR cracked */
291 COSTS_N_INSNS (160), /* DLR cracked */
292 COSTS_N_INSNS (160), /* DR expanded */
293 COSTS_N_INSNS (160), /* DSGFR cracked */
294 COSTS_N_INSNS (160), /* DSGR cracked */
295 };
296
297 static const
298 struct processor_costs zEC12_cost =
299 {
300 COSTS_N_INSNS (7), /* M */
301 COSTS_N_INSNS (5), /* MGHI */
302 COSTS_N_INSNS (5), /* MH */
303 COSTS_N_INSNS (5), /* MHI */
304 COSTS_N_INSNS (7), /* ML */
305 COSTS_N_INSNS (7), /* MR */
306 COSTS_N_INSNS (6), /* MS */
307 COSTS_N_INSNS (8), /* MSG */
308 COSTS_N_INSNS (6), /* MSGF */
309 COSTS_N_INSNS (6), /* MSGFR */
310 COSTS_N_INSNS (8), /* MSGR */
311 COSTS_N_INSNS (6), /* MSR */
312 COSTS_N_INSNS (1) , /* multiplication in DFmode */
313 COSTS_N_INSNS (40), /* MXBR B+40 */
314 COSTS_N_INSNS (100), /* SQXBR B+100 */
315 COSTS_N_INSNS (42), /* SQDBR B+42 */
316 COSTS_N_INSNS (28), /* SQEBR B+28 */
317 COSTS_N_INSNS (1), /* MADBR B */
318 COSTS_N_INSNS (1), /* MAEBR B */
319 COSTS_N_INSNS (131), /* DXBR B+131 */
320 COSTS_N_INSNS (29), /* DDBR */
321 COSTS_N_INSNS (22), /* DEBR */
322 COSTS_N_INSNS (160), /* DLGR cracked */
323 COSTS_N_INSNS (160), /* DLR cracked */
324 COSTS_N_INSNS (160), /* DR expanded */
325 COSTS_N_INSNS (160), /* DSGFR cracked */
326 COSTS_N_INSNS (160), /* DSGR cracked */
327 };
328
329 const struct s390_processor processor_table[] =
330 {
331 { "z900", "z900", PROCESSOR_2064_Z900, &z900_cost, 5 },
332 { "z990", "z990", PROCESSOR_2084_Z990, &z990_cost, 6 },
333 { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7 },
334 { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost, 7 },
335 { "z10", "z10", PROCESSOR_2097_Z10, &z10_cost, 8 },
336 { "z196", "z196", PROCESSOR_2817_Z196, &z196_cost, 9 },
337 { "zEC12", "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost, 10 },
338 { "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost, 11 },
339 { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 },
340 { "native", "", PROCESSOR_NATIVE, NULL, 0 }
341 };
342
343 extern int reload_completed;
344
345 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
346 static rtx_insn *last_scheduled_insn;
347 #define NUM_SIDES 2
348
349 #define MAX_SCHED_UNITS 4
350 static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];
351
352 /* Estimate of number of cycles a long-running insn occupies an
353 execution unit. */
354 static int fxd_longrunning[NUM_SIDES];
355 static int fpd_longrunning[NUM_SIDES];
356
357 /* The maximum score added for an instruction whose unit hasn't been
358 in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
359 give instruction mix scheduling more priority over instruction
360 grouping. */
361 #define MAX_SCHED_MIX_SCORE 2
362
363 /* The maximum distance up to which individual scores will be
364 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
365 Increase this with the OOO windows size of the machine. */
366 #define MAX_SCHED_MIX_DISTANCE 70
367
368 /* Structure used to hold the components of a S/390 memory
369 address. A legitimate address on S/390 is of the general
370 form
371 base + index + displacement
372 where any of the components is optional.
373
374 base and index are registers of the class ADDR_REGS,
375 displacement is an unsigned 12-bit immediate constant. */
376
377 /* The max number of insns of backend generated memset/memcpy/memcmp
378 loops. This value is used in the unroll adjust hook to detect such
379 loops. Current max is 9 coming from the memcmp loop. */
380 #define BLOCK_MEM_OPS_LOOP_INSNS 9
381
382 struct s390_address
383 {
384 rtx base;
385 rtx indx;
386 rtx disp;
387 bool pointer;
388 bool literal_pool;
389 };
390
391 /* Few accessor macros for struct cfun->machine->s390_frame_layout. */
392
393 #define cfun_frame_layout (cfun->machine->frame_layout)
394 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
395 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
396 ? cfun_frame_layout.fpr_bitmap & 0x0f \
397 : cfun_frame_layout.fpr_bitmap & 0x03))
398 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
399 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
400 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
401 (1 << (REGNO - FPR0_REGNUM)))
402 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
403 (1 << (REGNO - FPR0_REGNUM))))
404 #define cfun_gpr_save_slot(REGNO) \
405 cfun->machine->frame_layout.gpr_save_slots[REGNO]
406
407 /* Number of GPRs and FPRs used for argument passing. */
408 #define GP_ARG_NUM_REG 5
409 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
410 #define VEC_ARG_NUM_REG 8
411
412 /* A couple of shortcuts. */
413 #define CONST_OK_FOR_J(x) \
414 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
415 #define CONST_OK_FOR_K(x) \
416 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
417 #define CONST_OK_FOR_Os(x) \
418 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
419 #define CONST_OK_FOR_Op(x) \
420 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
421 #define CONST_OK_FOR_On(x) \
422 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
423
424 #define REGNO_PAIR_OK(REGNO, MODE) \
425 (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
426
427 /* That's the read ahead of the dynamic branch prediction unit in
428 bytes on a z10 (or higher) CPU. */
429 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
430
431 /* Masks per jump target register indicating which thunk need to be
432 generated. */
433 static GTY(()) int indirect_branch_prez10thunk_mask = 0;
434 static GTY(()) int indirect_branch_z10thunk_mask = 0;
435
436 #define INDIRECT_BRANCH_NUM_OPTIONS 4
437
438 enum s390_indirect_branch_option
439 {
440 s390_opt_indirect_branch_jump = 0,
441 s390_opt_indirect_branch_call,
442 s390_opt_function_return_reg,
443 s390_opt_function_return_mem
444 };
445
446 static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
447 const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
448 { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
449 const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] = \
450 { ".s390_indirect_jump", ".s390_indirect_call",
451 ".s390_return_reg", ".s390_return_mem" };
452
453 bool
454 s390_return_addr_from_memory ()
455 {
456 return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
457 }
458
459 /* Indicate which ABI has been used for passing vector args.
460 0 - no vector type arguments have been passed where the ABI is relevant
461 1 - the old ABI has been used
462 2 - a vector type argument has been passed either in a vector register
463 or on the stack by value */
464 static int s390_vector_abi = 0;
465
466 /* Set the vector ABI marker if TYPE is subject to the vector ABI
467 switch. The vector ABI affects only vector data types. There are
468 two aspects of the vector ABI relevant here:
469
470 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
471 ABI and natural alignment with the old.
472
473 2. vector <= 16 bytes are passed in VRs or by value on the stack
474 with the new ABI but by reference on the stack with the old.
475
476 If ARG_P is true TYPE is used for a function argument or return
477 value. The ABI marker then is set for all vector data types. If
478 ARG_P is false only type 1 vectors are being checked. */
479
480 static void
481 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
482 {
483 static hash_set<const_tree> visited_types_hash;
484
485 if (s390_vector_abi)
486 return;
487
488 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
489 return;
490
491 if (visited_types_hash.contains (type))
492 return;
493
494 visited_types_hash.add (type);
495
496 if (VECTOR_TYPE_P (type))
497 {
498 int type_size = int_size_in_bytes (type);
499
500 /* Outside arguments only the alignment is changing and this
501 only happens for vector types >= 16 bytes. */
502 if (!arg_p && type_size < 16)
503 return;
504
505 /* In arguments vector types > 16 are passed as before (GCC
506 never enforced the bigger alignment for arguments which was
507 required by the old vector ABI). However, it might still be
508 ABI relevant due to the changed alignment if it is a struct
509 member. */
510 if (arg_p && type_size > 16 && !in_struct_p)
511 return;
512
513 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
514 }
515 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
516 {
517 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
518 natural alignment there will never be ABI dependent padding
519 in an array type. That's why we do not set in_struct_p to
520 true here. */
521 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
522 }
523 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
524 {
525 tree arg_chain;
526
527 /* Check the return type. */
528 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
529
530 for (arg_chain = TYPE_ARG_TYPES (type);
531 arg_chain;
532 arg_chain = TREE_CHAIN (arg_chain))
533 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
534 }
535 else if (RECORD_OR_UNION_TYPE_P (type))
536 {
537 tree field;
538
539 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
540 {
541 if (TREE_CODE (field) != FIELD_DECL)
542 continue;
543
544 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
545 }
546 }
547 }
548
549
550 /* System z builtins. */
551
552 #include "s390-builtins.h"
553
554 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
555 {
556 #undef B_DEF
557 #undef OB_DEF
558 #undef OB_DEF_VAR
559 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
560 #define OB_DEF(...)
561 #define OB_DEF_VAR(...)
562 #include "s390-builtins.def"
563 0
564 };
565
566 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
567 {
568 #undef B_DEF
569 #undef OB_DEF
570 #undef OB_DEF_VAR
571 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
572 #define OB_DEF(...)
573 #define OB_DEF_VAR(...)
574 #include "s390-builtins.def"
575 0
576 };
577
578 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
579 {
580 #undef B_DEF
581 #undef OB_DEF
582 #undef OB_DEF_VAR
583 #define B_DEF(...)
584 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
585 #define OB_DEF_VAR(...)
586 #include "s390-builtins.def"
587 0
588 };
589
590 const unsigned int
591 bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
592 {
593 #undef B_DEF
594 #undef OB_DEF
595 #undef OB_DEF_VAR
596 #define B_DEF(...)
597 #define OB_DEF(...)
598 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
599 #include "s390-builtins.def"
600 0
601 };
602
603 const unsigned int
604 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
605 {
606 #undef B_DEF
607 #undef OB_DEF
608 #undef OB_DEF_VAR
609 #define B_DEF(...)
610 #define OB_DEF(...)
611 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
612 #include "s390-builtins.def"
613 0
614 };
615
616 tree s390_builtin_types[BT_MAX];
617 tree s390_builtin_fn_types[BT_FN_MAX];
618 tree s390_builtin_decls[S390_BUILTIN_MAX +
619 S390_OVERLOADED_BUILTIN_MAX +
620 S390_OVERLOADED_BUILTIN_VAR_MAX];
621
622 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
623 #undef B_DEF
624 #undef OB_DEF
625 #undef OB_DEF_VAR
626 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
627 #define OB_DEF(...)
628 #define OB_DEF_VAR(...)
629
630 #include "s390-builtins.def"
631 CODE_FOR_nothing
632 };
633
634 static void
635 s390_init_builtins (void)
636 {
637 /* These definitions are being used in s390-builtins.def. */
638 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
639 NULL, NULL);
640 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
641 tree c_uint64_type_node;
642
643 /* The uint64_type_node from tree.c is not compatible to the C99
644 uint64_t data type. What we want is c_uint64_type_node from
645 c-common.c. But since backend code is not supposed to interface
646 with the frontend we recreate it here. */
647 if (TARGET_64BIT)
648 c_uint64_type_node = long_unsigned_type_node;
649 else
650 c_uint64_type_node = long_long_unsigned_type_node;
651
652 #undef DEF_TYPE
653 #define DEF_TYPE(INDEX, NODE, CONST_P) \
654 if (s390_builtin_types[INDEX] == NULL) \
655 s390_builtin_types[INDEX] = (!CONST_P) ? \
656 (NODE) : build_type_variant ((NODE), 1, 0);
657
658 #undef DEF_POINTER_TYPE
659 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
660 if (s390_builtin_types[INDEX] == NULL) \
661 s390_builtin_types[INDEX] = \
662 build_pointer_type (s390_builtin_types[INDEX_BASE]);
663
664 #undef DEF_DISTINCT_TYPE
665 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
666 if (s390_builtin_types[INDEX] == NULL) \
667 s390_builtin_types[INDEX] = \
668 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
669
670 #undef DEF_VECTOR_TYPE
671 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
672 if (s390_builtin_types[INDEX] == NULL) \
673 s390_builtin_types[INDEX] = \
674 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
675
676 #undef DEF_OPAQUE_VECTOR_TYPE
677 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
678 if (s390_builtin_types[INDEX] == NULL) \
679 s390_builtin_types[INDEX] = \
680 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
681
682 #undef DEF_FN_TYPE
683 #define DEF_FN_TYPE(INDEX, args...) \
684 if (s390_builtin_fn_types[INDEX] == NULL) \
685 s390_builtin_fn_types[INDEX] = \
686 build_function_type_list (args, NULL_TREE);
687 #undef DEF_OV_TYPE
688 #define DEF_OV_TYPE(...)
689 #include "s390-builtin-types.def"
690
691 #undef B_DEF
692 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
693 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
694 s390_builtin_decls[S390_BUILTIN_##NAME] = \
695 add_builtin_function ("__builtin_" #NAME, \
696 s390_builtin_fn_types[FNTYPE], \
697 S390_BUILTIN_##NAME, \
698 BUILT_IN_MD, \
699 NULL, \
700 ATTRS);
701 #undef OB_DEF
702 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
703 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
704 == NULL) \
705 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
706 add_builtin_function ("__builtin_" #NAME, \
707 s390_builtin_fn_types[FNTYPE], \
708 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
709 BUILT_IN_MD, \
710 NULL, \
711 0);
712 #undef OB_DEF_VAR
713 #define OB_DEF_VAR(...)
714 #include "s390-builtins.def"
715
716 }
717
718 /* Return true if ARG is appropriate as argument number ARGNUM of
719 builtin DECL. The operand flags from s390-builtins.def have to
720 passed as OP_FLAGS. */
721 bool
722 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
723 {
724 if (O_UIMM_P (op_flags))
725 {
726 int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
727 int bitwidth = bitwidths[op_flags - O_U1];
728
729 if (!tree_fits_uhwi_p (arg)
730 || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
731 {
732 error ("constant argument %d for builtin %qF is out of range "
733 "(0..%wu)", argnum, decl,
734 (HOST_WIDE_INT_1U << bitwidth) - 1);
735 return false;
736 }
737 }
738
739 if (O_SIMM_P (op_flags))
740 {
741 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
742 int bitwidth = bitwidths[op_flags - O_S2];
743
744 if (!tree_fits_shwi_p (arg)
745 || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
746 || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
747 {
748 error ("constant argument %d for builtin %qF is out of range "
749 "(%wd..%wd)", argnum, decl,
750 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
751 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
752 return false;
753 }
754 }
755 return true;
756 }
757
758 /* Expand an expression EXP that calls a built-in function,
759 with result going to TARGET if that's convenient
760 (and in mode MODE if that's convenient).
761 SUBTARGET may be used as the target for computing one of EXP's operands.
762 IGNORE is nonzero if the value is to be ignored. */
763
764 static rtx
765 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
766 machine_mode mode ATTRIBUTE_UNUSED,
767 int ignore ATTRIBUTE_UNUSED)
768 {
769 #define MAX_ARGS 6
770
771 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
772 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
773 enum insn_code icode;
774 rtx op[MAX_ARGS], pat;
775 int arity;
776 bool nonvoid;
777 tree arg;
778 call_expr_arg_iterator iter;
779 unsigned int all_op_flags = opflags_for_builtin (fcode);
780 machine_mode last_vec_mode = VOIDmode;
781
782 if (TARGET_DEBUG_ARG)
783 {
784 fprintf (stderr,
785 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
786 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
787 bflags_for_builtin (fcode));
788 }
789
790 if (S390_USE_TARGET_ATTRIBUTE)
791 {
792 unsigned int bflags;
793
794 bflags = bflags_for_builtin (fcode);
795 if ((bflags & B_HTM) && !TARGET_HTM)
796 {
797 error ("builtin %qF is not supported without %<-mhtm%> "
798 "(default with %<-march=zEC12%> and higher).", fndecl);
799 return const0_rtx;
800 }
801 if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
802 {
803 error ("builtin %qF requires %<-mvx%> "
804 "(default with %<-march=z13%> and higher).", fndecl);
805 return const0_rtx;
806 }
807
808 if ((bflags & B_VXE) && !TARGET_VXE)
809 {
810 error ("Builtin %qF requires z14 or higher.", fndecl);
811 return const0_rtx;
812 }
813 }
814 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
815 && fcode < S390_ALL_BUILTIN_MAX)
816 {
817 gcc_unreachable ();
818 }
819 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
820 {
821 icode = code_for_builtin[fcode];
822 /* Set a flag in the machine specific cfun part in order to support
823 saving/restoring of FPRs. */
824 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
825 cfun->machine->tbegin_p = true;
826 }
827 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
828 {
829 error ("unresolved overloaded builtin");
830 return const0_rtx;
831 }
832 else
833 internal_error ("bad builtin fcode");
834
835 if (icode == 0)
836 internal_error ("bad builtin icode");
837
838 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
839
840 if (nonvoid)
841 {
842 machine_mode tmode = insn_data[icode].operand[0].mode;
843 if (!target
844 || GET_MODE (target) != tmode
845 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
846 target = gen_reg_rtx (tmode);
847
848 /* There are builtins (e.g. vec_promote) with no vector
849 arguments but an element selector. So we have to also look
850 at the vector return type when emitting the modulo
851 operation. */
852 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
853 last_vec_mode = insn_data[icode].operand[0].mode;
854 }
855
856 arity = 0;
857 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
858 {
859 rtx tmp_rtx;
860 const struct insn_operand_data *insn_op;
861 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
862
863 all_op_flags = all_op_flags >> O_SHIFT;
864
865 if (arg == error_mark_node)
866 return NULL_RTX;
867 if (arity >= MAX_ARGS)
868 return NULL_RTX;
869
870 if (O_IMM_P (op_flags)
871 && TREE_CODE (arg) != INTEGER_CST)
872 {
873 error ("constant value required for builtin %qF argument %d",
874 fndecl, arity + 1);
875 return const0_rtx;
876 }
877
878 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
879 return const0_rtx;
880
881 insn_op = &insn_data[icode].operand[arity + nonvoid];
882 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
883
884 /* expand_expr truncates constants to the target mode only if it
885 is "convenient". However, our checks below rely on this
886 being done. */
887 if (CONST_INT_P (op[arity])
888 && SCALAR_INT_MODE_P (insn_op->mode)
889 && GET_MODE (op[arity]) != insn_op->mode)
890 op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
891 insn_op->mode));
892
893 /* Wrap the expanded RTX for pointer types into a MEM expr with
894 the proper mode. This allows us to use e.g. (match_operand
895 "memory_operand"..) in the insn patterns instead of (mem
896 (match_operand "address_operand)). This is helpful for
897 patterns not just accepting MEMs. */
898 if (POINTER_TYPE_P (TREE_TYPE (arg))
899 && insn_op->predicate != address_operand)
900 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
901
902 /* Expand the module operation required on element selectors. */
903 if (op_flags == O_ELEM)
904 {
905 gcc_assert (last_vec_mode != VOIDmode);
906 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
907 op[arity],
908 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
909 NULL_RTX, 1, OPTAB_DIRECT);
910 }
911
912 /* Record the vector mode used for an element selector. This assumes:
913 1. There is no builtin with two different vector modes and an element selector
914 2. The element selector comes after the vector type it is referring to.
915 This currently the true for all the builtins but FIXME we
916 should better check for that. */
917 if (VECTOR_MODE_P (insn_op->mode))
918 last_vec_mode = insn_op->mode;
919
920 if (insn_op->predicate (op[arity], insn_op->mode))
921 {
922 arity++;
923 continue;
924 }
925
926 /* A memory operand is rejected by the memory_operand predicate.
927 Try making the address legal by copying it into a register. */
928 if (MEM_P (op[arity])
929 && insn_op->predicate == memory_operand
930 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
931 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
932 {
933 op[arity] = replace_equiv_address (op[arity],
934 copy_to_mode_reg (Pmode,
935 XEXP (op[arity], 0)));
936 }
937 /* Some of the builtins require different modes/types than the
938 pattern in order to implement a specific API. Instead of
939 adding many expanders which do the mode change we do it here.
940 E.g. s390_vec_add_u128 required to have vector unsigned char
941 arguments is mapped to addti3. */
942 else if (insn_op->mode != VOIDmode
943 && GET_MODE (op[arity]) != VOIDmode
944 && GET_MODE (op[arity]) != insn_op->mode
945 && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
946 GET_MODE (op[arity]), 0))
947 != NULL_RTX))
948 {
949 op[arity] = tmp_rtx;
950 }
951
952 /* The predicate rejects the operand although the mode is fine.
953 Copy the operand to register. */
954 if (!insn_op->predicate (op[arity], insn_op->mode)
955 && (GET_MODE (op[arity]) == insn_op->mode
956 || GET_MODE (op[arity]) == VOIDmode
957 || (insn_op->predicate == address_operand
958 && GET_MODE (op[arity]) == Pmode)))
959 {
960 /* An address_operand usually has VOIDmode in the expander
961 so we cannot use this. */
962 machine_mode target_mode =
963 (insn_op->predicate == address_operand
964 ? (machine_mode) Pmode : insn_op->mode);
965 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
966 }
967
968 if (!insn_op->predicate (op[arity], insn_op->mode))
969 {
970 error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
971 return const0_rtx;
972 }
973 arity++;
974 }
975
976 switch (arity)
977 {
978 case 0:
979 pat = GEN_FCN (icode) (target);
980 break;
981 case 1:
982 if (nonvoid)
983 pat = GEN_FCN (icode) (target, op[0]);
984 else
985 pat = GEN_FCN (icode) (op[0]);
986 break;
987 case 2:
988 if (nonvoid)
989 pat = GEN_FCN (icode) (target, op[0], op[1]);
990 else
991 pat = GEN_FCN (icode) (op[0], op[1]);
992 break;
993 case 3:
994 if (nonvoid)
995 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
996 else
997 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
998 break;
999 case 4:
1000 if (nonvoid)
1001 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1002 else
1003 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1004 break;
1005 case 5:
1006 if (nonvoid)
1007 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1008 else
1009 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1010 break;
1011 case 6:
1012 if (nonvoid)
1013 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1014 else
1015 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1016 break;
1017 default:
1018 gcc_unreachable ();
1019 }
1020 if (!pat)
1021 return NULL_RTX;
1022 emit_insn (pat);
1023
1024 if (nonvoid)
1025 return target;
1026 else
1027 return const0_rtx;
1028 }
1029
1030
1031 static const int s390_hotpatch_hw_max = 1000000;
1032 static int s390_hotpatch_hw_before_label = 0;
1033 static int s390_hotpatch_hw_after_label = 0;
1034
1035 /* Check whether the hotpatch attribute is applied to a function and, if it has
1036 an argument, the argument is valid. */
1037
1038 static tree
1039 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1040 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1041 {
1042 tree expr;
1043 tree expr2;
1044 int err;
1045
1046 if (TREE_CODE (*node) != FUNCTION_DECL)
1047 {
1048 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1049 name);
1050 *no_add_attrs = true;
1051 }
1052 if (args != NULL && TREE_CHAIN (args) != NULL)
1053 {
1054 expr = TREE_VALUE (args);
1055 expr2 = TREE_VALUE (TREE_CHAIN (args));
1056 }
1057 if (args == NULL || TREE_CHAIN (args) == NULL)
1058 err = 1;
1059 else if (TREE_CODE (expr) != INTEGER_CST
1060 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1061 || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
1062 err = 1;
1063 else if (TREE_CODE (expr2) != INTEGER_CST
1064 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1065 || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
1066 err = 1;
1067 else
1068 err = 0;
1069 if (err)
1070 {
1071 error ("requested %qE attribute is not a comma separated pair of"
1072 " non-negative integer constants or too large (max. %d)", name,
1073 s390_hotpatch_hw_max);
1074 *no_add_attrs = true;
1075 }
1076
1077 return NULL_TREE;
1078 }
1079
1080 /* Expand the s390_vector_bool type attribute. */
1081
1082 static tree
1083 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1084 tree args ATTRIBUTE_UNUSED,
1085 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1086 {
1087 tree type = *node, result = NULL_TREE;
1088 machine_mode mode;
1089
1090 while (POINTER_TYPE_P (type)
1091 || TREE_CODE (type) == FUNCTION_TYPE
1092 || TREE_CODE (type) == METHOD_TYPE
1093 || TREE_CODE (type) == ARRAY_TYPE)
1094 type = TREE_TYPE (type);
1095
1096 mode = TYPE_MODE (type);
1097 switch (mode)
1098 {
1099 case E_DImode: case E_V2DImode:
1100 result = s390_builtin_types[BT_BV2DI];
1101 break;
1102 case E_SImode: case E_V4SImode:
1103 result = s390_builtin_types[BT_BV4SI];
1104 break;
1105 case E_HImode: case E_V8HImode:
1106 result = s390_builtin_types[BT_BV8HI];
1107 break;
1108 case E_QImode: case E_V16QImode:
1109 result = s390_builtin_types[BT_BV16QI];
1110 break;
1111 default:
1112 break;
1113 }
1114
1115 *no_add_attrs = true; /* No need to hang on to the attribute. */
1116
1117 if (result)
1118 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1119
1120 return NULL_TREE;
1121 }
1122
1123 /* Check syntax of function decl attributes having a string type value. */
1124
1125 static tree
1126 s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1127 tree args ATTRIBUTE_UNUSED,
1128 int flags ATTRIBUTE_UNUSED,
1129 bool *no_add_attrs)
1130 {
1131 tree cst;
1132
1133 if (TREE_CODE (*node) != FUNCTION_DECL)
1134 {
1135 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1136 name);
1137 *no_add_attrs = true;
1138 }
1139
1140 cst = TREE_VALUE (args);
1141
1142 if (TREE_CODE (cst) != STRING_CST)
1143 {
1144 warning (OPT_Wattributes,
1145 "%qE attribute requires a string constant argument",
1146 name);
1147 *no_add_attrs = true;
1148 }
1149
1150 if (is_attribute_p ("indirect_branch", name)
1151 || is_attribute_p ("indirect_branch_call", name)
1152 || is_attribute_p ("function_return", name)
1153 || is_attribute_p ("function_return_reg", name)
1154 || is_attribute_p ("function_return_mem", name))
1155 {
1156 if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1157 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1158 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1159 {
1160 warning (OPT_Wattributes,
1161 "argument to %qE attribute is not "
1162 "(keep|thunk|thunk-extern)", name);
1163 *no_add_attrs = true;
1164 }
1165 }
1166
1167 if (is_attribute_p ("indirect_branch_jump", name)
1168 && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1169 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1170 && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
1171 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1172 {
1173 warning (OPT_Wattributes,
1174 "argument to %qE attribute is not "
1175 "(keep|thunk|thunk-inline|thunk-extern)", name);
1176 *no_add_attrs = true;
1177 }
1178
1179 return NULL_TREE;
1180 }
1181
1182 static const struct attribute_spec s390_attribute_table[] = {
1183 { "hotpatch", 2, 2, true, false, false, false,
1184 s390_handle_hotpatch_attribute, NULL },
1185 { "s390_vector_bool", 0, 0, false, true, false, true,
1186 s390_handle_vectorbool_attribute, NULL },
1187 { "indirect_branch", 1, 1, true, false, false, false,
1188 s390_handle_string_attribute, NULL },
1189 { "indirect_branch_jump", 1, 1, true, false, false, false,
1190 s390_handle_string_attribute, NULL },
1191 { "indirect_branch_call", 1, 1, true, false, false, false,
1192 s390_handle_string_attribute, NULL },
1193 { "function_return", 1, 1, true, false, false, false,
1194 s390_handle_string_attribute, NULL },
1195 { "function_return_reg", 1, 1, true, false, false, false,
1196 s390_handle_string_attribute, NULL },
1197 { "function_return_mem", 1, 1, true, false, false, false,
1198 s390_handle_string_attribute, NULL },
1199
1200 /* End element. */
1201 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1202 };
1203
1204 /* Return the alignment for LABEL. We default to the -falign-labels
1205 value except for the literal pool base label. */
1206 int
1207 s390_label_align (rtx_insn *label)
1208 {
1209 rtx_insn *prev_insn = prev_active_insn (label);
1210 rtx set, src;
1211
1212 if (prev_insn == NULL_RTX)
1213 goto old;
1214
1215 set = single_set (prev_insn);
1216
1217 if (set == NULL_RTX)
1218 goto old;
1219
1220 src = SET_SRC (set);
1221
1222 /* Don't align literal pool base labels. */
1223 if (GET_CODE (src) == UNSPEC
1224 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1225 return 0;
1226
1227 old:
1228 return align_labels.levels[0].log;
1229 }
1230
1231 static GTY(()) rtx got_symbol;
1232
1233 /* Return the GOT table symbol. The symbol will be created when the
1234 function is invoked for the first time. */
1235
1236 static rtx
1237 s390_got_symbol (void)
1238 {
1239 if (!got_symbol)
1240 {
1241 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1242 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1243 }
1244
1245 return got_symbol;
1246 }
1247
1248 static scalar_int_mode
1249 s390_libgcc_cmp_return_mode (void)
1250 {
1251 return TARGET_64BIT ? DImode : SImode;
1252 }
1253
1254 static scalar_int_mode
1255 s390_libgcc_shift_count_mode (void)
1256 {
1257 return TARGET_64BIT ? DImode : SImode;
1258 }
1259
1260 static scalar_int_mode
1261 s390_unwind_word_mode (void)
1262 {
1263 return TARGET_64BIT ? DImode : SImode;
1264 }
1265
1266 /* Return true if the back end supports mode MODE. */
1267 static bool
1268 s390_scalar_mode_supported_p (scalar_mode mode)
1269 {
1270 /* In contrast to the default implementation reject TImode constants on 31bit
1271 TARGET_ZARCH for ABI compliance. */
1272 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1273 return false;
1274
1275 if (DECIMAL_FLOAT_MODE_P (mode))
1276 return default_decimal_float_supported_p ();
1277
1278 return default_scalar_mode_supported_p (mode);
1279 }
1280
1281 /* Return true if the back end supports vector mode MODE. */
1282 static bool
1283 s390_vector_mode_supported_p (machine_mode mode)
1284 {
1285 machine_mode inner;
1286
1287 if (!VECTOR_MODE_P (mode)
1288 || !TARGET_VX
1289 || GET_MODE_SIZE (mode) > 16)
1290 return false;
1291
1292 inner = GET_MODE_INNER (mode);
1293
1294 switch (inner)
1295 {
1296 case E_QImode:
1297 case E_HImode:
1298 case E_SImode:
1299 case E_DImode:
1300 case E_TImode:
1301 case E_SFmode:
1302 case E_DFmode:
1303 case E_TFmode:
1304 return true;
1305 default:
1306 return false;
1307 }
1308 }
1309
1310 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1311
1312 void
1313 s390_set_has_landing_pad_p (bool value)
1314 {
1315 cfun->machine->has_landing_pad_p = value;
1316 }
1317
1318 /* If two condition code modes are compatible, return a condition code
1319 mode which is compatible with both. Otherwise, return
1320 VOIDmode. */
1321
1322 static machine_mode
1323 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1324 {
1325 if (m1 == m2)
1326 return m1;
1327
1328 switch (m1)
1329 {
1330 case E_CCZmode:
1331 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1332 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1333 return m2;
1334 return VOIDmode;
1335
1336 case E_CCSmode:
1337 case E_CCUmode:
1338 case E_CCTmode:
1339 case E_CCSRmode:
1340 case E_CCURmode:
1341 case E_CCZ1mode:
1342 if (m2 == CCZmode)
1343 return m1;
1344
1345 return VOIDmode;
1346
1347 default:
1348 return VOIDmode;
1349 }
1350 return VOIDmode;
1351 }
1352
1353 /* Return true if SET either doesn't set the CC register, or else
1354 the source and destination have matching CC modes and that
1355 CC mode is at least as constrained as REQ_MODE. */
1356
1357 static bool
1358 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1359 {
1360 machine_mode set_mode;
1361
1362 gcc_assert (GET_CODE (set) == SET);
1363
1364 /* These modes are supposed to be used only in CC consumer
1365 patterns. */
1366 gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1367 && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1368
1369 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1370 return 1;
1371
1372 set_mode = GET_MODE (SET_DEST (set));
1373 switch (set_mode)
1374 {
1375 case E_CCZ1mode:
1376 case E_CCSmode:
1377 case E_CCSRmode:
1378 case E_CCUmode:
1379 case E_CCURmode:
1380 case E_CCLmode:
1381 case E_CCL1mode:
1382 case E_CCL2mode:
1383 case E_CCL3mode:
1384 case E_CCT1mode:
1385 case E_CCT2mode:
1386 case E_CCT3mode:
1387 case E_CCVEQmode:
1388 case E_CCVIHmode:
1389 case E_CCVIHUmode:
1390 case E_CCVFHmode:
1391 case E_CCVFHEmode:
1392 if (req_mode != set_mode)
1393 return 0;
1394 break;
1395
1396 case E_CCZmode:
1397 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1398 && req_mode != CCSRmode && req_mode != CCURmode
1399 && req_mode != CCZ1mode)
1400 return 0;
1401 break;
1402
1403 case E_CCAPmode:
1404 case E_CCANmode:
1405 if (req_mode != CCAmode)
1406 return 0;
1407 break;
1408
1409 default:
1410 gcc_unreachable ();
1411 }
1412
1413 return (GET_MODE (SET_SRC (set)) == set_mode);
1414 }
1415
1416 /* Return true if every SET in INSN that sets the CC register
1417 has source and destination with matching CC modes and that
1418 CC mode is at least as constrained as REQ_MODE.
1419 If REQ_MODE is VOIDmode, always return false. */
1420
1421 bool
1422 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1423 {
1424 int i;
1425
1426 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1427 if (req_mode == VOIDmode)
1428 return false;
1429
1430 if (GET_CODE (PATTERN (insn)) == SET)
1431 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1432
1433 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1434 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1435 {
1436 rtx set = XVECEXP (PATTERN (insn), 0, i);
1437 if (GET_CODE (set) == SET)
1438 if (!s390_match_ccmode_set (set, req_mode))
1439 return false;
1440 }
1441
1442 return true;
1443 }
1444
1445 /* If a test-under-mask instruction can be used to implement
1446 (compare (and ... OP1) OP2), return the CC mode required
1447 to do that. Otherwise, return VOIDmode.
1448 MIXED is true if the instruction can distinguish between
1449 CC1 and CC2 for mixed selected bits (TMxx), it is false
1450 if the instruction cannot (TM). */
1451
1452 machine_mode
1453 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1454 {
1455 int bit0, bit1;
1456
1457 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1458 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1459 return VOIDmode;
1460
1461 /* Selected bits all zero: CC0.
1462 e.g.: int a; if ((a & (16 + 128)) == 0) */
1463 if (INTVAL (op2) == 0)
1464 return CCTmode;
1465
1466 /* Selected bits all one: CC3.
1467 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1468 if (INTVAL (op2) == INTVAL (op1))
1469 return CCT3mode;
1470
1471 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1472 int a;
1473 if ((a & (16 + 128)) == 16) -> CCT1
1474 if ((a & (16 + 128)) == 128) -> CCT2 */
1475 if (mixed)
1476 {
1477 bit1 = exact_log2 (INTVAL (op2));
1478 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1479 if (bit0 != -1 && bit1 != -1)
1480 return bit0 > bit1 ? CCT1mode : CCT2mode;
1481 }
1482
1483 return VOIDmode;
1484 }
1485
1486 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1487 OP0 and OP1 of a COMPARE, return the mode to be used for the
1488 comparison. */
1489
1490 machine_mode
1491 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1492 {
1493 switch (code)
1494 {
1495 case EQ:
1496 case NE:
1497 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1498 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1499 return CCAPmode;
1500 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1501 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1502 return CCAPmode;
1503 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1504 || GET_CODE (op1) == NEG)
1505 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1506 return CCLmode;
1507
1508 if (GET_CODE (op0) == AND)
1509 {
1510 /* Check whether we can potentially do it via TM. */
1511 machine_mode ccmode;
1512 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1513 if (ccmode != VOIDmode)
1514 {
1515 /* Relax CCTmode to CCZmode to allow fall-back to AND
1516 if that turns out to be beneficial. */
1517 return ccmode == CCTmode ? CCZmode : ccmode;
1518 }
1519 }
1520
1521 if (register_operand (op0, HImode)
1522 && GET_CODE (op1) == CONST_INT
1523 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1524 return CCT3mode;
1525 if (register_operand (op0, QImode)
1526 && GET_CODE (op1) == CONST_INT
1527 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1528 return CCT3mode;
1529
1530 return CCZmode;
1531
1532 case LE:
1533 case LT:
1534 case GE:
1535 case GT:
1536 /* The only overflow condition of NEG and ABS happens when
1537 -INT_MAX is used as parameter, which stays negative. So
1538 we have an overflow from a positive value to a negative.
1539 Using CCAP mode the resulting cc can be used for comparisons. */
1540 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1541 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1542 return CCAPmode;
1543
1544 /* If constants are involved in an add instruction it is possible to use
1545 the resulting cc for comparisons with zero. Knowing the sign of the
1546 constant the overflow behavior gets predictable. e.g.:
1547 int a, b; if ((b = a + c) > 0)
1548 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1549 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1550 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1551 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1552 /* Avoid INT32_MIN on 32 bit. */
1553 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1554 {
1555 if (INTVAL (XEXP((op0), 1)) < 0)
1556 return CCANmode;
1557 else
1558 return CCAPmode;
1559 }
1560 /* Fall through. */
1561 case UNORDERED:
1562 case ORDERED:
1563 case UNEQ:
1564 case UNLE:
1565 case UNLT:
1566 case UNGE:
1567 case UNGT:
1568 case LTGT:
1569 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1570 && GET_CODE (op1) != CONST_INT)
1571 return CCSRmode;
1572 return CCSmode;
1573
1574 case LTU:
1575 case GEU:
1576 if (GET_CODE (op0) == PLUS
1577 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1578 return CCL1mode;
1579
1580 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1581 && GET_CODE (op1) != CONST_INT)
1582 return CCURmode;
1583 return CCUmode;
1584
1585 case LEU:
1586 case GTU:
1587 if (GET_CODE (op0) == MINUS
1588 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1589 return CCL2mode;
1590
1591 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1592 && GET_CODE (op1) != CONST_INT)
1593 return CCURmode;
1594 return CCUmode;
1595
1596 default:
1597 gcc_unreachable ();
1598 }
1599 }
1600
1601 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1602 that we can implement more efficiently. */
1603
1604 static void
1605 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1606 bool op0_preserve_value)
1607 {
1608 if (op0_preserve_value)
1609 return;
1610
1611 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1612 if ((*code == EQ || *code == NE)
1613 && *op1 == const0_rtx
1614 && GET_CODE (*op0) == ZERO_EXTRACT
1615 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1616 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1617 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1618 {
1619 rtx inner = XEXP (*op0, 0);
1620 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1621 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1622 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1623
1624 if (len > 0 && len < modesize
1625 && pos >= 0 && pos + len <= modesize
1626 && modesize <= HOST_BITS_PER_WIDE_INT)
1627 {
1628 unsigned HOST_WIDE_INT block;
1629 block = (HOST_WIDE_INT_1U << len) - 1;
1630 block <<= modesize - pos - len;
1631
1632 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1633 gen_int_mode (block, GET_MODE (inner)));
1634 }
1635 }
1636
1637 /* Narrow AND of memory against immediate to enable TM. */
1638 if ((*code == EQ || *code == NE)
1639 && *op1 == const0_rtx
1640 && GET_CODE (*op0) == AND
1641 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1642 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1643 {
1644 rtx inner = XEXP (*op0, 0);
1645 rtx mask = XEXP (*op0, 1);
1646
1647 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1648 if (GET_CODE (inner) == SUBREG
1649 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1650 && (GET_MODE_SIZE (GET_MODE (inner))
1651 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1652 && ((INTVAL (mask)
1653 & GET_MODE_MASK (GET_MODE (inner))
1654 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1655 == 0))
1656 inner = SUBREG_REG (inner);
1657
1658 /* Do not change volatile MEMs. */
1659 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1660 {
1661 int part = s390_single_part (XEXP (*op0, 1),
1662 GET_MODE (inner), QImode, 0);
1663 if (part >= 0)
1664 {
1665 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1666 inner = adjust_address_nv (inner, QImode, part);
1667 *op0 = gen_rtx_AND (QImode, inner, mask);
1668 }
1669 }
1670 }
1671
1672 /* Narrow comparisons against 0xffff to HImode if possible. */
1673 if ((*code == EQ || *code == NE)
1674 && GET_CODE (*op1) == CONST_INT
1675 && INTVAL (*op1) == 0xffff
1676 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1677 && (nonzero_bits (*op0, GET_MODE (*op0))
1678 & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1679 {
1680 *op0 = gen_lowpart (HImode, *op0);
1681 *op1 = constm1_rtx;
1682 }
1683
1684 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1685 if (GET_CODE (*op0) == UNSPEC
1686 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1687 && XVECLEN (*op0, 0) == 1
1688 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1689 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1690 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1691 && *op1 == const0_rtx)
1692 {
1693 enum rtx_code new_code = UNKNOWN;
1694 switch (*code)
1695 {
1696 case EQ: new_code = EQ; break;
1697 case NE: new_code = NE; break;
1698 case LT: new_code = GTU; break;
1699 case GT: new_code = LTU; break;
1700 case LE: new_code = GEU; break;
1701 case GE: new_code = LEU; break;
1702 default: break;
1703 }
1704
1705 if (new_code != UNKNOWN)
1706 {
1707 *op0 = XVECEXP (*op0, 0, 0);
1708 *code = new_code;
1709 }
1710 }
1711
1712 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1713 if (GET_CODE (*op0) == UNSPEC
1714 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1715 && XVECLEN (*op0, 0) == 1
1716 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1717 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1718 && CONST_INT_P (*op1))
1719 {
1720 enum rtx_code new_code = UNKNOWN;
1721 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1722 {
1723 case E_CCZmode:
1724 case E_CCRAWmode:
1725 switch (*code)
1726 {
1727 case EQ: new_code = EQ; break;
1728 case NE: new_code = NE; break;
1729 default: break;
1730 }
1731 break;
1732 default: break;
1733 }
1734
1735 if (new_code != UNKNOWN)
1736 {
1737 /* For CCRAWmode put the required cc mask into the second
1738 operand. */
1739 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1740 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1741 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1742 *op0 = XVECEXP (*op0, 0, 0);
1743 *code = new_code;
1744 }
1745 }
1746
1747 /* Simplify cascaded EQ, NE with const0_rtx. */
1748 if ((*code == NE || *code == EQ)
1749 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1750 && GET_MODE (*op0) == SImode
1751 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1752 && REG_P (XEXP (*op0, 0))
1753 && XEXP (*op0, 1) == const0_rtx
1754 && *op1 == const0_rtx)
1755 {
1756 if ((*code == EQ && GET_CODE (*op0) == NE)
1757 || (*code == NE && GET_CODE (*op0) == EQ))
1758 *code = EQ;
1759 else
1760 *code = NE;
1761 *op0 = XEXP (*op0, 0);
1762 }
1763
1764 /* Prefer register over memory as first operand. */
1765 if (MEM_P (*op0) && REG_P (*op1))
1766 {
1767 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1768 *code = (int)swap_condition ((enum rtx_code)*code);
1769 }
1770
1771 /* A comparison result is compared against zero. Replace it with
1772 the (perhaps inverted) original comparison.
1773 This probably should be done by simplify_relational_operation. */
1774 if ((*code == EQ || *code == NE)
1775 && *op1 == const0_rtx
1776 && COMPARISON_P (*op0)
1777 && CC_REG_P (XEXP (*op0, 0)))
1778 {
1779 enum rtx_code new_code;
1780
1781 if (*code == EQ)
1782 new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1783 XEXP (*op0, 0),
1784 XEXP (*op1, 0), NULL);
1785 else
1786 new_code = GET_CODE (*op0);
1787
1788 if (new_code != UNKNOWN)
1789 {
1790 *code = new_code;
1791 *op1 = XEXP (*op0, 1);
1792 *op0 = XEXP (*op0, 0);
1793 }
1794 }
1795 }
1796
1797
1798 /* Emit a compare instruction suitable to implement the comparison
1799 OP0 CODE OP1. Return the correct condition RTL to be placed in
1800 the IF_THEN_ELSE of the conditional branch testing the result. */
1801
1802 rtx
1803 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1804 {
1805 machine_mode mode = s390_select_ccmode (code, op0, op1);
1806 rtx cc;
1807
1808 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1809 {
1810 /* Do not output a redundant compare instruction if a
1811 compare_and_swap pattern already computed the result and the
1812 machine modes are compatible. */
1813 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1814 == GET_MODE (op0));
1815 cc = op0;
1816 }
1817 else
1818 {
1819 cc = gen_rtx_REG (mode, CC_REGNUM);
1820 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1821 }
1822
1823 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1824 }
1825
1826 /* If MEM is not a legitimate compare-and-swap memory operand, return a new
1827 MEM, whose address is a pseudo containing the original MEM's address. */
1828
1829 static rtx
1830 s390_legitimize_cs_operand (rtx mem)
1831 {
1832 rtx tmp;
1833
1834 if (!contains_symbol_ref_p (mem))
1835 return mem;
1836 tmp = gen_reg_rtx (Pmode);
1837 emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
1838 return change_address (mem, VOIDmode, tmp);
1839 }
1840
1841 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1842 matches CMP.
1843 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1844 conditional branch testing the result. */
1845
1846 static rtx
1847 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1848 rtx cmp, rtx new_rtx, machine_mode ccmode)
1849 {
1850 rtx cc;
1851
1852 mem = s390_legitimize_cs_operand (mem);
1853 cc = gen_rtx_REG (ccmode, CC_REGNUM);
1854 switch (GET_MODE (mem))
1855 {
1856 case E_SImode:
1857 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1858 new_rtx, cc));
1859 break;
1860 case E_DImode:
1861 emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1862 new_rtx, cc));
1863 break;
1864 case E_TImode:
1865 emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1866 new_rtx, cc));
1867 break;
1868 case E_QImode:
1869 case E_HImode:
1870 default:
1871 gcc_unreachable ();
1872 }
1873 return s390_emit_compare (code, cc, const0_rtx);
1874 }
1875
1876 /* Emit a jump instruction to TARGET and return it. If COND is
1877 NULL_RTX, emit an unconditional jump, else a conditional jump under
1878 condition COND. */
1879
1880 rtx_insn *
1881 s390_emit_jump (rtx target, rtx cond)
1882 {
1883 rtx insn;
1884
1885 target = gen_rtx_LABEL_REF (VOIDmode, target);
1886 if (cond)
1887 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1888
1889 insn = gen_rtx_SET (pc_rtx, target);
1890 return emit_jump_insn (insn);
1891 }
1892
1893 /* Return branch condition mask to implement a branch
1894 specified by CODE. Return -1 for invalid comparisons. */
1895
1896 int
1897 s390_branch_condition_mask (rtx code)
1898 {
1899 const int CC0 = 1 << 3;
1900 const int CC1 = 1 << 2;
1901 const int CC2 = 1 << 1;
1902 const int CC3 = 1 << 0;
1903
1904 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1905 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1906 gcc_assert (XEXP (code, 1) == const0_rtx
1907 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1908 && CONST_INT_P (XEXP (code, 1))));
1909
1910
1911 switch (GET_MODE (XEXP (code, 0)))
1912 {
1913 case E_CCZmode:
1914 case E_CCZ1mode:
1915 switch (GET_CODE (code))
1916 {
1917 case EQ: return CC0;
1918 case NE: return CC1 | CC2 | CC3;
1919 default: return -1;
1920 }
1921 break;
1922
1923 case E_CCT1mode:
1924 switch (GET_CODE (code))
1925 {
1926 case EQ: return CC1;
1927 case NE: return CC0 | CC2 | CC3;
1928 default: return -1;
1929 }
1930 break;
1931
1932 case E_CCT2mode:
1933 switch (GET_CODE (code))
1934 {
1935 case EQ: return CC2;
1936 case NE: return CC0 | CC1 | CC3;
1937 default: return -1;
1938 }
1939 break;
1940
1941 case E_CCT3mode:
1942 switch (GET_CODE (code))
1943 {
1944 case EQ: return CC3;
1945 case NE: return CC0 | CC1 | CC2;
1946 default: return -1;
1947 }
1948 break;
1949
1950 case E_CCLmode:
1951 switch (GET_CODE (code))
1952 {
1953 case EQ: return CC0 | CC2;
1954 case NE: return CC1 | CC3;
1955 default: return -1;
1956 }
1957 break;
1958
1959 case E_CCL1mode:
1960 switch (GET_CODE (code))
1961 {
1962 case LTU: return CC2 | CC3; /* carry */
1963 case GEU: return CC0 | CC1; /* no carry */
1964 default: return -1;
1965 }
1966 break;
1967
1968 case E_CCL2mode:
1969 switch (GET_CODE (code))
1970 {
1971 case GTU: return CC0 | CC1; /* borrow */
1972 case LEU: return CC2 | CC3; /* no borrow */
1973 default: return -1;
1974 }
1975 break;
1976
1977 case E_CCL3mode:
1978 switch (GET_CODE (code))
1979 {
1980 case EQ: return CC0 | CC2;
1981 case NE: return CC1 | CC3;
1982 case LTU: return CC1;
1983 case GTU: return CC3;
1984 case LEU: return CC1 | CC2;
1985 case GEU: return CC2 | CC3;
1986 default: return -1;
1987 }
1988
1989 case E_CCUmode:
1990 switch (GET_CODE (code))
1991 {
1992 case EQ: return CC0;
1993 case NE: return CC1 | CC2 | CC3;
1994 case LTU: return CC1;
1995 case GTU: return CC2;
1996 case LEU: return CC0 | CC1;
1997 case GEU: return CC0 | CC2;
1998 default: return -1;
1999 }
2000 break;
2001
2002 case E_CCURmode:
2003 switch (GET_CODE (code))
2004 {
2005 case EQ: return CC0;
2006 case NE: return CC2 | CC1 | CC3;
2007 case LTU: return CC2;
2008 case GTU: return CC1;
2009 case LEU: return CC0 | CC2;
2010 case GEU: return CC0 | CC1;
2011 default: return -1;
2012 }
2013 break;
2014
2015 case E_CCAPmode:
2016 switch (GET_CODE (code))
2017 {
2018 case EQ: return CC0;
2019 case NE: return CC1 | CC2 | CC3;
2020 case LT: return CC1 | CC3;
2021 case GT: return CC2;
2022 case LE: return CC0 | CC1 | CC3;
2023 case GE: return CC0 | CC2;
2024 default: return -1;
2025 }
2026 break;
2027
2028 case E_CCANmode:
2029 switch (GET_CODE (code))
2030 {
2031 case EQ: return CC0;
2032 case NE: return CC1 | CC2 | CC3;
2033 case LT: return CC1;
2034 case GT: return CC2 | CC3;
2035 case LE: return CC0 | CC1;
2036 case GE: return CC0 | CC2 | CC3;
2037 default: return -1;
2038 }
2039 break;
2040
2041 case E_CCSmode:
2042 switch (GET_CODE (code))
2043 {
2044 case EQ: return CC0;
2045 case NE: return CC1 | CC2 | CC3;
2046 case LT: return CC1;
2047 case GT: return CC2;
2048 case LE: return CC0 | CC1;
2049 case GE: return CC0 | CC2;
2050 case UNORDERED: return CC3;
2051 case ORDERED: return CC0 | CC1 | CC2;
2052 case UNEQ: return CC0 | CC3;
2053 case UNLT: return CC1 | CC3;
2054 case UNGT: return CC2 | CC3;
2055 case UNLE: return CC0 | CC1 | CC3;
2056 case UNGE: return CC0 | CC2 | CC3;
2057 case LTGT: return CC1 | CC2;
2058 default: return -1;
2059 }
2060 break;
2061
2062 case E_CCSRmode:
2063 switch (GET_CODE (code))
2064 {
2065 case EQ: return CC0;
2066 case NE: return CC2 | CC1 | CC3;
2067 case LT: return CC2;
2068 case GT: return CC1;
2069 case LE: return CC0 | CC2;
2070 case GE: return CC0 | CC1;
2071 case UNORDERED: return CC3;
2072 case ORDERED: return CC0 | CC2 | CC1;
2073 case UNEQ: return CC0 | CC3;
2074 case UNLT: return CC2 | CC3;
2075 case UNGT: return CC1 | CC3;
2076 case UNLE: return CC0 | CC2 | CC3;
2077 case UNGE: return CC0 | CC1 | CC3;
2078 case LTGT: return CC2 | CC1;
2079 default: return -1;
2080 }
2081 break;
2082
2083 /* Vector comparison modes. */
2084 /* CC2 will never be set. It however is part of the negated
2085 masks. */
2086 case E_CCVIALLmode:
2087 switch (GET_CODE (code))
2088 {
2089 case EQ:
2090 case GTU:
2091 case GT:
2092 case GE: return CC0;
2093 /* The inverted modes are in fact *any* modes. */
2094 case NE:
2095 case LEU:
2096 case LE:
2097 case LT: return CC3 | CC1 | CC2;
2098 default: return -1;
2099 }
2100
2101 case E_CCVIANYmode:
2102 switch (GET_CODE (code))
2103 {
2104 case EQ:
2105 case GTU:
2106 case GT:
2107 case GE: return CC0 | CC1;
2108 /* The inverted modes are in fact *all* modes. */
2109 case NE:
2110 case LEU:
2111 case LE:
2112 case LT: return CC3 | CC2;
2113 default: return -1;
2114 }
2115 case E_CCVFALLmode:
2116 switch (GET_CODE (code))
2117 {
2118 case EQ:
2119 case GT:
2120 case GE: return CC0;
2121 /* The inverted modes are in fact *any* modes. */
2122 case NE:
2123 case UNLE:
2124 case UNLT: return CC3 | CC1 | CC2;
2125 default: return -1;
2126 }
2127
2128 case E_CCVFANYmode:
2129 switch (GET_CODE (code))
2130 {
2131 case EQ:
2132 case GT:
2133 case GE: return CC0 | CC1;
2134 /* The inverted modes are in fact *all* modes. */
2135 case NE:
2136 case UNLE:
2137 case UNLT: return CC3 | CC2;
2138 default: return -1;
2139 }
2140
2141 case E_CCRAWmode:
2142 switch (GET_CODE (code))
2143 {
2144 case EQ:
2145 return INTVAL (XEXP (code, 1));
2146 case NE:
2147 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2148 default:
2149 gcc_unreachable ();
2150 }
2151
2152 default:
2153 return -1;
2154 }
2155 }
2156
2157
2158 /* Return branch condition mask to implement a compare and branch
2159 specified by CODE. Return -1 for invalid comparisons. */
2160
2161 int
2162 s390_compare_and_branch_condition_mask (rtx code)
2163 {
2164 const int CC0 = 1 << 3;
2165 const int CC1 = 1 << 2;
2166 const int CC2 = 1 << 1;
2167
2168 switch (GET_CODE (code))
2169 {
2170 case EQ:
2171 return CC0;
2172 case NE:
2173 return CC1 | CC2;
2174 case LT:
2175 case LTU:
2176 return CC1;
2177 case GT:
2178 case GTU:
2179 return CC2;
2180 case LE:
2181 case LEU:
2182 return CC0 | CC1;
2183 case GE:
2184 case GEU:
2185 return CC0 | CC2;
2186 default:
2187 gcc_unreachable ();
2188 }
2189 return -1;
2190 }
2191
2192 /* If INV is false, return assembler mnemonic string to implement
2193 a branch specified by CODE. If INV is true, return mnemonic
2194 for the corresponding inverted branch. */
2195
2196 static const char *
2197 s390_branch_condition_mnemonic (rtx code, int inv)
2198 {
2199 int mask;
2200
2201 static const char *const mnemonic[16] =
2202 {
2203 NULL, "o", "h", "nle",
2204 "l", "nhe", "lh", "ne",
2205 "e", "nlh", "he", "nl",
2206 "le", "nh", "no", NULL
2207 };
2208
2209 if (GET_CODE (XEXP (code, 0)) == REG
2210 && REGNO (XEXP (code, 0)) == CC_REGNUM
2211 && (XEXP (code, 1) == const0_rtx
2212 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2213 && CONST_INT_P (XEXP (code, 1)))))
2214 mask = s390_branch_condition_mask (code);
2215 else
2216 mask = s390_compare_and_branch_condition_mask (code);
2217
2218 gcc_assert (mask >= 0);
2219
2220 if (inv)
2221 mask ^= 15;
2222
2223 gcc_assert (mask >= 1 && mask <= 14);
2224
2225 return mnemonic[mask];
2226 }
2227
2228 /* Return the part of op which has a value different from def.
2229 The size of the part is determined by mode.
2230 Use this function only if you already know that op really
2231 contains such a part. */
2232
2233 unsigned HOST_WIDE_INT
2234 s390_extract_part (rtx op, machine_mode mode, int def)
2235 {
2236 unsigned HOST_WIDE_INT value = 0;
2237 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2238 int part_bits = GET_MODE_BITSIZE (mode);
2239 unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2240 int i;
2241
2242 for (i = 0; i < max_parts; i++)
2243 {
2244 if (i == 0)
2245 value = UINTVAL (op);
2246 else
2247 value >>= part_bits;
2248
2249 if ((value & part_mask) != (def & part_mask))
2250 return value & part_mask;
2251 }
2252
2253 gcc_unreachable ();
2254 }
2255
2256 /* If OP is an integer constant of mode MODE with exactly one
2257 part of mode PART_MODE unequal to DEF, return the number of that
2258 part. Otherwise, return -1. */
2259
2260 int
2261 s390_single_part (rtx op,
2262 machine_mode mode,
2263 machine_mode part_mode,
2264 int def)
2265 {
2266 unsigned HOST_WIDE_INT value = 0;
2267 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2268 unsigned HOST_WIDE_INT part_mask
2269 = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2270 int i, part = -1;
2271
2272 if (GET_CODE (op) != CONST_INT)
2273 return -1;
2274
2275 for (i = 0; i < n_parts; i++)
2276 {
2277 if (i == 0)
2278 value = UINTVAL (op);
2279 else
2280 value >>= GET_MODE_BITSIZE (part_mode);
2281
2282 if ((value & part_mask) != (def & part_mask))
2283 {
2284 if (part != -1)
2285 return -1;
2286 else
2287 part = i;
2288 }
2289 }
2290 return part == -1 ? -1 : n_parts - 1 - part;
2291 }
2292
2293 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2294 bits and no other bits are set in (the lower SIZE bits of) IN.
2295
2296 PSTART and PEND can be used to obtain the start and end
2297 position (inclusive) of the bitfield relative to 64
2298 bits. *PSTART / *PEND gives the position of the first/last bit
2299 of the bitfield counting from the highest order bit starting
2300 with zero. */
2301
2302 bool
2303 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2304 int *pstart, int *pend)
2305 {
2306 int start;
2307 int end = -1;
2308 int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2309 int highbit = HOST_BITS_PER_WIDE_INT - size;
2310 unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2311
2312 gcc_assert (!!pstart == !!pend);
2313 for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2314 if (end == -1)
2315 {
2316 /* Look for the rightmost bit of a contiguous range of ones. */
2317 if (bitmask & in)
2318 /* Found it. */
2319 end = start;
2320 }
2321 else
2322 {
2323 /* Look for the firt zero bit after the range of ones. */
2324 if (! (bitmask & in))
2325 /* Found it. */
2326 break;
2327 }
2328 /* We're one past the last one-bit. */
2329 start++;
2330
2331 if (end == -1)
2332 /* No one bits found. */
2333 return false;
2334
2335 if (start > highbit)
2336 {
2337 unsigned HOST_WIDE_INT mask;
2338
2339 /* Calculate a mask for all bits beyond the contiguous bits. */
2340 mask = ((~HOST_WIDE_INT_0U >> highbit)
2341 & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2342 if (mask & in)
2343 /* There are more bits set beyond the first range of one bits. */
2344 return false;
2345 }
2346
2347 if (pstart)
2348 {
2349 *pstart = start;
2350 *pend = end;
2351 }
2352
2353 return true;
2354 }
2355
2356 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2357 if ~IN contains a contiguous bitfield. In that case, *END is <
2358 *START.
2359
2360 If WRAP_P is true, a bitmask that wraps around is also tested.
2361 When a wraparoud occurs *START is greater than *END (in
2362 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2363 part of the range. If WRAP_P is false, no wraparound is
2364 tested. */
2365
2366 bool
2367 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2368 int size, int *start, int *end)
2369 {
2370 int bs = HOST_BITS_PER_WIDE_INT;
2371 bool b;
2372
2373 gcc_assert (!!start == !!end);
2374 if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2375 /* This cannot be expressed as a contiguous bitmask. Exit early because
2376 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2377 a valid bitmask. */
2378 return false;
2379 b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2380 if (b)
2381 return true;
2382 if (! wrap_p)
2383 return false;
2384 b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2385 if (b && start)
2386 {
2387 int s = *start;
2388 int e = *end;
2389
2390 gcc_assert (s >= 1);
2391 *start = ((e + 1) & (bs - 1));
2392 *end = ((s - 1 + bs) & (bs - 1));
2393 }
2394
2395 return b;
2396 }
2397
2398 /* Return true if OP contains the same contiguous bitfield in *all*
2399 its elements. START and END can be used to obtain the start and
2400 end position of the bitfield.
2401
2402 START/STOP give the position of the first/last bit of the bitfield
2403 counting from the lowest order bit starting with zero. In order to
2404 use these values for S/390 instructions this has to be converted to
2405 "bits big endian" style. */
2406
2407 bool
2408 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2409 {
2410 unsigned HOST_WIDE_INT mask;
2411 int size;
2412 rtx elt;
2413 bool b;
2414
2415 gcc_assert (!!start == !!end);
2416 if (!const_vec_duplicate_p (op, &elt)
2417 || !CONST_INT_P (elt))
2418 return false;
2419
2420 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2421
2422 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2423 if (size > 64)
2424 return false;
2425
2426 mask = UINTVAL (elt);
2427
2428 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2429 if (b)
2430 {
2431 if (start)
2432 {
2433 *start -= (HOST_BITS_PER_WIDE_INT - size);
2434 *end -= (HOST_BITS_PER_WIDE_INT - size);
2435 }
2436 return true;
2437 }
2438 else
2439 return false;
2440 }
2441
2442 /* Return true if C consists only of byte chunks being either 0 or
2443 0xff. If MASK is !=NULL a byte mask is generated which is
2444 appropriate for the vector generate byte mask instruction. */
2445
2446 bool
2447 s390_bytemask_vector_p (rtx op, unsigned *mask)
2448 {
2449 int i;
2450 unsigned tmp_mask = 0;
2451 int nunit, unit_size;
2452
2453 if (!VECTOR_MODE_P (GET_MODE (op))
2454 || GET_CODE (op) != CONST_VECTOR
2455 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2456 return false;
2457
2458 nunit = GET_MODE_NUNITS (GET_MODE (op));
2459 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2460
2461 for (i = 0; i < nunit; i++)
2462 {
2463 unsigned HOST_WIDE_INT c;
2464 int j;
2465
2466 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2467 return false;
2468
2469 c = UINTVAL (XVECEXP (op, 0, i));
2470 for (j = 0; j < unit_size; j++)
2471 {
2472 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2473 return false;
2474 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2475 c = c >> BITS_PER_UNIT;
2476 }
2477 }
2478
2479 if (mask != NULL)
2480 *mask = tmp_mask;
2481
2482 return true;
2483 }
2484
2485 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2486 equivalent to a shift followed by the AND. In particular, CONTIG
2487 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2488 for ROTL indicate a rotate to the right. */
2489
2490 bool
2491 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2492 {
2493 int start, end;
2494 bool ok;
2495
2496 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2497 gcc_assert (ok);
2498
2499 if (rotl >= 0)
2500 return (64 - end >= rotl);
2501 else
2502 {
2503 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2504 DIMode. */
2505 rotl = -rotl + (64 - bitsize);
2506 return (start >= rotl);
2507 }
2508 }
2509
2510 /* Check whether we can (and want to) split a double-word
2511 move in mode MODE from SRC to DST into two single-word
2512 moves, moving the subword FIRST_SUBWORD first. */
2513
2514 bool
2515 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2516 {
2517 /* Floating point and vector registers cannot be split. */
2518 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2519 return false;
2520
2521 /* Non-offsettable memory references cannot be split. */
2522 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2523 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2524 return false;
2525
2526 /* Moving the first subword must not clobber a register
2527 needed to move the second subword. */
2528 if (register_operand (dst, mode))
2529 {
2530 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2531 if (reg_overlap_mentioned_p (subreg, src))
2532 return false;
2533 }
2534
2535 return true;
2536 }
2537
2538 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2539 and [MEM2, MEM2 + SIZE] do overlap and false
2540 otherwise. */
2541
2542 bool
2543 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2544 {
2545 rtx addr1, addr2, addr_delta;
2546 HOST_WIDE_INT delta;
2547
2548 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2549 return true;
2550
2551 if (size == 0)
2552 return false;
2553
2554 addr1 = XEXP (mem1, 0);
2555 addr2 = XEXP (mem2, 0);
2556
2557 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2558
2559 /* This overlapping check is used by peepholes merging memory block operations.
2560 Overlapping operations would otherwise be recognized by the S/390 hardware
2561 and would fall back to a slower implementation. Allowing overlapping
2562 operations would lead to slow code but not to wrong code. Therefore we are
2563 somewhat optimistic if we cannot prove that the memory blocks are
2564 overlapping.
2565 That's why we return false here although this may accept operations on
2566 overlapping memory areas. */
2567 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2568 return false;
2569
2570 delta = INTVAL (addr_delta);
2571
2572 if (delta == 0
2573 || (delta > 0 && delta < size)
2574 || (delta < 0 && -delta < size))
2575 return true;
2576
2577 return false;
2578 }
2579
2580 /* Check whether the address of memory reference MEM2 equals exactly
2581 the address of memory reference MEM1 plus DELTA. Return true if
2582 we can prove this to be the case, false otherwise. */
2583
2584 bool
2585 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2586 {
2587 rtx addr1, addr2, addr_delta;
2588
2589 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2590 return false;
2591
2592 addr1 = XEXP (mem1, 0);
2593 addr2 = XEXP (mem2, 0);
2594
2595 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2596 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2597 return false;
2598
2599 return true;
2600 }
2601
2602 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2603
2604 void
2605 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2606 rtx *operands)
2607 {
2608 machine_mode wmode = mode;
2609 rtx dst = operands[0];
2610 rtx src1 = operands[1];
2611 rtx src2 = operands[2];
2612 rtx op, clob, tem;
2613
2614 /* If we cannot handle the operation directly, use a temp register. */
2615 if (!s390_logical_operator_ok_p (operands))
2616 dst = gen_reg_rtx (mode);
2617
2618 /* QImode and HImode patterns make sense only if we have a destination
2619 in memory. Otherwise perform the operation in SImode. */
2620 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2621 wmode = SImode;
2622
2623 /* Widen operands if required. */
2624 if (mode != wmode)
2625 {
2626 if (GET_CODE (dst) == SUBREG
2627 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2628 dst = tem;
2629 else if (REG_P (dst))
2630 dst = gen_rtx_SUBREG (wmode, dst, 0);
2631 else
2632 dst = gen_reg_rtx (wmode);
2633
2634 if (GET_CODE (src1) == SUBREG
2635 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2636 src1 = tem;
2637 else if (GET_MODE (src1) != VOIDmode)
2638 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2639
2640 if (GET_CODE (src2) == SUBREG
2641 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2642 src2 = tem;
2643 else if (GET_MODE (src2) != VOIDmode)
2644 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2645 }
2646
2647 /* Emit the instruction. */
2648 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2649 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2650 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2651
2652 /* Fix up the destination if needed. */
2653 if (dst != operands[0])
2654 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2655 }
2656
2657 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2658
2659 bool
2660 s390_logical_operator_ok_p (rtx *operands)
2661 {
2662 /* If the destination operand is in memory, it needs to coincide
2663 with one of the source operands. After reload, it has to be
2664 the first source operand. */
2665 if (GET_CODE (operands[0]) == MEM)
2666 return rtx_equal_p (operands[0], operands[1])
2667 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2668
2669 return true;
2670 }
2671
2672 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2673 operand IMMOP to switch from SS to SI type instructions. */
2674
2675 void
2676 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2677 {
2678 int def = code == AND ? -1 : 0;
2679 HOST_WIDE_INT mask;
2680 int part;
2681
2682 gcc_assert (GET_CODE (*memop) == MEM);
2683 gcc_assert (!MEM_VOLATILE_P (*memop));
2684
2685 mask = s390_extract_part (*immop, QImode, def);
2686 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2687 gcc_assert (part >= 0);
2688
2689 *memop = adjust_address (*memop, QImode, part);
2690 *immop = gen_int_mode (mask, QImode);
2691 }
2692
2693
2694 /* How to allocate a 'struct machine_function'. */
2695
2696 static struct machine_function *
2697 s390_init_machine_status (void)
2698 {
2699 return ggc_cleared_alloc<machine_function> ();
2700 }
2701
2702 /* Map for smallest class containing reg regno. */
2703
2704 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2705 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2706 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2707 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2708 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2709 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2710 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2711 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2712 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2713 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2714 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2715 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2716 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2717 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2718 VEC_REGS, VEC_REGS /* 52 */
2719 };
2720
2721 /* Return attribute type of insn. */
2722
2723 static enum attr_type
2724 s390_safe_attr_type (rtx_insn *insn)
2725 {
2726 if (recog_memoized (insn) >= 0)
2727 return get_attr_type (insn);
2728 else
2729 return TYPE_NONE;
2730 }
2731
2732 /* Return attribute relative_long of insn. */
2733
2734 static bool
2735 s390_safe_relative_long_p (rtx_insn *insn)
2736 {
2737 if (recog_memoized (insn) >= 0)
2738 return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
2739 else
2740 return false;
2741 }
2742
2743 /* Return true if DISP is a valid short displacement. */
2744
2745 static bool
2746 s390_short_displacement (rtx disp)
2747 {
2748 /* No displacement is OK. */
2749 if (!disp)
2750 return true;
2751
2752 /* Without the long displacement facility we don't need to
2753 distingiush between long and short displacement. */
2754 if (!TARGET_LONG_DISPLACEMENT)
2755 return true;
2756
2757 /* Integer displacement in range. */
2758 if (GET_CODE (disp) == CONST_INT)
2759 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2760
2761 /* GOT offset is not OK, the GOT can be large. */
2762 if (GET_CODE (disp) == CONST
2763 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2764 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2765 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2766 return false;
2767
2768 /* All other symbolic constants are literal pool references,
2769 which are OK as the literal pool must be small. */
2770 if (GET_CODE (disp) == CONST)
2771 return true;
2772
2773 return false;
2774 }
2775
2776 /* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
2777 If successful, also determines the
2778 following characteristics of `ref': `is_ptr' - whether it can be an
2779 LA argument, `is_base_ptr' - whether the resulting base is a well-known
2780 base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
2781 considered a literal pool pointer for purposes of avoiding two different
2782 literal pool pointers per insn during or after reload (`B' constraint). */
2783 static bool
2784 s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
2785 bool *is_base_ptr, bool *is_pool_ptr)
2786 {
2787 if (!*ref)
2788 return true;
2789
2790 if (GET_CODE (*ref) == UNSPEC)
2791 switch (XINT (*ref, 1))
2792 {
2793 case UNSPEC_LTREF:
2794 if (!*disp)
2795 *disp = gen_rtx_UNSPEC (Pmode,
2796 gen_rtvec (1, XVECEXP (*ref, 0, 0)),
2797 UNSPEC_LTREL_OFFSET);
2798 else
2799 return false;
2800
2801 *ref = XVECEXP (*ref, 0, 1);
2802 break;
2803
2804 default:
2805 return false;
2806 }
2807
2808 if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
2809 return false;
2810
2811 if (REGNO (*ref) == STACK_POINTER_REGNUM
2812 || REGNO (*ref) == FRAME_POINTER_REGNUM
2813 || ((reload_completed || reload_in_progress)
2814 && frame_pointer_needed
2815 && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
2816 || REGNO (*ref) == ARG_POINTER_REGNUM
2817 || (flag_pic
2818 && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
2819 *is_ptr = *is_base_ptr = true;
2820
2821 if ((reload_completed || reload_in_progress)
2822 && *ref == cfun->machine->base_reg)
2823 *is_ptr = *is_base_ptr = *is_pool_ptr = true;
2824
2825 return true;
2826 }
2827
2828 /* Decompose a RTL expression ADDR for a memory address into
2829 its components, returned in OUT.
2830
2831 Returns false if ADDR is not a valid memory address, true
2832 otherwise. If OUT is NULL, don't return the components,
2833 but check for validity only.
2834
2835 Note: Only addresses in canonical form are recognized.
2836 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2837 canonical form so that they will be recognized. */
2838
2839 static int
2840 s390_decompose_address (rtx addr, struct s390_address *out)
2841 {
2842 HOST_WIDE_INT offset = 0;
2843 rtx base = NULL_RTX;
2844 rtx indx = NULL_RTX;
2845 rtx disp = NULL_RTX;
2846 rtx orig_disp;
2847 bool pointer = false;
2848 bool base_ptr = false;
2849 bool indx_ptr = false;
2850 bool literal_pool = false;
2851
2852 /* We may need to substitute the literal pool base register into the address
2853 below. However, at this point we do not know which register is going to
2854 be used as base, so we substitute the arg pointer register. This is going
2855 to be treated as holding a pointer below -- it shouldn't be used for any
2856 other purpose. */
2857 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2858
2859 /* Decompose address into base + index + displacement. */
2860
2861 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2862 base = addr;
2863
2864 else if (GET_CODE (addr) == PLUS)
2865 {
2866 rtx op0 = XEXP (addr, 0);
2867 rtx op1 = XEXP (addr, 1);
2868 enum rtx_code code0 = GET_CODE (op0);
2869 enum rtx_code code1 = GET_CODE (op1);
2870
2871 if (code0 == REG || code0 == UNSPEC)
2872 {
2873 if (code1 == REG || code1 == UNSPEC)
2874 {
2875 indx = op0; /* index + base */
2876 base = op1;
2877 }
2878
2879 else
2880 {
2881 base = op0; /* base + displacement */
2882 disp = op1;
2883 }
2884 }
2885
2886 else if (code0 == PLUS)
2887 {
2888 indx = XEXP (op0, 0); /* index + base + disp */
2889 base = XEXP (op0, 1);
2890 disp = op1;
2891 }
2892
2893 else
2894 {
2895 return false;
2896 }
2897 }
2898
2899 else
2900 disp = addr; /* displacement */
2901
2902 /* Extract integer part of displacement. */
2903 orig_disp = disp;
2904 if (disp)
2905 {
2906 if (GET_CODE (disp) == CONST_INT)
2907 {
2908 offset = INTVAL (disp);
2909 disp = NULL_RTX;
2910 }
2911 else if (GET_CODE (disp) == CONST
2912 && GET_CODE (XEXP (disp, 0)) == PLUS
2913 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2914 {
2915 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2916 disp = XEXP (XEXP (disp, 0), 0);
2917 }
2918 }
2919
2920 /* Strip off CONST here to avoid special case tests later. */
2921 if (disp && GET_CODE (disp) == CONST)
2922 disp = XEXP (disp, 0);
2923
2924 /* We can convert literal pool addresses to
2925 displacements by basing them off the base register. */
2926 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2927 {
2928 if (base || indx)
2929 return false;
2930
2931 base = fake_pool_base, literal_pool = true;
2932
2933 /* Mark up the displacement. */
2934 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2935 UNSPEC_LTREL_OFFSET);
2936 }
2937
2938 /* Validate base register. */
2939 if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
2940 &literal_pool))
2941 return false;
2942
2943 /* Validate index register. */
2944 if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
2945 &literal_pool))
2946 return false;
2947
2948 /* Prefer to use pointer as base, not index. */
2949 if (base && indx && !base_ptr
2950 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2951 {
2952 rtx tmp = base;
2953 base = indx;
2954 indx = tmp;
2955 }
2956
2957 /* Validate displacement. */
2958 if (!disp)
2959 {
2960 /* If virtual registers are involved, the displacement will change later
2961 anyway as the virtual registers get eliminated. This could make a
2962 valid displacement invalid, but it is more likely to make an invalid
2963 displacement valid, because we sometimes access the register save area
2964 via negative offsets to one of those registers.
2965 Thus we don't check the displacement for validity here. If after
2966 elimination the displacement turns out to be invalid after all,
2967 this is fixed up by reload in any case. */
2968 /* LRA maintains always displacements up to date and we need to
2969 know the displacement is right during all LRA not only at the
2970 final elimination. */
2971 if (lra_in_progress
2972 || (base != arg_pointer_rtx
2973 && indx != arg_pointer_rtx
2974 && base != return_address_pointer_rtx
2975 && indx != return_address_pointer_rtx
2976 && base != frame_pointer_rtx
2977 && indx != frame_pointer_rtx
2978 && base != virtual_stack_vars_rtx
2979 && indx != virtual_stack_vars_rtx))
2980 if (!DISP_IN_RANGE (offset))
2981 return false;
2982 }
2983 else
2984 {
2985 /* All the special cases are pointers. */
2986 pointer = true;
2987
2988 /* In the small-PIC case, the linker converts @GOT
2989 and @GOTNTPOFF offsets to possible displacements. */
2990 if (GET_CODE (disp) == UNSPEC
2991 && (XINT (disp, 1) == UNSPEC_GOT
2992 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2993 && flag_pic == 1)
2994 {
2995 ;
2996 }
2997
2998 /* Accept pool label offsets. */
2999 else if (GET_CODE (disp) == UNSPEC
3000 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3001 ;
3002
3003 /* Accept literal pool references. */
3004 else if (GET_CODE (disp) == UNSPEC
3005 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3006 {
3007 /* In case CSE pulled a non literal pool reference out of
3008 the pool we have to reject the address. This is
3009 especially important when loading the GOT pointer on non
3010 zarch CPUs. In this case the literal pool contains an lt
3011 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3012 will most likely exceed the displacement. */
3013 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3014 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3015 return false;
3016
3017 orig_disp = gen_rtx_CONST (Pmode, disp);
3018 if (offset)
3019 {
3020 /* If we have an offset, make sure it does not
3021 exceed the size of the constant pool entry.
3022 Otherwise we might generate an out-of-range
3023 displacement for the base register form. */
3024 rtx sym = XVECEXP (disp, 0, 0);
3025 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3026 return false;
3027
3028 orig_disp = plus_constant (Pmode, orig_disp, offset);
3029 }
3030 }
3031
3032 else
3033 return false;
3034 }
3035
3036 if (!base && !indx)
3037 pointer = true;
3038
3039 if (out)
3040 {
3041 out->base = base;
3042 out->indx = indx;
3043 out->disp = orig_disp;
3044 out->pointer = pointer;
3045 out->literal_pool = literal_pool;
3046 }
3047
3048 return true;
3049 }
3050
3051 /* Decompose a RTL expression OP for an address style operand into its
3052 components, and return the base register in BASE and the offset in
3053 OFFSET. While OP looks like an address it is never supposed to be
3054 used as such.
3055
3056 Return true if OP is a valid address operand, false if not. */
3057
3058 bool
3059 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3060 HOST_WIDE_INT *offset)
3061 {
3062 rtx off = NULL_RTX;
3063
3064 /* We can have an integer constant, an address register,
3065 or a sum of the two. */
3066 if (CONST_SCALAR_INT_P (op))
3067 {
3068 off = op;
3069 op = NULL_RTX;
3070 }
3071 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3072 {
3073 off = XEXP (op, 1);
3074 op = XEXP (op, 0);
3075 }
3076 while (op && GET_CODE (op) == SUBREG)
3077 op = SUBREG_REG (op);
3078
3079 if (op && GET_CODE (op) != REG)
3080 return false;
3081
3082 if (offset)
3083 {
3084 if (off == NULL_RTX)
3085 *offset = 0;
3086 else if (CONST_INT_P (off))
3087 *offset = INTVAL (off);
3088 else if (CONST_WIDE_INT_P (off))
3089 /* The offset will anyway be cut down to 12 bits so take just
3090 the lowest order chunk of the wide int. */
3091 *offset = CONST_WIDE_INT_ELT (off, 0);
3092 else
3093 gcc_unreachable ();
3094 }
3095 if (base)
3096 *base = op;
3097
3098 return true;
3099 }
3100
3101
3102 /* Return true if CODE is a valid address without index. */
3103
3104 bool
3105 s390_legitimate_address_without_index_p (rtx op)
3106 {
3107 struct s390_address addr;
3108
3109 if (!s390_decompose_address (XEXP (op, 0), &addr))
3110 return false;
3111 if (addr.indx)
3112 return false;
3113
3114 return true;
3115 }
3116
3117
3118 /* Return TRUE if ADDR is an operand valid for a load/store relative
3119 instruction. Be aware that the alignment of the operand needs to
3120 be checked separately.
3121 Valid addresses are single references or a sum of a reference and a
3122 constant integer. Return these parts in SYMREF and ADDEND. You can
3123 pass NULL in REF and/or ADDEND if you are not interested in these
3124 values. */
3125
3126 static bool
3127 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3128 {
3129 HOST_WIDE_INT tmpaddend = 0;
3130
3131 if (GET_CODE (addr) == CONST)
3132 addr = XEXP (addr, 0);
3133
3134 if (GET_CODE (addr) == PLUS)
3135 {
3136 if (!CONST_INT_P (XEXP (addr, 1)))
3137 return false;
3138
3139 tmpaddend = INTVAL (XEXP (addr, 1));
3140 addr = XEXP (addr, 0);
3141 }
3142
3143 if (GET_CODE (addr) == SYMBOL_REF
3144 || (GET_CODE (addr) == UNSPEC
3145 && (XINT (addr, 1) == UNSPEC_GOTENT
3146 || XINT (addr, 1) == UNSPEC_PLT)))
3147 {
3148 if (symref)
3149 *symref = addr;
3150 if (addend)
3151 *addend = tmpaddend;
3152
3153 return true;
3154 }
3155 return false;
3156 }
3157
3158 /* Return true if the address in OP is valid for constraint letter C
3159 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if it literal
3160 pool MEMs should be accepted. Only the Q, R, S, T constraint
3161 letters are allowed for C. */
3162
3163 static int
3164 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3165 {
3166 rtx symref;
3167 struct s390_address addr;
3168 bool decomposed = false;
3169
3170 if (!address_operand (op, GET_MODE (op)))
3171 return 0;
3172
3173 /* This check makes sure that no symbolic address (except literal
3174 pool references) are accepted by the R or T constraints. */
3175 if (s390_loadrelative_operand_p (op, &symref, NULL)
3176 && (!lit_pool_ok
3177 || !SYMBOL_REF_P (symref)
3178 || !CONSTANT_POOL_ADDRESS_P (symref)))
3179 return 0;
3180
3181 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3182 if (!lit_pool_ok)
3183 {
3184 if (!s390_decompose_address (op, &addr))
3185 return 0;
3186 if (addr.literal_pool)
3187 return 0;
3188 decomposed = true;
3189 }
3190
3191 /* With reload, we sometimes get intermediate address forms that are
3192 actually invalid as-is, but we need to accept them in the most
3193 generic cases below ('R' or 'T'), since reload will in fact fix
3194 them up. LRA behaves differently here; we never see such forms,
3195 but on the other hand, we need to strictly reject every invalid
3196 address form. After both reload and LRA invalid address forms
3197 must be rejected, because nothing will fix them up later. Perform
3198 this check right up front. */
3199 if (lra_in_progress || reload_completed)
3200 {
3201 if (!decomposed && !s390_decompose_address (op, &addr))
3202 return 0;
3203 decomposed = true;
3204 }
3205
3206 switch (c)
3207 {
3208 case 'Q': /* no index short displacement */
3209 if (!decomposed && !s390_decompose_address (op, &addr))
3210 return 0;
3211 if (addr.indx)
3212 return 0;
3213 if (!s390_short_displacement (addr.disp))
3214 return 0;
3215 break;
3216
3217 case 'R': /* with index short displacement */
3218 if (TARGET_LONG_DISPLACEMENT)
3219 {
3220 if (!decomposed && !s390_decompose_address (op, &addr))
3221 return 0;
3222 if (!s390_short_displacement (addr.disp))
3223 return 0;
3224 }
3225 /* Any invalid address here will be fixed up by reload,
3226 so accept it for the most generic constraint. */
3227 break;
3228
3229 case 'S': /* no index long displacement */
3230 if (!decomposed && !s390_decompose_address (op, &addr))
3231 return 0;
3232 if (addr.indx)
3233 return 0;
3234 break;
3235
3236 case 'T': /* with index long displacement */
3237 /* Any invalid address here will be fixed up by reload,
3238 so accept it for the most generic constraint. */
3239 break;
3240
3241 default:
3242 return 0;
3243 }
3244 return 1;
3245 }
3246
3247
3248 /* Evaluates constraint strings described by the regular expression
3249 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3250 the constraint given in STR, or 0 else. */
3251
3252 int
3253 s390_mem_constraint (const char *str, rtx op)
3254 {
3255 char c = str[0];
3256
3257 switch (c)
3258 {
3259 case 'A':
3260 /* Check for offsettable variants of memory constraints. */
3261 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3262 return 0;
3263 if ((reload_completed || reload_in_progress)
3264 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3265 return 0;
3266 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3267 case 'B':
3268 /* Check for non-literal-pool variants of memory constraints. */
3269 if (!MEM_P (op))
3270 return 0;
3271 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3272 case 'Q':
3273 case 'R':
3274 case 'S':
3275 case 'T':
3276 if (GET_CODE (op) != MEM)
3277 return 0;
3278 return s390_check_qrst_address (c, XEXP (op, 0), true);
3279 case 'Y':
3280 /* Simply check for the basic form of a shift count. Reload will
3281 take care of making sure we have a proper base register. */
3282 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3283 return 0;
3284 break;
3285 case 'Z':
3286 return s390_check_qrst_address (str[1], op, true);
3287 default:
3288 return 0;
3289 }
3290 return 1;
3291 }
3292
3293
3294 /* Evaluates constraint strings starting with letter O. Input
3295 parameter C is the second letter following the "O" in the constraint
3296 string. Returns 1 if VALUE meets the respective constraint and 0
3297 otherwise. */
3298
3299 int
3300 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3301 {
3302 if (!TARGET_EXTIMM)
3303 return 0;
3304
3305 switch (c)
3306 {
3307 case 's':
3308 return trunc_int_for_mode (value, SImode) == value;
3309
3310 case 'p':
3311 return value == 0
3312 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3313
3314 case 'n':
3315 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3316
3317 default:
3318 gcc_unreachable ();
3319 }
3320 }
3321
3322
3323 /* Evaluates constraint strings starting with letter N. Parameter STR
3324 contains the letters following letter "N" in the constraint string.
3325 Returns true if VALUE matches the constraint. */
3326
3327 int
3328 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3329 {
3330 machine_mode mode, part_mode;
3331 int def;
3332 int part, part_goal;
3333
3334
3335 if (str[0] == 'x')
3336 part_goal = -1;
3337 else
3338 part_goal = str[0] - '0';
3339
3340 switch (str[1])
3341 {
3342 case 'Q':
3343 part_mode = QImode;
3344 break;
3345 case 'H':
3346 part_mode = HImode;
3347 break;
3348 case 'S':
3349 part_mode = SImode;
3350 break;
3351 default:
3352 return 0;
3353 }
3354
3355 switch (str[2])
3356 {
3357 case 'H':
3358 mode = HImode;
3359 break;
3360 case 'S':
3361 mode = SImode;
3362 break;
3363 case 'D':
3364 mode = DImode;
3365 break;
3366 default:
3367 return 0;
3368 }
3369
3370 switch (str[3])
3371 {
3372 case '0':
3373 def = 0;
3374 break;
3375 case 'F':
3376 def = -1;
3377 break;
3378 default:
3379 return 0;
3380 }
3381
3382 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3383 return 0;
3384
3385 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3386 if (part < 0)
3387 return 0;
3388 if (part_goal != -1 && part_goal != part)
3389 return 0;
3390
3391 return 1;
3392 }
3393
3394
3395 /* Returns true if the input parameter VALUE is a float zero. */
3396
3397 int
3398 s390_float_const_zero_p (rtx value)
3399 {
3400 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3401 && value == CONST0_RTX (GET_MODE (value)));
3402 }
3403
3404 /* Implement TARGET_REGISTER_MOVE_COST. */
3405
3406 static int
3407 s390_register_move_cost (machine_mode mode,
3408 reg_class_t from, reg_class_t to)
3409 {
3410 /* On s390, copy between fprs and gprs is expensive. */
3411
3412 /* It becomes somewhat faster having ldgr/lgdr. */
3413 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3414 {
3415 /* ldgr is single cycle. */
3416 if (reg_classes_intersect_p (from, GENERAL_REGS)
3417 && reg_classes_intersect_p (to, FP_REGS))
3418 return 1;
3419 /* lgdr needs 3 cycles. */
3420 if (reg_classes_intersect_p (to, GENERAL_REGS)
3421 && reg_classes_intersect_p (from, FP_REGS))
3422 return 3;
3423 }
3424
3425 /* Otherwise copying is done via memory. */
3426 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3427 && reg_classes_intersect_p (to, FP_REGS))
3428 || (reg_classes_intersect_p (from, FP_REGS)
3429 && reg_classes_intersect_p (to, GENERAL_REGS)))
3430 return 10;
3431
3432 /* We usually do not want to copy via CC. */
3433 if (reg_classes_intersect_p (from, CC_REGS)
3434 || reg_classes_intersect_p (to, CC_REGS))
3435 return 5;
3436
3437 return 1;
3438 }
3439
3440 /* Implement TARGET_MEMORY_MOVE_COST. */
3441
3442 static int
3443 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3444 reg_class_t rclass ATTRIBUTE_UNUSED,
3445 bool in ATTRIBUTE_UNUSED)
3446 {
3447 return 2;
3448 }
3449
3450 /* Compute a (partial) cost for rtx X. Return true if the complete
3451 cost has been computed, and false if subexpressions should be
3452 scanned. In either case, *TOTAL contains the cost result. The
3453 initial value of *TOTAL is the default value computed by
3454 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3455 code of the superexpression of x. */
3456
3457 static bool
3458 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3459 int opno ATTRIBUTE_UNUSED,
3460 int *total, bool speed ATTRIBUTE_UNUSED)
3461 {
3462 int code = GET_CODE (x);
3463 switch (code)
3464 {
3465 case CONST:
3466 case CONST_INT:
3467 case LABEL_REF:
3468 case SYMBOL_REF:
3469 case CONST_DOUBLE:
3470 case CONST_WIDE_INT:
3471 case MEM:
3472 *total = 0;
3473 return true;
3474
3475 case SET:
3476 {
3477 /* Without this a conditional move instruction would be
3478 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3479 comparison operator). That's a bit pessimistic. */
3480
3481 if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3482 return false;
3483
3484 rtx cond = XEXP (SET_SRC (x), 0);
3485
3486 if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3487 return false;
3488
3489 /* It is going to be a load/store on condition. Make it
3490 slightly more expensive than a normal load. */
3491 *total = COSTS_N_INSNS (1) + 1;
3492
3493 rtx dst = SET_DEST (x);
3494 rtx then = XEXP (SET_SRC (x), 1);
3495 rtx els = XEXP (SET_SRC (x), 2);
3496
3497 /* It is a real IF-THEN-ELSE. An additional move will be
3498 needed to implement that. */
3499 if (reload_completed
3500 && !rtx_equal_p (dst, then)
3501 && !rtx_equal_p (dst, els))
3502 *total += COSTS_N_INSNS (1) / 2;
3503
3504 /* A minor penalty for constants we cannot directly handle. */
3505 if ((CONST_INT_P (then) || CONST_INT_P (els))
3506 && (!TARGET_Z13 || MEM_P (dst)
3507 || (CONST_INT_P (then) && !satisfies_constraint_K (then))
3508 || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3509 *total += COSTS_N_INSNS (1) / 2;
3510
3511 /* A store on condition can only handle register src operands. */
3512 if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3513 *total += COSTS_N_INSNS (1) / 2;
3514
3515 return true;
3516 }
3517 case IOR:
3518 /* risbg */
3519 if (GET_CODE (XEXP (x, 0)) == AND
3520 && GET_CODE (XEXP (x, 1)) == ASHIFT
3521 && REG_P (XEXP (XEXP (x, 0), 0))
3522 && REG_P (XEXP (XEXP (x, 1), 0))
3523 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3524 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3525 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3526 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3527 {
3528 *total = COSTS_N_INSNS (2);
3529 return true;
3530 }
3531
3532 /* ~AND on a 128 bit mode. This can be done using a vector
3533 instruction. */
3534 if (TARGET_VXE
3535 && GET_CODE (XEXP (x, 0)) == NOT
3536 && GET_CODE (XEXP (x, 1)) == NOT
3537 && REG_P (XEXP (XEXP (x, 0), 0))
3538 && REG_P (XEXP (XEXP (x, 1), 0))
3539 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3540 && s390_hard_regno_mode_ok (VR0_REGNUM,
3541 GET_MODE (XEXP (XEXP (x, 0), 0))))
3542 {
3543 *total = COSTS_N_INSNS (1);
3544 return true;
3545 }
3546 /* fallthrough */
3547 case ASHIFT:
3548 case ASHIFTRT:
3549 case LSHIFTRT:
3550 case ROTATE:
3551 case ROTATERT:
3552 case AND:
3553 case XOR:
3554 case NEG:
3555 case NOT:
3556 *total = COSTS_N_INSNS (1);
3557 return false;
3558
3559 case PLUS:
3560 case MINUS:
3561 *total = COSTS_N_INSNS (1);
3562 return false;
3563
3564 case MULT:
3565 switch (mode)
3566 {
3567 case E_SImode:
3568 {
3569 rtx left = XEXP (x, 0);
3570 rtx right = XEXP (x, 1);
3571 if (GET_CODE (right) == CONST_INT
3572 && CONST_OK_FOR_K (INTVAL (right)))
3573 *total = s390_cost->mhi;
3574 else if (GET_CODE (left) == SIGN_EXTEND)
3575 *total = s390_cost->mh;
3576 else
3577 *total = s390_cost->ms; /* msr, ms, msy */
3578 break;
3579 }
3580 case E_DImode:
3581 {
3582 rtx left = XEXP (x, 0);
3583 rtx right = XEXP (x, 1);
3584 if (TARGET_ZARCH)
3585 {
3586 if (GET_CODE (right) == CONST_INT
3587 && CONST_OK_FOR_K (INTVAL (right)))
3588 *total = s390_cost->mghi;
3589 else if (GET_CODE (left) == SIGN_EXTEND)
3590 *total = s390_cost->msgf;
3591 else
3592 *total = s390_cost->msg; /* msgr, msg */
3593 }
3594 else /* TARGET_31BIT */
3595 {
3596 if (GET_CODE (left) == SIGN_EXTEND
3597 && GET_CODE (right) == SIGN_EXTEND)
3598 /* mulsidi case: mr, m */
3599 *total = s390_cost->m;
3600 else if (GET_CODE (left) == ZERO_EXTEND
3601 && GET_CODE (right) == ZERO_EXTEND)
3602 /* umulsidi case: ml, mlr */
3603 *total = s390_cost->ml;
3604 else
3605 /* Complex calculation is required. */
3606 *total = COSTS_N_INSNS (40);
3607 }
3608 break;
3609 }
3610 case E_SFmode:
3611 case E_DFmode:
3612 *total = s390_cost->mult_df;
3613 break;
3614 case E_TFmode:
3615 *total = s390_cost->mxbr;
3616 break;
3617 default:
3618 return false;
3619 }
3620 return false;
3621
3622 case FMA:
3623 switch (mode)
3624 {
3625 case E_DFmode:
3626 *total = s390_cost->madbr;
3627 break;
3628 case E_SFmode:
3629 *total = s390_cost->maebr;
3630 break;
3631 default:
3632 return false;
3633 }
3634 /* Negate in the third argument is free: FMSUB. */
3635 if (GET_CODE (XEXP (x, 2)) == NEG)
3636 {
3637 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3638 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3639 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3640 return true;
3641 }
3642 return false;
3643
3644 case UDIV:
3645 case UMOD:
3646 if (mode == TImode) /* 128 bit division */
3647 *total = s390_cost->dlgr;
3648 else if (mode == DImode)
3649 {
3650 rtx right = XEXP (x, 1);
3651 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3652 *total = s390_cost->dlr;
3653 else /* 64 by 64 bit division */
3654 *total = s390_cost->dlgr;
3655 }
3656 else if (mode == SImode) /* 32 bit division */
3657 *total = s390_cost->dlr;
3658 return false;
3659
3660 case DIV:
3661 case MOD:
3662 if (mode == DImode)
3663 {
3664 rtx right = XEXP (x, 1);
3665 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3666 if (TARGET_ZARCH)
3667 *total = s390_cost->dsgfr;
3668 else
3669 *total = s390_cost->dr;
3670 else /* 64 by 64 bit division */
3671 *total = s390_cost->dsgr;
3672 }
3673 else if (mode == SImode) /* 32 bit division */
3674 *total = s390_cost->dlr;
3675 else if (mode == SFmode)
3676 {
3677 *total = s390_cost->debr;
3678 }
3679 else if (mode == DFmode)
3680 {
3681 *total = s390_cost->ddbr;
3682 }
3683 else if (mode == TFmode)
3684 {
3685 *total = s390_cost->dxbr;
3686 }
3687 return false;
3688
3689 case SQRT:
3690 if (mode == SFmode)
3691 *total = s390_cost->sqebr;
3692 else if (mode == DFmode)
3693 *total = s390_cost->sqdbr;
3694 else /* TFmode */
3695 *total = s390_cost->sqxbr;
3696 return false;
3697
3698 case SIGN_EXTEND:
3699 case ZERO_EXTEND:
3700 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3701 || outer_code == PLUS || outer_code == MINUS
3702 || outer_code == COMPARE)
3703 *total = 0;
3704 return false;
3705
3706 case COMPARE:
3707 *total = COSTS_N_INSNS (1);
3708 if (GET_CODE (XEXP (x, 0)) == AND
3709 && GET_CODE (XEXP (x, 1)) == CONST_INT
3710 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3711 {
3712 rtx op0 = XEXP (XEXP (x, 0), 0);
3713 rtx op1 = XEXP (XEXP (x, 0), 1);
3714 rtx op2 = XEXP (x, 1);
3715
3716 if (memory_operand (op0, GET_MODE (op0))
3717 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3718 return true;
3719 if (register_operand (op0, GET_MODE (op0))
3720 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3721 return true;
3722 }
3723 return false;
3724
3725 default:
3726 return false;
3727 }
3728 }
3729
3730 /* Return the cost of an address rtx ADDR. */
3731
3732 static int
3733 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3734 addr_space_t as ATTRIBUTE_UNUSED,
3735 bool speed ATTRIBUTE_UNUSED)
3736 {
3737 struct s390_address ad;
3738 if (!s390_decompose_address (addr, &ad))
3739 return 1000;
3740
3741 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3742 }
3743
3744 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3745 static int
3746 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3747 tree vectype,
3748 int misalign ATTRIBUTE_UNUSED)
3749 {
3750 switch (type_of_cost)
3751 {
3752 case scalar_stmt:
3753 case scalar_load:
3754 case scalar_store:
3755 case vector_stmt:
3756 case vector_load:
3757 case vector_store:
3758 case vector_gather_load:
3759 case vector_scatter_store:
3760 case vec_to_scalar:
3761 case scalar_to_vec:
3762 case cond_branch_not_taken:
3763 case vec_perm:
3764 case vec_promote_demote:
3765 case unaligned_load:
3766 case unaligned_store:
3767 return 1;
3768
3769 case cond_branch_taken:
3770 return 3;
3771
3772 case vec_construct:
3773 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3774
3775 default:
3776 gcc_unreachable ();
3777 }
3778 }
3779
3780 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3781 otherwise return 0. */
3782
3783 int
3784 tls_symbolic_operand (rtx op)
3785 {
3786 if (GET_CODE (op) != SYMBOL_REF)
3787 return 0;
3788 return SYMBOL_REF_TLS_MODEL (op);
3789 }
3790 \f
3791 /* Split DImode access register reference REG (on 64-bit) into its constituent
3792 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3793 gen_highpart cannot be used as they assume all registers are word-sized,
3794 while our access registers have only half that size. */
3795
3796 void
3797 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3798 {
3799 gcc_assert (TARGET_64BIT);
3800 gcc_assert (ACCESS_REG_P (reg));
3801 gcc_assert (GET_MODE (reg) == DImode);
3802 gcc_assert (!(REGNO (reg) & 1));
3803
3804 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3805 *hi = gen_rtx_REG (SImode, REGNO (reg));
3806 }
3807
3808 /* Return true if OP contains a symbol reference */
3809
3810 bool
3811 symbolic_reference_mentioned_p (rtx op)
3812 {
3813 const char *fmt;
3814 int i;
3815
3816 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3817 return 1;
3818
3819 fmt = GET_RTX_FORMAT (GET_CODE (op));
3820 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3821 {
3822 if (fmt[i] == 'E')
3823 {
3824 int j;
3825
3826 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3827 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3828 return 1;
3829 }
3830
3831 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3832 return 1;
3833 }
3834
3835 return 0;
3836 }
3837
3838 /* Return true if OP contains a reference to a thread-local symbol. */
3839
3840 bool
3841 tls_symbolic_reference_mentioned_p (rtx op)
3842 {
3843 const char *fmt;
3844 int i;
3845
3846 if (GET_CODE (op) == SYMBOL_REF)
3847 return tls_symbolic_operand (op);
3848
3849 fmt = GET_RTX_FORMAT (GET_CODE (op));
3850 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3851 {
3852 if (fmt[i] == 'E')
3853 {
3854 int j;
3855
3856 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3857 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3858 return true;
3859 }
3860
3861 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3862 return true;
3863 }
3864
3865 return false;
3866 }
3867
3868
3869 /* Return true if OP is a legitimate general operand when
3870 generating PIC code. It is given that flag_pic is on
3871 and that OP satisfies CONSTANT_P. */
3872
3873 int
3874 legitimate_pic_operand_p (rtx op)
3875 {
3876 /* Accept all non-symbolic constants. */
3877 if (!SYMBOLIC_CONST (op))
3878 return 1;
3879
3880 /* Accept addresses that can be expressed relative to (pc). */
3881 if (larl_operand (op, VOIDmode))
3882 return 1;
3883
3884 /* Reject everything else; must be handled
3885 via emit_symbolic_move. */
3886 return 0;
3887 }
3888
3889 /* Returns true if the constant value OP is a legitimate general operand.
3890 It is given that OP satisfies CONSTANT_P. */
3891
3892 static bool
3893 s390_legitimate_constant_p (machine_mode mode, rtx op)
3894 {
3895 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3896 {
3897 if (GET_MODE_SIZE (mode) != 16)
3898 return 0;
3899
3900 if (!satisfies_constraint_j00 (op)
3901 && !satisfies_constraint_jm1 (op)
3902 && !satisfies_constraint_jKK (op)
3903 && !satisfies_constraint_jxx (op)
3904 && !satisfies_constraint_jyy (op))
3905 return 0;
3906 }
3907
3908 /* Accept all non-symbolic constants. */
3909 if (!SYMBOLIC_CONST (op))
3910 return 1;
3911
3912 /* Accept immediate LARL operands. */
3913 if (larl_operand (op, mode))
3914 return 1;
3915
3916 /* Thread-local symbols are never legal constants. This is
3917 so that emit_call knows that computing such addresses
3918 might require a function call. */
3919 if (TLS_SYMBOLIC_CONST (op))
3920 return 0;
3921
3922 /* In the PIC case, symbolic constants must *not* be
3923 forced into the literal pool. We accept them here,
3924 so that they will be handled by emit_symbolic_move. */
3925 if (flag_pic)
3926 return 1;
3927
3928 /* All remaining non-PIC symbolic constants are
3929 forced into the literal pool. */
3930 return 0;
3931 }
3932
3933 /* Determine if it's legal to put X into the constant pool. This
3934 is not possible if X contains the address of a symbol that is
3935 not constant (TLS) or not known at final link time (PIC). */
3936
3937 static bool
3938 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3939 {
3940 switch (GET_CODE (x))
3941 {
3942 case CONST_INT:
3943 case CONST_DOUBLE:
3944 case CONST_WIDE_INT:
3945 case CONST_VECTOR:
3946 /* Accept all non-symbolic constants. */
3947 return false;
3948
3949 case LABEL_REF:
3950 /* Labels are OK iff we are non-PIC. */
3951 return flag_pic != 0;
3952
3953 case SYMBOL_REF:
3954 /* 'Naked' TLS symbol references are never OK,
3955 non-TLS symbols are OK iff we are non-PIC. */
3956 if (tls_symbolic_operand (x))
3957 return true;
3958 else
3959 return flag_pic != 0;
3960
3961 case CONST:
3962 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3963 case PLUS:
3964 case MINUS:
3965 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3966 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3967
3968 case UNSPEC:
3969 switch (XINT (x, 1))
3970 {
3971 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3972 case UNSPEC_LTREL_OFFSET:
3973 case UNSPEC_GOT:
3974 case UNSPEC_GOTOFF:
3975 case UNSPEC_PLTOFF:
3976 case UNSPEC_TLSGD:
3977 case UNSPEC_TLSLDM:
3978 case UNSPEC_NTPOFF:
3979 case UNSPEC_DTPOFF:
3980 case UNSPEC_GOTNTPOFF:
3981 case UNSPEC_INDNTPOFF:
3982 return false;
3983
3984 /* If the literal pool shares the code section, be put
3985 execute template placeholders into the pool as well. */
3986 case UNSPEC_INSN:
3987 default:
3988 return true;
3989 }
3990 break;
3991
3992 default:
3993 gcc_unreachable ();
3994 }
3995 }
3996
3997 /* Returns true if the constant value OP is a legitimate general
3998 operand during and after reload. The difference to
3999 legitimate_constant_p is that this function will not accept
4000 a constant that would need to be forced to the literal pool
4001 before it can be used as operand.
4002 This function accepts all constants which can be loaded directly
4003 into a GPR. */
4004
4005 bool
4006 legitimate_reload_constant_p (rtx op)
4007 {
4008 /* Accept la(y) operands. */
4009 if (GET_CODE (op) == CONST_INT
4010 && DISP_IN_RANGE (INTVAL (op)))
4011 return true;
4012
4013 /* Accept l(g)hi/l(g)fi operands. */
4014 if (GET_CODE (op) == CONST_INT
4015 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4016 return true;
4017
4018 /* Accept lliXX operands. */
4019 if (TARGET_ZARCH
4020 && GET_CODE (op) == CONST_INT
4021 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4022 && s390_single_part (op, word_mode, HImode, 0) >= 0)
4023 return true;
4024
4025 if (TARGET_EXTIMM
4026 && GET_CODE (op) == CONST_INT
4027 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4028 && s390_single_part (op, word_mode, SImode, 0) >= 0)
4029 return true;
4030
4031 /* Accept larl operands. */
4032 if (larl_operand (op, VOIDmode))
4033 return true;
4034
4035 /* Accept floating-point zero operands that fit into a single GPR. */
4036 if (GET_CODE (op) == CONST_DOUBLE
4037 && s390_float_const_zero_p (op)
4038 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4039 return true;
4040
4041 /* Accept double-word operands that can be split. */
4042 if (GET_CODE (op) == CONST_WIDE_INT
4043 || (GET_CODE (op) == CONST_INT
4044 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4045 {
4046 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4047 rtx hi = operand_subword (op, 0, 0, dword_mode);
4048 rtx lo = operand_subword (op, 1, 0, dword_mode);
4049 return legitimate_reload_constant_p (hi)
4050 && legitimate_reload_constant_p (lo);
4051 }
4052
4053 /* Everything else cannot be handled without reload. */
4054 return false;
4055 }
4056
4057 /* Returns true if the constant value OP is a legitimate fp operand
4058 during and after reload.
4059 This function accepts all constants which can be loaded directly
4060 into an FPR. */
4061
4062 static bool
4063 legitimate_reload_fp_constant_p (rtx op)
4064 {
4065 /* Accept floating-point zero operands if the load zero instruction
4066 can be used. Prior to z196 the load fp zero instruction caused a
4067 performance penalty if the result is used as BFP number. */
4068 if (TARGET_Z196
4069 && GET_CODE (op) == CONST_DOUBLE
4070 && s390_float_const_zero_p (op))
4071 return true;
4072
4073 return false;
4074 }
4075
4076 /* Returns true if the constant value OP is a legitimate vector operand
4077 during and after reload.
4078 This function accepts all constants which can be loaded directly
4079 into an VR. */
4080
4081 static bool
4082 legitimate_reload_vector_constant_p (rtx op)
4083 {
4084 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4085 && (satisfies_constraint_j00 (op)
4086 || satisfies_constraint_jm1 (op)
4087 || satisfies_constraint_jKK (op)
4088 || satisfies_constraint_jxx (op)
4089 || satisfies_constraint_jyy (op)))
4090 return true;
4091
4092 return false;
4093 }
4094
4095 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4096 return the class of reg to actually use. */
4097
4098 static reg_class_t
4099 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4100 {
4101 switch (GET_CODE (op))
4102 {
4103 /* Constants we cannot reload into general registers
4104 must be forced into the literal pool. */
4105 case CONST_VECTOR:
4106 case CONST_DOUBLE:
4107 case CONST_INT:
4108 case CONST_WIDE_INT:
4109 if (reg_class_subset_p (GENERAL_REGS, rclass)
4110 && legitimate_reload_constant_p (op))
4111 return GENERAL_REGS;
4112 else if (reg_class_subset_p (ADDR_REGS, rclass)
4113 && legitimate_reload_constant_p (op))
4114 return ADDR_REGS;
4115 else if (reg_class_subset_p (FP_REGS, rclass)
4116 && legitimate_reload_fp_constant_p (op))
4117 return FP_REGS;
4118 else if (reg_class_subset_p (VEC_REGS, rclass)
4119 && legitimate_reload_vector_constant_p (op))
4120 return VEC_REGS;
4121
4122 return NO_REGS;
4123
4124 /* If a symbolic constant or a PLUS is reloaded,
4125 it is most likely being used as an address, so
4126 prefer ADDR_REGS. If 'class' is not a superset
4127 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4128 case CONST:
4129 /* Symrefs cannot be pushed into the literal pool with -fPIC
4130 so we *MUST NOT* return NO_REGS for these cases
4131 (s390_cannot_force_const_mem will return true).
4132
4133 On the other hand we MUST return NO_REGS for symrefs with
4134 invalid addend which might have been pushed to the literal
4135 pool (no -fPIC). Usually we would expect them to be
4136 handled via secondary reload but this does not happen if
4137 they are used as literal pool slot replacement in reload
4138 inheritance (see emit_input_reload_insns). */
4139 if (GET_CODE (XEXP (op, 0)) == PLUS
4140 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4141 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4142 {
4143 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4144 return ADDR_REGS;
4145 else
4146 return NO_REGS;
4147 }
4148 /* fallthrough */
4149 case LABEL_REF:
4150 case SYMBOL_REF:
4151 if (!legitimate_reload_constant_p (op))
4152 return NO_REGS;
4153 /* fallthrough */
4154 case PLUS:
4155 /* load address will be used. */
4156 if (reg_class_subset_p (ADDR_REGS, rclass))
4157 return ADDR_REGS;
4158 else
4159 return NO_REGS;
4160
4161 default:
4162 break;
4163 }
4164
4165 return rclass;
4166 }
4167
4168 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4169 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4170 aligned. */
4171
4172 bool
4173 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4174 {
4175 HOST_WIDE_INT addend;
4176 rtx symref;
4177
4178 /* The "required alignment" might be 0 (e.g. for certain structs
4179 accessed via BLKmode). Early abort in this case, as well as when
4180 an alignment > 8 is required. */
4181 if (alignment < 2 || alignment > 8)
4182 return false;
4183
4184 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4185 return false;
4186
4187 if (addend & (alignment - 1))
4188 return false;
4189
4190 if (GET_CODE (symref) == SYMBOL_REF)
4191 {
4192 /* s390_encode_section_info is not called for anchors, since they don't
4193 have corresponding VAR_DECLs. Therefore, we cannot rely on
4194 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information. */
4195 if (SYMBOL_REF_ANCHOR_P (symref))
4196 {
4197 HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref);
4198 unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment
4199 / BITS_PER_UNIT);
4200
4201 gcc_assert (block_offset >= 0);
4202 return ((block_offset & (alignment - 1)) == 0
4203 && block_alignment >= alignment);
4204 }
4205
4206 /* We have load-relative instructions for 2-byte, 4-byte, and
4207 8-byte alignment so allow only these. */
4208 switch (alignment)
4209 {
4210 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4211 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4212 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4213 default: return false;
4214 }
4215 }
4216
4217 if (GET_CODE (symref) == UNSPEC
4218 && alignment <= UNITS_PER_LONG)
4219 return true;
4220
4221 return false;
4222 }
4223
4224 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4225 operand SCRATCH is used to reload the even part of the address and
4226 adding one. */
4227
4228 void
4229 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4230 {
4231 HOST_WIDE_INT addend;
4232 rtx symref;
4233
4234 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4235 gcc_unreachable ();
4236
4237 if (!(addend & 1))
4238 /* Easy case. The addend is even so larl will do fine. */
4239 emit_move_insn (reg, addr);
4240 else
4241 {
4242 /* We can leave the scratch register untouched if the target
4243 register is a valid base register. */
4244 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4245 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4246 scratch = reg;
4247
4248 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4249 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4250
4251 if (addend != 1)
4252 emit_move_insn (scratch,
4253 gen_rtx_CONST (Pmode,
4254 gen_rtx_PLUS (Pmode, symref,
4255 GEN_INT (addend - 1))));
4256 else
4257 emit_move_insn (scratch, symref);
4258
4259 /* Increment the address using la in order to avoid clobbering cc. */
4260 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4261 }
4262 }
4263
4264 /* Generate what is necessary to move between REG and MEM using
4265 SCRATCH. The direction is given by TOMEM. */
4266
4267 void
4268 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4269 {
4270 /* Reload might have pulled a constant out of the literal pool.
4271 Force it back in. */
4272 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4273 || GET_CODE (mem) == CONST_WIDE_INT
4274 || GET_CODE (mem) == CONST_VECTOR
4275 || GET_CODE (mem) == CONST)
4276 mem = force_const_mem (GET_MODE (reg), mem);
4277
4278 gcc_assert (MEM_P (mem));
4279
4280 /* For a load from memory we can leave the scratch register
4281 untouched if the target register is a valid base register. */
4282 if (!tomem
4283 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4284 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4285 && GET_MODE (reg) == GET_MODE (scratch))
4286 scratch = reg;
4287
4288 /* Load address into scratch register. Since we can't have a
4289 secondary reload for a secondary reload we have to cover the case
4290 where larl would need a secondary reload here as well. */
4291 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4292
4293 /* Now we can use a standard load/store to do the move. */
4294 if (tomem)
4295 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4296 else
4297 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4298 }
4299
4300 /* Inform reload about cases where moving X with a mode MODE to a register in
4301 RCLASS requires an extra scratch or immediate register. Return the class
4302 needed for the immediate register. */
4303
4304 static reg_class_t
4305 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4306 machine_mode mode, secondary_reload_info *sri)
4307 {
4308 enum reg_class rclass = (enum reg_class) rclass_i;
4309
4310 /* Intermediate register needed. */
4311 if (reg_classes_intersect_p (CC_REGS, rclass))
4312 return GENERAL_REGS;
4313
4314 if (TARGET_VX)
4315 {
4316 /* The vst/vl vector move instructions allow only for short
4317 displacements. */
4318 if (MEM_P (x)
4319 && GET_CODE (XEXP (x, 0)) == PLUS
4320 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4321 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4322 && reg_class_subset_p (rclass, VEC_REGS)
4323 && (!reg_class_subset_p (rclass, FP_REGS)
4324 || (GET_MODE_SIZE (mode) > 8
4325 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4326 {
4327 if (in_p)
4328 sri->icode = (TARGET_64BIT ?
4329 CODE_FOR_reloaddi_la_in :
4330 CODE_FOR_reloadsi_la_in);
4331 else
4332 sri->icode = (TARGET_64BIT ?
4333 CODE_FOR_reloaddi_la_out :
4334 CODE_FOR_reloadsi_la_out);
4335 }
4336 }
4337
4338 if (TARGET_Z10)
4339 {
4340 HOST_WIDE_INT offset;
4341 rtx symref;
4342
4343 /* On z10 several optimizer steps may generate larl operands with
4344 an odd addend. */
4345 if (in_p
4346 && s390_loadrelative_operand_p (x, &symref, &offset)
4347 && mode == Pmode
4348 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4349 && (offset & 1) == 1)
4350 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4351 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4352
4353 /* Handle all the (mem (symref)) accesses we cannot use the z10
4354 instructions for. */
4355 if (MEM_P (x)
4356 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4357 && (mode == QImode
4358 || !reg_class_subset_p (rclass, GENERAL_REGS)
4359 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4360 || !s390_check_symref_alignment (XEXP (x, 0),
4361 GET_MODE_SIZE (mode))))
4362 {
4363 #define __SECONDARY_RELOAD_CASE(M,m) \
4364 case E_##M##mode: \
4365 if (TARGET_64BIT) \
4366 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4367 CODE_FOR_reload##m##di_tomem_z10; \
4368 else \
4369 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4370 CODE_FOR_reload##m##si_tomem_z10; \
4371 break;
4372
4373 switch (GET_MODE (x))
4374 {
4375 __SECONDARY_RELOAD_CASE (QI, qi);
4376 __SECONDARY_RELOAD_CASE (HI, hi);
4377 __SECONDARY_RELOAD_CASE (SI, si);
4378 __SECONDARY_RELOAD_CASE (DI, di);
4379 __SECONDARY_RELOAD_CASE (TI, ti);
4380 __SECONDARY_RELOAD_CASE (SF, sf);
4381 __SECONDARY_RELOAD_CASE (DF, df);
4382 __SECONDARY_RELOAD_CASE (TF, tf);
4383 __SECONDARY_RELOAD_CASE (SD, sd);
4384 __SECONDARY_RELOAD_CASE (DD, dd);
4385 __SECONDARY_RELOAD_CASE (TD, td);
4386 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4387 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4388 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4389 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4390 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4391 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4392 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4393 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4394 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4395 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4396 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4397 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4398 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4399 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4400 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4401 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4402 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4403 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4404 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4405 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4406 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4407 default:
4408 gcc_unreachable ();
4409 }
4410 #undef __SECONDARY_RELOAD_CASE
4411 }
4412 }
4413
4414 /* We need a scratch register when loading a PLUS expression which
4415 is not a legitimate operand of the LOAD ADDRESS instruction. */
4416 /* LRA can deal with transformation of plus op very well -- so we
4417 don't need to prompt LRA in this case. */
4418 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4419 sri->icode = (TARGET_64BIT ?
4420 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4421
4422 /* Performing a multiword move from or to memory we have to make sure the
4423 second chunk in memory is addressable without causing a displacement
4424 overflow. If that would be the case we calculate the address in
4425 a scratch register. */
4426 if (MEM_P (x)
4427 && GET_CODE (XEXP (x, 0)) == PLUS
4428 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4429 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4430 + GET_MODE_SIZE (mode) - 1))
4431 {
4432 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4433 in a s_operand address since we may fallback to lm/stm. So we only
4434 have to care about overflows in the b+i+d case. */
4435 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4436 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4437 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4438 /* For FP_REGS no lm/stm is available so this check is triggered
4439 for displacement overflows in b+i+d and b+d like addresses. */
4440 || (reg_classes_intersect_p (FP_REGS, rclass)
4441 && s390_class_max_nregs (FP_REGS, mode) > 1))
4442 {
4443 if (in_p)
4444 sri->icode = (TARGET_64BIT ?
4445 CODE_FOR_reloaddi_la_in :
4446 CODE_FOR_reloadsi_la_in);
4447 else
4448 sri->icode = (TARGET_64BIT ?
4449 CODE_FOR_reloaddi_la_out :
4450 CODE_FOR_reloadsi_la_out);
4451 }
4452 }
4453
4454 /* A scratch address register is needed when a symbolic constant is
4455 copied to r0 compiling with -fPIC. In other cases the target
4456 register might be used as temporary (see legitimize_pic_address). */
4457 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4458 sri->icode = (TARGET_64BIT ?
4459 CODE_FOR_reloaddi_PIC_addr :
4460 CODE_FOR_reloadsi_PIC_addr);
4461
4462 /* Either scratch or no register needed. */
4463 return NO_REGS;
4464 }
4465
4466 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4467
4468 We need secondary memory to move data between GPRs and FPRs.
4469
4470 - With DFP the ldgr lgdr instructions are available. Due to the
4471 different alignment we cannot use them for SFmode. For 31 bit a
4472 64 bit value in GPR would be a register pair so here we still
4473 need to go via memory.
4474
4475 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4476 overlapping of FPRs and VRs we still disallow TF/TD modes to be
4477 in full VRs so as before also on z13 we do these moves via
4478 memory.
4479
4480 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4481
4482 static bool
4483 s390_secondary_memory_needed (machine_mode mode,
4484 reg_class_t class1, reg_class_t class2)
4485 {
4486 return (((reg_classes_intersect_p (class1, VEC_REGS)
4487 && reg_classes_intersect_p (class2, GENERAL_REGS))
4488 || (reg_classes_intersect_p (class1, GENERAL_REGS)
4489 && reg_classes_intersect_p (class2, VEC_REGS)))
4490 && (!TARGET_DFP || !TARGET_64BIT || GET_MODE_SIZE (mode) != 8)
4491 && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4492 && GET_MODE_SIZE (mode) > 8)));
4493 }
4494
4495 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4496
4497 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4498 because the movsi and movsf patterns don't handle r/f moves. */
4499
4500 static machine_mode
4501 s390_secondary_memory_needed_mode (machine_mode mode)
4502 {
4503 if (GET_MODE_BITSIZE (mode) < 32)
4504 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4505 return mode;
4506 }
4507
4508 /* Generate code to load SRC, which is PLUS that is not a
4509 legitimate operand for the LA instruction, into TARGET.
4510 SCRATCH may be used as scratch register. */
4511
4512 void
4513 s390_expand_plus_operand (rtx target, rtx src,
4514 rtx scratch)
4515 {
4516 rtx sum1, sum2;
4517 struct s390_address ad;
4518
4519 /* src must be a PLUS; get its two operands. */
4520 gcc_assert (GET_CODE (src) == PLUS);
4521 gcc_assert (GET_MODE (src) == Pmode);
4522
4523 /* Check if any of the two operands is already scheduled
4524 for replacement by reload. This can happen e.g. when
4525 float registers occur in an address. */
4526 sum1 = find_replacement (&XEXP (src, 0));
4527 sum2 = find_replacement (&XEXP (src, 1));
4528 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4529
4530 /* If the address is already strictly valid, there's nothing to do. */
4531 if (!s390_decompose_address (src, &ad)
4532 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4533 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4534 {
4535 /* Otherwise, one of the operands cannot be an address register;
4536 we reload its value into the scratch register. */
4537 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4538 {
4539 emit_move_insn (scratch, sum1);
4540 sum1 = scratch;
4541 }
4542 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4543 {
4544 emit_move_insn (scratch, sum2);
4545 sum2 = scratch;
4546 }
4547
4548 /* According to the way these invalid addresses are generated
4549 in reload.c, it should never happen (at least on s390) that
4550 *neither* of the PLUS components, after find_replacements
4551 was applied, is an address register. */
4552 if (sum1 == scratch && sum2 == scratch)
4553 {
4554 debug_rtx (src);
4555 gcc_unreachable ();
4556 }
4557
4558 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4559 }
4560
4561 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4562 is only ever performed on addresses, so we can mark the
4563 sum as legitimate for LA in any case. */
4564 s390_load_address (target, src);
4565 }
4566
4567
4568 /* Return true if ADDR is a valid memory address.
4569 STRICT specifies whether strict register checking applies. */
4570
4571 static bool
4572 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4573 {
4574 struct s390_address ad;
4575
4576 if (TARGET_Z10
4577 && larl_operand (addr, VOIDmode)
4578 && (mode == VOIDmode
4579 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4580 return true;
4581
4582 if (!s390_decompose_address (addr, &ad))
4583 return false;
4584
4585 if (strict)
4586 {
4587 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4588 return false;
4589
4590 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4591 return false;
4592 }
4593 else
4594 {
4595 if (ad.base
4596 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4597 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4598 return false;
4599
4600 if (ad.indx
4601 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4602 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4603 return false;
4604 }
4605 return true;
4606 }
4607
4608 /* Return true if OP is a valid operand for the LA instruction.
4609 In 31-bit, we need to prove that the result is used as an
4610 address, as LA performs only a 31-bit addition. */
4611
4612 bool
4613 legitimate_la_operand_p (rtx op)
4614 {
4615 struct s390_address addr;
4616 if (!s390_decompose_address (op, &addr))
4617 return false;
4618
4619 return (TARGET_64BIT || addr.pointer);
4620 }
4621
4622 /* Return true if it is valid *and* preferable to use LA to
4623 compute the sum of OP1 and OP2. */
4624
4625 bool
4626 preferred_la_operand_p (rtx op1, rtx op2)
4627 {
4628 struct s390_address addr;
4629
4630 if (op2 != const0_rtx)
4631 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4632
4633 if (!s390_decompose_address (op1, &addr))
4634 return false;
4635 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4636 return false;
4637 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4638 return false;
4639
4640 /* Avoid LA instructions with index (and base) register on z196 or
4641 later; it is preferable to use regular add instructions when
4642 possible. Starting with zEC12 the la with index register is
4643 "uncracked" again but still slower than a regular add. */
4644 if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
4645 return false;
4646
4647 if (!TARGET_64BIT && !addr.pointer)
4648 return false;
4649
4650 if (addr.pointer)
4651 return true;
4652
4653 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4654 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4655 return true;
4656
4657 return false;
4658 }
4659
4660 /* Emit a forced load-address operation to load SRC into DST.
4661 This will use the LOAD ADDRESS instruction even in situations
4662 where legitimate_la_operand_p (SRC) returns false. */
4663
4664 void
4665 s390_load_address (rtx dst, rtx src)
4666 {
4667 if (TARGET_64BIT)
4668 emit_move_insn (dst, src);
4669 else
4670 emit_insn (gen_force_la_31 (dst, src));
4671 }
4672
4673 /* Return true if it ok to use SYMBOL_REF in a relative address. */
4674
4675 bool
4676 s390_rel_address_ok_p (rtx symbol_ref)
4677 {
4678 tree decl;
4679
4680 if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4681 return true;
4682
4683 decl = SYMBOL_REF_DECL (symbol_ref);
4684
4685 if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4686 return (s390_pic_data_is_text_relative
4687 || (decl
4688 && TREE_CODE (decl) == FUNCTION_DECL));
4689
4690 return false;
4691 }
4692
4693 /* Return a legitimate reference for ORIG (an address) using the
4694 register REG. If REG is 0, a new pseudo is generated.
4695
4696 There are two types of references that must be handled:
4697
4698 1. Global data references must load the address from the GOT, via
4699 the PIC reg. An insn is emitted to do this load, and the reg is
4700 returned.
4701
4702 2. Static data references, constant pool addresses, and code labels
4703 compute the address as an offset from the GOT, whose base is in
4704 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4705 differentiate them from global data objects. The returned
4706 address is the PIC reg + an unspec constant.
4707
4708 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4709 reg also appears in the address. */
4710
4711 rtx
4712 legitimize_pic_address (rtx orig, rtx reg)
4713 {
4714 rtx addr = orig;
4715 rtx addend = const0_rtx;
4716 rtx new_rtx = orig;
4717
4718 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4719
4720 if (GET_CODE (addr) == CONST)
4721 addr = XEXP (addr, 0);
4722
4723 if (GET_CODE (addr) == PLUS)
4724 {
4725 addend = XEXP (addr, 1);
4726 addr = XEXP (addr, 0);
4727 }
4728
4729 if ((GET_CODE (addr) == LABEL_REF
4730 || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4731 || (GET_CODE (addr) == UNSPEC &&
4732 (XINT (addr, 1) == UNSPEC_GOTENT
4733 || XINT (addr, 1) == UNSPEC_PLT)))
4734 && GET_CODE (addend) == CONST_INT)
4735 {
4736 /* This can be locally addressed. */
4737
4738 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4739 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4740 gen_rtx_CONST (Pmode, addr) : addr);
4741
4742 if (larl_operand (const_addr, VOIDmode)
4743 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4744 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4745 {
4746 if (INTVAL (addend) & 1)
4747 {
4748 /* LARL can't handle odd offsets, so emit a pair of LARL
4749 and LA. */
4750 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4751
4752 if (!DISP_IN_RANGE (INTVAL (addend)))
4753 {
4754 HOST_WIDE_INT even = INTVAL (addend) - 1;
4755 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4756 addr = gen_rtx_CONST (Pmode, addr);
4757 addend = const1_rtx;
4758 }
4759
4760 emit_move_insn (temp, addr);
4761 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4762
4763 if (reg != 0)
4764 {
4765 s390_load_address (reg, new_rtx);
4766 new_rtx = reg;
4767 }
4768 }
4769 else
4770 {
4771 /* If the offset is even, we can just use LARL. This
4772 will happen automatically. */
4773 }
4774 }
4775 else
4776 {
4777 /* No larl - Access local symbols relative to the GOT. */
4778
4779 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4780
4781 if (reload_in_progress || reload_completed)
4782 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4783
4784 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4785 if (addend != const0_rtx)
4786 addr = gen_rtx_PLUS (Pmode, addr, addend);
4787 addr = gen_rtx_CONST (Pmode, addr);
4788 addr = force_const_mem (Pmode, addr);
4789 emit_move_insn (temp, addr);
4790
4791 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4792 if (reg != 0)
4793 {
4794 s390_load_address (reg, new_rtx);
4795 new_rtx = reg;
4796 }
4797 }
4798 }
4799 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4800 {
4801 /* A non-local symbol reference without addend.
4802
4803 The symbol ref is wrapped into an UNSPEC to make sure the
4804 proper operand modifier (@GOT or @GOTENT) will be emitted.
4805 This will tell the linker to put the symbol into the GOT.
4806
4807 Additionally the code dereferencing the GOT slot is emitted here.
4808
4809 An addend to the symref needs to be added afterwards.
4810 legitimize_pic_address calls itself recursively to handle
4811 that case. So no need to do it here. */
4812
4813 if (reg == 0)
4814 reg = gen_reg_rtx (Pmode);
4815
4816 if (TARGET_Z10)
4817 {
4818 /* Use load relative if possible.
4819 lgrl <target>, sym@GOTENT */
4820 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4821 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4822 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4823
4824 emit_move_insn (reg, new_rtx);
4825 new_rtx = reg;
4826 }
4827 else if (flag_pic == 1)
4828 {
4829 /* Assume GOT offset is a valid displacement operand (< 4k
4830 or < 512k with z990). This is handled the same way in
4831 both 31- and 64-bit code (@GOT).
4832 lg <target>, sym@GOT(r12) */
4833
4834 if (reload_in_progress || reload_completed)
4835 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4836
4837 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4838 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4839 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4840 new_rtx = gen_const_mem (Pmode, new_rtx);
4841 emit_move_insn (reg, new_rtx);
4842 new_rtx = reg;
4843 }
4844 else
4845 {
4846 /* If the GOT offset might be >= 4k, we determine the position
4847 of the GOT entry via a PC-relative LARL (@GOTENT).
4848 larl temp, sym@GOTENT
4849 lg <target>, 0(temp) */
4850
4851 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4852
4853 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4854 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4855
4856 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4857 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4858 emit_move_insn (temp, new_rtx);
4859 new_rtx = gen_const_mem (Pmode, temp);
4860 emit_move_insn (reg, new_rtx);
4861
4862 new_rtx = reg;
4863 }
4864 }
4865 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4866 {
4867 gcc_assert (XVECLEN (addr, 0) == 1);
4868 switch (XINT (addr, 1))
4869 {
4870 /* These address symbols (or PLT slots) relative to the GOT
4871 (not GOT slots!). In general this will exceed the
4872 displacement range so these value belong into the literal
4873 pool. */
4874 case UNSPEC_GOTOFF:
4875 case UNSPEC_PLTOFF:
4876 new_rtx = force_const_mem (Pmode, orig);
4877 break;
4878
4879 /* For -fPIC the GOT size might exceed the displacement
4880 range so make sure the value is in the literal pool. */
4881 case UNSPEC_GOT:
4882 if (flag_pic == 2)
4883 new_rtx = force_const_mem (Pmode, orig);
4884 break;
4885
4886 /* For @GOTENT larl is used. This is handled like local
4887 symbol refs. */
4888 case UNSPEC_GOTENT:
4889 gcc_unreachable ();
4890 break;
4891
4892 /* For @PLT larl is used. This is handled like local
4893 symbol refs. */
4894 case UNSPEC_PLT:
4895 gcc_unreachable ();
4896 break;
4897
4898 /* Everything else cannot happen. */
4899 default:
4900 gcc_unreachable ();
4901 }
4902 }
4903 else if (addend != const0_rtx)
4904 {
4905 /* Otherwise, compute the sum. */
4906
4907 rtx base = legitimize_pic_address (addr, reg);
4908 new_rtx = legitimize_pic_address (addend,
4909 base == reg ? NULL_RTX : reg);
4910 if (GET_CODE (new_rtx) == CONST_INT)
4911 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4912 else
4913 {
4914 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4915 {
4916 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4917 new_rtx = XEXP (new_rtx, 1);
4918 }
4919 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4920 }
4921
4922 if (GET_CODE (new_rtx) == CONST)
4923 new_rtx = XEXP (new_rtx, 0);
4924 new_rtx = force_operand (new_rtx, 0);
4925 }
4926
4927 return new_rtx;
4928 }
4929
4930 /* Load the thread pointer into a register. */
4931
4932 rtx
4933 s390_get_thread_pointer (void)
4934 {
4935 rtx tp = gen_reg_rtx (Pmode);
4936
4937 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4938 mark_reg_pointer (tp, BITS_PER_WORD);
4939
4940 return tp;
4941 }
4942
4943 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4944 in s390_tls_symbol which always refers to __tls_get_offset.
4945 The returned offset is written to RESULT_REG and an USE rtx is
4946 generated for TLS_CALL. */
4947
4948 static GTY(()) rtx s390_tls_symbol;
4949
4950 static void
4951 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4952 {
4953 rtx insn;
4954
4955 if (!flag_pic)
4956 emit_insn (s390_load_got ());
4957
4958 if (!s390_tls_symbol)
4959 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4960
4961 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4962 gen_rtx_REG (Pmode, RETURN_REGNUM));
4963
4964 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4965 RTL_CONST_CALL_P (insn) = 1;
4966 }
4967
4968 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4969 this (thread-local) address. REG may be used as temporary. */
4970
4971 static rtx
4972 legitimize_tls_address (rtx addr, rtx reg)
4973 {
4974 rtx new_rtx, tls_call, temp, base, r2;
4975 rtx_insn *insn;
4976
4977 if (GET_CODE (addr) == SYMBOL_REF)
4978 switch (tls_symbolic_operand (addr))
4979 {
4980 case TLS_MODEL_GLOBAL_DYNAMIC:
4981 start_sequence ();
4982 r2 = gen_rtx_REG (Pmode, 2);
4983 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4984 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4985 new_rtx = force_const_mem (Pmode, new_rtx);
4986 emit_move_insn (r2, new_rtx);
4987 s390_emit_tls_call_insn (r2, tls_call);
4988 insn = get_insns ();
4989 end_sequence ();
4990
4991 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4992 temp = gen_reg_rtx (Pmode);
4993 emit_libcall_block (insn, temp, r2, new_rtx);
4994
4995 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4996 if (reg != 0)
4997 {
4998 s390_load_address (reg, new_rtx);
4999 new_rtx = reg;
5000 }
5001 break;
5002
5003 case TLS_MODEL_LOCAL_DYNAMIC:
5004 start_sequence ();
5005 r2 = gen_rtx_REG (Pmode, 2);
5006 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5007 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5008 new_rtx = force_const_mem (Pmode, new_rtx);
5009 emit_move_insn (r2, new_rtx);
5010 s390_emit_tls_call_insn (r2, tls_call);
5011 insn = get_insns ();
5012 end_sequence ();
5013
5014 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5015 temp = gen_reg_rtx (Pmode);
5016 emit_libcall_block (insn, temp, r2, new_rtx);
5017
5018 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5019 base = gen_reg_rtx (Pmode);
5020 s390_load_address (base, new_rtx);
5021
5022 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5023 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5024 new_rtx = force_const_mem (Pmode, new_rtx);
5025 temp = gen_reg_rtx (Pmode);
5026 emit_move_insn (temp, new_rtx);
5027
5028 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5029 if (reg != 0)
5030 {
5031 s390_load_address (reg, new_rtx);
5032 new_rtx = reg;
5033 }
5034 break;
5035
5036 case TLS_MODEL_INITIAL_EXEC:
5037 if (flag_pic == 1)
5038 {
5039 /* Assume GOT offset < 4k. This is handled the same way
5040 in both 31- and 64-bit code. */
5041
5042 if (reload_in_progress || reload_completed)
5043 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5044
5045 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5046 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5047 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5048 new_rtx = gen_const_mem (Pmode, new_rtx);
5049 temp = gen_reg_rtx (Pmode);
5050 emit_move_insn (temp, new_rtx);
5051 }
5052 else
5053 {
5054 /* If the GOT offset might be >= 4k, we determine the position
5055 of the GOT entry via a PC-relative LARL. */
5056
5057 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5058 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5059 temp = gen_reg_rtx (Pmode);
5060 emit_move_insn (temp, new_rtx);
5061
5062 new_rtx = gen_const_mem (Pmode, temp);
5063 temp = gen_reg_rtx (Pmode);
5064 emit_move_insn (temp, new_rtx);
5065 }
5066
5067 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5068 if (reg != 0)
5069 {
5070 s390_load_address (reg, new_rtx);
5071 new_rtx = reg;
5072 }
5073 break;
5074
5075 case TLS_MODEL_LOCAL_EXEC:
5076 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5077 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5078 new_rtx = force_const_mem (Pmode, new_rtx);
5079 temp = gen_reg_rtx (Pmode);
5080 emit_move_insn (temp, new_rtx);
5081
5082 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5083 if (reg != 0)
5084 {
5085 s390_load_address (reg, new_rtx);
5086 new_rtx = reg;
5087 }
5088 break;
5089
5090 default:
5091 gcc_unreachable ();
5092 }
5093
5094 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5095 {
5096 switch (XINT (XEXP (addr, 0), 1))
5097 {
5098 case UNSPEC_INDNTPOFF:
5099 new_rtx = addr;
5100 break;
5101
5102 default:
5103 gcc_unreachable ();
5104 }
5105 }
5106
5107 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5108 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5109 {
5110 new_rtx = XEXP (XEXP (addr, 0), 0);
5111 if (GET_CODE (new_rtx) != SYMBOL_REF)
5112 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5113
5114 new_rtx = legitimize_tls_address (new_rtx, reg);
5115 new_rtx = plus_constant (Pmode, new_rtx,
5116 INTVAL (XEXP (XEXP (addr, 0), 1)));
5117 new_rtx = force_operand (new_rtx, 0);
5118 }
5119
5120 else
5121 gcc_unreachable (); /* for now ... */
5122
5123 return new_rtx;
5124 }
5125
5126 /* Emit insns making the address in operands[1] valid for a standard
5127 move to operands[0]. operands[1] is replaced by an address which
5128 should be used instead of the former RTX to emit the move
5129 pattern. */
5130
5131 void
5132 emit_symbolic_move (rtx *operands)
5133 {
5134 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5135
5136 if (GET_CODE (operands[0]) == MEM)
5137 operands[1] = force_reg (Pmode, operands[1]);
5138 else if (TLS_SYMBOLIC_CONST (operands[1]))
5139 operands[1] = legitimize_tls_address (operands[1], temp);
5140 else if (flag_pic)
5141 operands[1] = legitimize_pic_address (operands[1], temp);
5142 }
5143
5144 /* Try machine-dependent ways of modifying an illegitimate address X
5145 to be legitimate. If we find one, return the new, valid address.
5146
5147 OLDX is the address as it was before break_out_memory_refs was called.
5148 In some cases it is useful to look at this to decide what needs to be done.
5149
5150 MODE is the mode of the operand pointed to by X.
5151
5152 When -fpic is used, special handling is needed for symbolic references.
5153 See comments by legitimize_pic_address for details. */
5154
5155 static rtx
5156 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5157 machine_mode mode ATTRIBUTE_UNUSED)
5158 {
5159 rtx constant_term = const0_rtx;
5160
5161 if (TLS_SYMBOLIC_CONST (x))
5162 {
5163 x = legitimize_tls_address (x, 0);
5164
5165 if (s390_legitimate_address_p (mode, x, FALSE))
5166 return x;
5167 }
5168 else if (GET_CODE (x) == PLUS
5169 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5170 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5171 {
5172 return x;
5173 }
5174 else if (flag_pic)
5175 {
5176 if (SYMBOLIC_CONST (x)
5177 || (GET_CODE (x) == PLUS
5178 && (SYMBOLIC_CONST (XEXP (x, 0))
5179 || SYMBOLIC_CONST (XEXP (x, 1)))))
5180 x = legitimize_pic_address (x, 0);
5181
5182 if (s390_legitimate_address_p (mode, x, FALSE))
5183 return x;
5184 }
5185
5186 x = eliminate_constant_term (x, &constant_term);
5187
5188 /* Optimize loading of large displacements by splitting them
5189 into the multiple of 4K and the rest; this allows the
5190 former to be CSE'd if possible.
5191
5192 Don't do this if the displacement is added to a register
5193 pointing into the stack frame, as the offsets will
5194 change later anyway. */
5195
5196 if (GET_CODE (constant_term) == CONST_INT
5197 && !TARGET_LONG_DISPLACEMENT
5198 && !DISP_IN_RANGE (INTVAL (constant_term))
5199 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5200 {
5201 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5202 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5203
5204 rtx temp = gen_reg_rtx (Pmode);
5205 rtx val = force_operand (GEN_INT (upper), temp);
5206 if (val != temp)
5207 emit_move_insn (temp, val);
5208
5209 x = gen_rtx_PLUS (Pmode, x, temp);
5210 constant_term = GEN_INT (lower);
5211 }
5212
5213 if (GET_CODE (x) == PLUS)
5214 {
5215 if (GET_CODE (XEXP (x, 0)) == REG)
5216 {
5217 rtx temp = gen_reg_rtx (Pmode);
5218 rtx val = force_operand (XEXP (x, 1), temp);
5219 if (val != temp)
5220 emit_move_insn (temp, val);
5221
5222 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5223 }
5224
5225 else if (GET_CODE (XEXP (x, 1)) == REG)
5226 {
5227 rtx temp = gen_reg_rtx (Pmode);
5228 rtx val = force_operand (XEXP (x, 0), temp);
5229 if (val != temp)
5230 emit_move_insn (temp, val);
5231
5232 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5233 }
5234 }
5235
5236 if (constant_term != const0_rtx)
5237 x = gen_rtx_PLUS (Pmode, x, constant_term);
5238
5239 return x;
5240 }
5241
5242 /* Try a machine-dependent way of reloading an illegitimate address AD
5243 operand. If we find one, push the reload and return the new address.
5244
5245 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5246 and TYPE is the reload type of the current reload. */
5247
5248 rtx
5249 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5250 int opnum, int type)
5251 {
5252 if (!optimize || TARGET_LONG_DISPLACEMENT)
5253 return NULL_RTX;
5254
5255 if (GET_CODE (ad) == PLUS)
5256 {
5257 rtx tem = simplify_binary_operation (PLUS, Pmode,
5258 XEXP (ad, 0), XEXP (ad, 1));
5259 if (tem)
5260 ad = tem;
5261 }
5262
5263 if (GET_CODE (ad) == PLUS
5264 && GET_CODE (XEXP (ad, 0)) == REG
5265 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5266 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5267 {
5268 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5269 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5270 rtx cst, tem, new_rtx;
5271
5272 cst = GEN_INT (upper);
5273 if (!legitimate_reload_constant_p (cst))
5274 cst = force_const_mem (Pmode, cst);
5275
5276 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5277 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5278
5279 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5280 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5281 opnum, (enum reload_type) type);
5282 return new_rtx;
5283 }
5284
5285 return NULL_RTX;
5286 }
5287
5288 /* Emit code to move LEN bytes from DST to SRC. */
5289
5290 bool
5291 s390_expand_movmem (rtx dst, rtx src, rtx len)
5292 {
5293 /* When tuning for z10 or higher we rely on the Glibc functions to
5294 do the right thing. Only for constant lengths below 64k we will
5295 generate inline code. */
5296 if (s390_tune >= PROCESSOR_2097_Z10
5297 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5298 return false;
5299
5300 /* Expand memcpy for constant length operands without a loop if it
5301 is shorter that way.
5302
5303 With a constant length argument a
5304 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
5305 if (GET_CODE (len) == CONST_INT
5306 && INTVAL (len) >= 0
5307 && INTVAL (len) <= 256 * 6
5308 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5309 {
5310 HOST_WIDE_INT o, l;
5311
5312 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5313 {
5314 rtx newdst = adjust_address (dst, BLKmode, o);
5315 rtx newsrc = adjust_address (src, BLKmode, o);
5316 emit_insn (gen_movmem_short (newdst, newsrc,
5317 GEN_INT (l > 256 ? 255 : l - 1)));
5318 }
5319 }
5320
5321 else if (TARGET_MVCLE)
5322 {
5323 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5324 }
5325
5326 else
5327 {
5328 rtx dst_addr, src_addr, count, blocks, temp;
5329 rtx_code_label *loop_start_label = gen_label_rtx ();
5330 rtx_code_label *loop_end_label = gen_label_rtx ();
5331 rtx_code_label *end_label = gen_label_rtx ();
5332 machine_mode mode;
5333
5334 mode = GET_MODE (len);
5335 if (mode == VOIDmode)
5336 mode = Pmode;
5337
5338 dst_addr = gen_reg_rtx (Pmode);
5339 src_addr = gen_reg_rtx (Pmode);
5340 count = gen_reg_rtx (mode);
5341 blocks = gen_reg_rtx (mode);
5342
5343 convert_move (count, len, 1);
5344 emit_cmp_and_jump_insns (count, const0_rtx,
5345 EQ, NULL_RTX, mode, 1, end_label);
5346
5347 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5348 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5349 dst = change_address (dst, VOIDmode, dst_addr);
5350 src = change_address (src, VOIDmode, src_addr);
5351
5352 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5353 OPTAB_DIRECT);
5354 if (temp != count)
5355 emit_move_insn (count, temp);
5356
5357 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5358 OPTAB_DIRECT);
5359 if (temp != blocks)
5360 emit_move_insn (blocks, temp);
5361
5362 emit_cmp_and_jump_insns (blocks, const0_rtx,
5363 EQ, NULL_RTX, mode, 1, loop_end_label);
5364
5365 emit_label (loop_start_label);
5366
5367 if (TARGET_Z10
5368 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5369 {
5370 rtx prefetch;
5371
5372 /* Issue a read prefetch for the +3 cache line. */
5373 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5374 const0_rtx, const0_rtx);
5375 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5376 emit_insn (prefetch);
5377
5378 /* Issue a write prefetch for the +3 cache line. */
5379 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5380 const1_rtx, const0_rtx);
5381 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5382 emit_insn (prefetch);
5383 }
5384
5385 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5386 s390_load_address (dst_addr,
5387 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5388 s390_load_address (src_addr,
5389 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5390
5391 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5392 OPTAB_DIRECT);
5393 if (temp != blocks)
5394 emit_move_insn (blocks, temp);
5395
5396 emit_cmp_and_jump_insns (blocks, const0_rtx,
5397 EQ, NULL_RTX, mode, 1, loop_end_label);
5398
5399 emit_jump (loop_start_label);
5400 emit_label (loop_end_label);
5401
5402 emit_insn (gen_movmem_short (dst, src,
5403 convert_to_mode (Pmode, count, 1)));
5404 emit_label (end_label);
5405 }
5406 return true;
5407 }
5408
5409 /* Emit code to set LEN bytes at DST to VAL.
5410 Make use of clrmem if VAL is zero. */
5411
5412 void
5413 s390_expand_setmem (rtx dst, rtx len, rtx val)
5414 {
5415 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5416 return;
5417
5418 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5419
5420 /* Expand setmem/clrmem for a constant length operand without a
5421 loop if it will be shorter that way.
5422 clrmem loop (with PFD) is 30 bytes -> 5 * xc
5423 clrmem loop (without PFD) is 24 bytes -> 4 * xc
5424 setmem loop (with PFD) is 38 bytes -> ~4 * (mvi/stc + mvc)
5425 setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
5426 if (GET_CODE (len) == CONST_INT
5427 && ((val == const0_rtx
5428 && (INTVAL (len) <= 256 * 4
5429 || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD(val,len))))
5430 || (val != const0_rtx && INTVAL (len) <= 257 * 4))
5431 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5432 {
5433 HOST_WIDE_INT o, l;
5434
5435 if (val == const0_rtx)
5436 /* clrmem: emit 256 byte blockwise XCs. */
5437 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5438 {
5439 rtx newdst = adjust_address (dst, BLKmode, o);
5440 emit_insn (gen_clrmem_short (newdst,
5441 GEN_INT (l > 256 ? 255 : l - 1)));
5442 }
5443 else
5444 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5445 setting first byte to val and using a 256 byte mvc with one
5446 byte overlap to propagate the byte. */
5447 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5448 {
5449 rtx newdst = adjust_address (dst, BLKmode, o);
5450 emit_move_insn (adjust_address (dst, QImode, o), val);
5451 if (l > 1)
5452 {
5453 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5454 emit_insn (gen_movmem_short (newdstp1, newdst,
5455 GEN_INT (l > 257 ? 255 : l - 2)));
5456 }
5457 }
5458 }
5459
5460 else if (TARGET_MVCLE)
5461 {
5462 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5463 if (TARGET_64BIT)
5464 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5465 val));
5466 else
5467 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5468 val));
5469 }
5470
5471 else
5472 {
5473 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5474 rtx_code_label *loop_start_label = gen_label_rtx ();
5475 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5476 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5477 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5478 machine_mode mode;
5479
5480 mode = GET_MODE (len);
5481 if (mode == VOIDmode)
5482 mode = Pmode;
5483
5484 dst_addr = gen_reg_rtx (Pmode);
5485 count = gen_reg_rtx (mode);
5486 blocks = gen_reg_rtx (mode);
5487
5488 convert_move (count, len, 1);
5489 emit_cmp_and_jump_insns (count, const0_rtx,
5490 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5491 profile_probability::very_unlikely ());
5492
5493 /* We need to make a copy of the target address since memset is
5494 supposed to return it unmodified. We have to make it here
5495 already since the new reg is used at onebyte_end_label. */
5496 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5497 dst = change_address (dst, VOIDmode, dst_addr);
5498
5499 if (val != const0_rtx)
5500 {
5501 /* When using the overlapping mvc the original target
5502 address is only accessed as single byte entity (even by
5503 the mvc reading this value). */
5504 set_mem_size (dst, 1);
5505 dstp1 = adjust_address (dst, VOIDmode, 1);
5506 emit_cmp_and_jump_insns (count,
5507 const1_rtx, EQ, NULL_RTX, mode, 1,
5508 onebyte_end_label,
5509 profile_probability::very_unlikely ());
5510 }
5511
5512 /* There is one unconditional (mvi+mvc)/xc after the loop
5513 dealing with the rest of the bytes, subtracting two (mvi+mvc)
5514 or one (xc) here leaves this number of bytes to be handled by
5515 it. */
5516 temp = expand_binop (mode, add_optab, count,
5517 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5518 count, 1, OPTAB_DIRECT);
5519 if (temp != count)
5520 emit_move_insn (count, temp);
5521
5522 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5523 OPTAB_DIRECT);
5524 if (temp != blocks)
5525 emit_move_insn (blocks, temp);
5526
5527 emit_cmp_and_jump_insns (blocks, const0_rtx,
5528 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5529
5530 emit_jump (loop_start_label);
5531
5532 if (val != const0_rtx)
5533 {
5534 /* The 1 byte != 0 special case. Not handled efficiently
5535 since we require two jumps for that. However, this
5536 should be very rare. */
5537 emit_label (onebyte_end_label);
5538 emit_move_insn (adjust_address (dst, QImode, 0), val);
5539 emit_jump (zerobyte_end_label);
5540 }
5541
5542 emit_label (loop_start_label);
5543
5544 if (TARGET_SETMEM_PFD (val, len))
5545 {
5546 /* Issue a write prefetch. */
5547 rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
5548 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
5549 const1_rtx, const0_rtx);
5550 emit_insn (prefetch);
5551 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5552 }
5553
5554 if (val == const0_rtx)
5555 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5556 else
5557 {
5558 /* Set the first byte in the block to the value and use an
5559 overlapping mvc for the block. */
5560 emit_move_insn (adjust_address (dst, QImode, 0), val);
5561 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
5562 }
5563 s390_load_address (dst_addr,
5564 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5565
5566 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5567 OPTAB_DIRECT);
5568 if (temp != blocks)
5569 emit_move_insn (blocks, temp);
5570
5571 emit_cmp_and_jump_insns (blocks, const0_rtx,
5572 NE, NULL_RTX, mode, 1, loop_start_label);
5573
5574 emit_label (restbyte_end_label);
5575
5576 if (val == const0_rtx)
5577 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5578 else
5579 {
5580 /* Set the first byte in the block to the value and use an
5581 overlapping mvc for the block. */
5582 emit_move_insn (adjust_address (dst, QImode, 0), val);
5583 /* execute only uses the lowest 8 bits of count that's
5584 exactly what we need here. */
5585 emit_insn (gen_movmem_short (dstp1, dst,
5586 convert_to_mode (Pmode, count, 1)));
5587 }
5588
5589 emit_label (zerobyte_end_label);
5590 }
5591 }
5592
5593 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5594 and return the result in TARGET. */
5595
5596 bool
5597 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5598 {
5599 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5600 rtx tmp;
5601
5602 /* When tuning for z10 or higher we rely on the Glibc functions to
5603 do the right thing. Only for constant lengths below 64k we will
5604 generate inline code. */
5605 if (s390_tune >= PROCESSOR_2097_Z10
5606 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5607 return false;
5608
5609 /* As the result of CMPINT is inverted compared to what we need,
5610 we have to swap the operands. */
5611 tmp = op0; op0 = op1; op1 = tmp;
5612
5613 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5614 {
5615 if (INTVAL (len) > 0)
5616 {
5617 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5618 emit_insn (gen_cmpint (target, ccreg));
5619 }
5620 else
5621 emit_move_insn (target, const0_rtx);
5622 }
5623 else if (TARGET_MVCLE)
5624 {
5625 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5626 emit_insn (gen_cmpint (target, ccreg));
5627 }
5628 else
5629 {
5630 rtx addr0, addr1, count, blocks, temp;
5631 rtx_code_label *loop_start_label = gen_label_rtx ();
5632 rtx_code_label *loop_end_label = gen_label_rtx ();
5633 rtx_code_label *end_label = gen_label_rtx ();
5634 machine_mode mode;
5635
5636 mode = GET_MODE (len);
5637 if (mode == VOIDmode)
5638 mode = Pmode;
5639
5640 addr0 = gen_reg_rtx (Pmode);
5641 addr1 = gen_reg_rtx (Pmode);
5642 count = gen_reg_rtx (mode);
5643 blocks = gen_reg_rtx (mode);
5644
5645 convert_move (count, len, 1);
5646 emit_cmp_and_jump_insns (count, const0_rtx,
5647 EQ, NULL_RTX, mode, 1, end_label);
5648
5649 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5650 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5651 op0 = change_address (op0, VOIDmode, addr0);
5652 op1 = change_address (op1, VOIDmode, addr1);
5653
5654 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5655 OPTAB_DIRECT);
5656 if (temp != count)
5657 emit_move_insn (count, temp);
5658
5659 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5660 OPTAB_DIRECT);
5661 if (temp != blocks)
5662 emit_move_insn (blocks, temp);
5663
5664 emit_cmp_and_jump_insns (blocks, const0_rtx,
5665 EQ, NULL_RTX, mode, 1, loop_end_label);
5666
5667 emit_label (loop_start_label);
5668
5669 if (TARGET_Z10
5670 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5671 {
5672 rtx prefetch;
5673
5674 /* Issue a read prefetch for the +2 cache line of operand 1. */
5675 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5676 const0_rtx, const0_rtx);
5677 emit_insn (prefetch);
5678 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5679
5680 /* Issue a read prefetch for the +2 cache line of operand 2. */
5681 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5682 const0_rtx, const0_rtx);
5683 emit_insn (prefetch);
5684 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5685 }
5686
5687 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5688 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5689 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5690 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5691 temp = gen_rtx_SET (pc_rtx, temp);
5692 emit_jump_insn (temp);
5693
5694 s390_load_address (addr0,
5695 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5696 s390_load_address (addr1,
5697 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5698
5699 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5700 OPTAB_DIRECT);
5701 if (temp != blocks)
5702 emit_move_insn (blocks, temp);
5703
5704 emit_cmp_and_jump_insns (blocks, const0_rtx,
5705 EQ, NULL_RTX, mode, 1, loop_end_label);
5706
5707 emit_jump (loop_start_label);
5708 emit_label (loop_end_label);
5709
5710 emit_insn (gen_cmpmem_short (op0, op1,
5711 convert_to_mode (Pmode, count, 1)));
5712 emit_label (end_label);
5713
5714 emit_insn (gen_cmpint (target, ccreg));
5715 }
5716 return true;
5717 }
5718
5719 /* Emit a conditional jump to LABEL for condition code mask MASK using
5720 comparsion operator COMPARISON. Return the emitted jump insn. */
5721
5722 static rtx_insn *
5723 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5724 {
5725 rtx temp;
5726
5727 gcc_assert (comparison == EQ || comparison == NE);
5728 gcc_assert (mask > 0 && mask < 15);
5729
5730 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5731 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5732 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5733 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5734 temp = gen_rtx_SET (pc_rtx, temp);
5735 return emit_jump_insn (temp);
5736 }
5737
5738 /* Emit the instructions to implement strlen of STRING and store the
5739 result in TARGET. The string has the known ALIGNMENT. This
5740 version uses vector instructions and is therefore not appropriate
5741 for targets prior to z13. */
5742
5743 void
5744 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5745 {
5746 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5747 rtx str_reg = gen_reg_rtx (V16QImode);
5748 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5749 rtx str_idx_reg = gen_reg_rtx (Pmode);
5750 rtx result_reg = gen_reg_rtx (V16QImode);
5751 rtx is_aligned_label = gen_label_rtx ();
5752 rtx into_loop_label = NULL_RTX;
5753 rtx loop_start_label = gen_label_rtx ();
5754 rtx temp;
5755 rtx len = gen_reg_rtx (QImode);
5756 rtx cond;
5757
5758 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5759 emit_move_insn (str_idx_reg, const0_rtx);
5760
5761 if (INTVAL (alignment) < 16)
5762 {
5763 /* Check whether the address happens to be aligned properly so
5764 jump directly to the aligned loop. */
5765 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5766 str_addr_base_reg, GEN_INT (15)),
5767 const0_rtx, EQ, NULL_RTX,
5768 Pmode, 1, is_aligned_label);
5769
5770 temp = gen_reg_rtx (Pmode);
5771 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5772 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5773 gcc_assert (REG_P (temp));
5774 highest_index_to_load_reg =
5775 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5776 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5777 gcc_assert (REG_P (highest_index_to_load_reg));
5778 emit_insn (gen_vllv16qi (str_reg,
5779 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5780 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5781
5782 into_loop_label = gen_label_rtx ();
5783 s390_emit_jump (into_loop_label, NULL_RTX);
5784 emit_barrier ();
5785 }
5786
5787 emit_label (is_aligned_label);
5788 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5789
5790 /* Reaching this point we are only performing 16 bytes aligned
5791 loads. */
5792 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5793
5794 emit_label (loop_start_label);
5795 LABEL_NUSES (loop_start_label) = 1;
5796
5797 /* Load 16 bytes of the string into VR. */
5798 emit_move_insn (str_reg,
5799 gen_rtx_MEM (V16QImode,
5800 gen_rtx_PLUS (Pmode, str_idx_reg,
5801 str_addr_base_reg)));
5802 if (into_loop_label != NULL_RTX)
5803 {
5804 emit_label (into_loop_label);
5805 LABEL_NUSES (into_loop_label) = 1;
5806 }
5807
5808 /* Increment string index by 16 bytes. */
5809 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5810 str_idx_reg, 1, OPTAB_DIRECT);
5811
5812 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5813 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5814
5815 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5816 REG_BR_PROB,
5817 profile_probability::very_likely ().to_reg_br_prob_note ());
5818 emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5819
5820 /* If the string pointer wasn't aligned we have loaded less then 16
5821 bytes and the remaining bytes got filled with zeros (by vll).
5822 Now we have to check whether the resulting index lies within the
5823 bytes actually part of the string. */
5824
5825 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5826 highest_index_to_load_reg);
5827 s390_load_address (highest_index_to_load_reg,
5828 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5829 const1_rtx));
5830 if (TARGET_64BIT)
5831 emit_insn (gen_movdicc (str_idx_reg, cond,
5832 highest_index_to_load_reg, str_idx_reg));
5833 else
5834 emit_insn (gen_movsicc (str_idx_reg, cond,
5835 highest_index_to_load_reg, str_idx_reg));
5836
5837 add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
5838 profile_probability::very_unlikely ());
5839
5840 expand_binop (Pmode, add_optab, str_idx_reg,
5841 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5842 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5843 here. */
5844 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5845 convert_to_mode (Pmode, len, 1),
5846 target, 1, OPTAB_DIRECT);
5847 if (temp != target)
5848 emit_move_insn (target, temp);
5849 }
5850
5851 void
5852 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5853 {
5854 rtx temp = gen_reg_rtx (Pmode);
5855 rtx src_addr = XEXP (src, 0);
5856 rtx dst_addr = XEXP (dst, 0);
5857 rtx src_addr_reg = gen_reg_rtx (Pmode);
5858 rtx dst_addr_reg = gen_reg_rtx (Pmode);
5859 rtx offset = gen_reg_rtx (Pmode);
5860 rtx vsrc = gen_reg_rtx (V16QImode);
5861 rtx vpos = gen_reg_rtx (V16QImode);
5862 rtx loadlen = gen_reg_rtx (SImode);
5863 rtx gpos_qi = gen_reg_rtx(QImode);
5864 rtx gpos = gen_reg_rtx (SImode);
5865 rtx done_label = gen_label_rtx ();
5866 rtx loop_label = gen_label_rtx ();
5867 rtx exit_label = gen_label_rtx ();
5868 rtx full_label = gen_label_rtx ();
5869
5870 /* Perform a quick check for string ending on the first up to 16
5871 bytes and exit early if successful. */
5872
5873 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5874 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5875 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5876 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
5877 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5878 /* gpos is the byte index if a zero was found and 16 otherwise.
5879 So if it is lower than the loaded bytes we have a hit. */
5880 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5881 full_label);
5882 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5883
5884 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5885 1, OPTAB_DIRECT);
5886 emit_jump (exit_label);
5887 emit_barrier ();
5888
5889 emit_label (full_label);
5890 LABEL_NUSES (full_label) = 1;
5891
5892 /* Calculate `offset' so that src + offset points to the last byte
5893 before 16 byte alignment. */
5894
5895 /* temp = src_addr & 0xf */
5896 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5897 1, OPTAB_DIRECT);
5898
5899 /* offset = 0xf - temp */
5900 emit_move_insn (offset, GEN_INT (15));
5901 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5902 1, OPTAB_DIRECT);
5903
5904 /* Store `offset' bytes in the dstination string. The quick check
5905 has loaded at least `offset' bytes into vsrc. */
5906
5907 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5908
5909 /* Advance to the next byte to be loaded. */
5910 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5911 1, OPTAB_DIRECT);
5912
5913 /* Make sure the addresses are single regs which can be used as a
5914 base. */
5915 emit_move_insn (src_addr_reg, src_addr);
5916 emit_move_insn (dst_addr_reg, dst_addr);
5917
5918 /* MAIN LOOP */
5919
5920 emit_label (loop_label);
5921 LABEL_NUSES (loop_label) = 1;
5922
5923 emit_move_insn (vsrc,
5924 gen_rtx_MEM (V16QImode,
5925 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
5926
5927 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
5928 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5929 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
5930 REG_BR_PROB, profile_probability::very_unlikely ()
5931 .to_reg_br_prob_note ());
5932
5933 emit_move_insn (gen_rtx_MEM (V16QImode,
5934 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
5935 vsrc);
5936 /* offset += 16 */
5937 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
5938 offset, 1, OPTAB_DIRECT);
5939
5940 emit_jump (loop_label);
5941 emit_barrier ();
5942
5943 /* REGULAR EXIT */
5944
5945 /* We are done. Add the offset of the zero character to the dst_addr
5946 pointer to get the result. */
5947
5948 emit_label (done_label);
5949 LABEL_NUSES (done_label) = 1;
5950
5951 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
5952 1, OPTAB_DIRECT);
5953
5954 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
5955 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5956
5957 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
5958
5959 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
5960 1, OPTAB_DIRECT);
5961
5962 /* EARLY EXIT */
5963
5964 emit_label (exit_label);
5965 LABEL_NUSES (exit_label) = 1;
5966 }
5967
5968
5969 /* Expand conditional increment or decrement using alc/slb instructions.
5970 Should generate code setting DST to either SRC or SRC + INCREMENT,
5971 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5972 Returns true if successful, false otherwise.
5973
5974 That makes it possible to implement some if-constructs without jumps e.g.:
5975 (borrow = CC0 | CC1 and carry = CC2 | CC3)
5976 unsigned int a, b, c;
5977 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
5978 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
5979 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
5980 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
5981
5982 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5983 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
5984 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5985 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
5986 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
5987
5988 bool
5989 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5990 rtx dst, rtx src, rtx increment)
5991 {
5992 machine_mode cmp_mode;
5993 machine_mode cc_mode;
5994 rtx op_res;
5995 rtx insn;
5996 rtvec p;
5997 int ret;
5998
5999 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6000 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6001 cmp_mode = SImode;
6002 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6003 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6004 cmp_mode = DImode;
6005 else
6006 return false;
6007
6008 /* Try ADD LOGICAL WITH CARRY. */
6009 if (increment == const1_rtx)
6010 {
6011 /* Determine CC mode to use. */
6012 if (cmp_code == EQ || cmp_code == NE)
6013 {
6014 if (cmp_op1 != const0_rtx)
6015 {
6016 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6017 NULL_RTX, 0, OPTAB_WIDEN);
6018 cmp_op1 = const0_rtx;
6019 }
6020
6021 cmp_code = cmp_code == EQ ? LEU : GTU;
6022 }
6023
6024 if (cmp_code == LTU || cmp_code == LEU)
6025 {
6026 rtx tem = cmp_op0;
6027 cmp_op0 = cmp_op1;
6028 cmp_op1 = tem;
6029 cmp_code = swap_condition (cmp_code);
6030 }
6031
6032 switch (cmp_code)
6033 {
6034 case GTU:
6035 cc_mode = CCUmode;
6036 break;
6037
6038 case GEU:
6039 cc_mode = CCL3mode;
6040 break;
6041
6042 default:
6043 return false;
6044 }
6045
6046 /* Emit comparison instruction pattern. */
6047 if (!register_operand (cmp_op0, cmp_mode))
6048 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6049
6050 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6051 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6052 /* We use insn_invalid_p here to add clobbers if required. */
6053 ret = insn_invalid_p (emit_insn (insn), false);
6054 gcc_assert (!ret);
6055
6056 /* Emit ALC instruction pattern. */
6057 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6058 gen_rtx_REG (cc_mode, CC_REGNUM),
6059 const0_rtx);
6060
6061 if (src != const0_rtx)
6062 {
6063 if (!register_operand (src, GET_MODE (dst)))
6064 src = force_reg (GET_MODE (dst), src);
6065
6066 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6067 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6068 }
6069
6070 p = rtvec_alloc (2);
6071 RTVEC_ELT (p, 0) =
6072 gen_rtx_SET (dst, op_res);
6073 RTVEC_ELT (p, 1) =
6074 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6075 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6076
6077 return true;
6078 }
6079
6080 /* Try SUBTRACT LOGICAL WITH BORROW. */
6081 if (increment == constm1_rtx)
6082 {
6083 /* Determine CC mode to use. */
6084 if (cmp_code == EQ || cmp_code == NE)
6085 {
6086 if (cmp_op1 != const0_rtx)
6087 {
6088 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6089 NULL_RTX, 0, OPTAB_WIDEN);
6090 cmp_op1 = const0_rtx;
6091 }
6092
6093 cmp_code = cmp_code == EQ ? LEU : GTU;
6094 }
6095
6096 if (cmp_code == GTU || cmp_code == GEU)
6097 {
6098 rtx tem = cmp_op0;
6099 cmp_op0 = cmp_op1;
6100 cmp_op1 = tem;
6101 cmp_code = swap_condition (cmp_code);
6102 }
6103
6104 switch (cmp_code)
6105 {
6106 case LEU:
6107 cc_mode = CCUmode;
6108 break;
6109
6110 case LTU:
6111 cc_mode = CCL3mode;
6112 break;
6113
6114 default:
6115 return false;
6116 }
6117
6118 /* Emit comparison instruction pattern. */
6119 if (!register_operand (cmp_op0, cmp_mode))
6120 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6121
6122 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6123 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6124 /* We use insn_invalid_p here to add clobbers if required. */
6125 ret = insn_invalid_p (emit_insn (insn), false);
6126 gcc_assert (!ret);
6127
6128 /* Emit SLB instruction pattern. */
6129 if (!register_operand (src, GET_MODE (dst)))
6130 src = force_reg (GET_MODE (dst), src);
6131
6132 op_res = gen_rtx_MINUS (GET_MODE (dst),
6133 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6134 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6135 gen_rtx_REG (cc_mode, CC_REGNUM),
6136 const0_rtx));
6137 p = rtvec_alloc (2);
6138 RTVEC_ELT (p, 0) =
6139 gen_rtx_SET (dst, op_res);
6140 RTVEC_ELT (p, 1) =
6141 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6142 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6143
6144 return true;
6145 }
6146
6147 return false;
6148 }
6149
6150 /* Expand code for the insv template. Return true if successful. */
6151
6152 bool
6153 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6154 {
6155 int bitsize = INTVAL (op1);
6156 int bitpos = INTVAL (op2);
6157 machine_mode mode = GET_MODE (dest);
6158 machine_mode smode;
6159 int smode_bsize, mode_bsize;
6160 rtx op, clobber;
6161
6162 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6163 return false;
6164
6165 /* Generate INSERT IMMEDIATE (IILL et al). */
6166 /* (set (ze (reg)) (const_int)). */
6167 if (TARGET_ZARCH
6168 && register_operand (dest, word_mode)
6169 && (bitpos % 16) == 0
6170 && (bitsize % 16) == 0
6171 && const_int_operand (src, VOIDmode))
6172 {
6173 HOST_WIDE_INT val = INTVAL (src);
6174 int regpos = bitpos + bitsize;
6175
6176 while (regpos > bitpos)
6177 {
6178 machine_mode putmode;
6179 int putsize;
6180
6181 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6182 putmode = SImode;
6183 else
6184 putmode = HImode;
6185
6186 putsize = GET_MODE_BITSIZE (putmode);
6187 regpos -= putsize;
6188 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6189 GEN_INT (putsize),
6190 GEN_INT (regpos)),
6191 gen_int_mode (val, putmode));
6192 val >>= putsize;
6193 }
6194 gcc_assert (regpos == bitpos);
6195 return true;
6196 }
6197
6198 smode = smallest_int_mode_for_size (bitsize);
6199 smode_bsize = GET_MODE_BITSIZE (smode);
6200 mode_bsize = GET_MODE_BITSIZE (mode);
6201
6202 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6203 if (bitpos == 0
6204 && (bitsize % BITS_PER_UNIT) == 0
6205 && MEM_P (dest)
6206 && (register_operand (src, word_mode)
6207 || const_int_operand (src, VOIDmode)))
6208 {
6209 /* Emit standard pattern if possible. */
6210 if (smode_bsize == bitsize)
6211 {
6212 emit_move_insn (adjust_address (dest, smode, 0),
6213 gen_lowpart (smode, src));
6214 return true;
6215 }
6216
6217 /* (set (ze (mem)) (const_int)). */
6218 else if (const_int_operand (src, VOIDmode))
6219 {
6220 int size = bitsize / BITS_PER_UNIT;
6221 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6222 BLKmode,
6223 UNITS_PER_WORD - size);
6224
6225 dest = adjust_address (dest, BLKmode, 0);
6226 set_mem_size (dest, size);
6227 s390_expand_movmem (dest, src_mem, GEN_INT (size));
6228 return true;
6229 }
6230
6231 /* (set (ze (mem)) (reg)). */
6232 else if (register_operand (src, word_mode))
6233 {
6234 if (bitsize <= 32)
6235 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6236 const0_rtx), src);
6237 else
6238 {
6239 /* Emit st,stcmh sequence. */
6240 int stcmh_width = bitsize - 32;
6241 int size = stcmh_width / BITS_PER_UNIT;
6242
6243 emit_move_insn (adjust_address (dest, SImode, size),
6244 gen_lowpart (SImode, src));
6245 set_mem_size (dest, size);
6246 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6247 GEN_INT (stcmh_width),
6248 const0_rtx),
6249 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6250 }
6251 return true;
6252 }
6253 }
6254
6255 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6256 if ((bitpos % BITS_PER_UNIT) == 0
6257 && (bitsize % BITS_PER_UNIT) == 0
6258 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6259 && MEM_P (src)
6260 && (mode == DImode || mode == SImode)
6261 && register_operand (dest, mode))
6262 {
6263 /* Emit a strict_low_part pattern if possible. */
6264 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6265 {
6266 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6267 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6268 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6269 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6270 return true;
6271 }
6272
6273 /* ??? There are more powerful versions of ICM that are not
6274 completely represented in the md file. */
6275 }
6276
6277 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6278 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6279 {
6280 machine_mode mode_s = GET_MODE (src);
6281
6282 if (CONSTANT_P (src))
6283 {
6284 /* For constant zero values the representation with AND
6285 appears to be folded in more situations than the (set
6286 (zero_extract) ...).
6287 We only do this when the start and end of the bitfield
6288 remain in the same SImode chunk. That way nihf or nilf
6289 can be used.
6290 The AND patterns might still generate a risbg for this. */
6291 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6292 return false;
6293 else
6294 src = force_reg (mode, src);
6295 }
6296 else if (mode_s != mode)
6297 {
6298 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6299 src = force_reg (mode_s, src);
6300 src = gen_lowpart (mode, src);
6301 }
6302
6303 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6304 op = gen_rtx_SET (op, src);
6305
6306 if (!TARGET_ZEC12)
6307 {
6308 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6309 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6310 }
6311 emit_insn (op);
6312
6313 return true;
6314 }
6315
6316 return false;
6317 }
6318
6319 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6320 register that holds VAL of mode MODE shifted by COUNT bits. */
6321
6322 static inline rtx
6323 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6324 {
6325 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6326 NULL_RTX, 1, OPTAB_DIRECT);
6327 return expand_simple_binop (SImode, ASHIFT, val, count,
6328 NULL_RTX, 1, OPTAB_DIRECT);
6329 }
6330
6331 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6332 the result in TARGET. */
6333
6334 void
6335 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6336 rtx cmp_op1, rtx cmp_op2)
6337 {
6338 machine_mode mode = GET_MODE (target);
6339 bool neg_p = false, swap_p = false;
6340 rtx tmp;
6341
6342 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6343 {
6344 switch (cond)
6345 {
6346 /* NE a != b -> !(a == b) */
6347 case NE: cond = EQ; neg_p = true; break;
6348 /* UNGT a u> b -> !(b >= a) */
6349 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6350 /* UNGE a u>= b -> !(b > a) */
6351 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6352 /* LE: a <= b -> b >= a */
6353 case LE: cond = GE; swap_p = true; break;
6354 /* UNLE: a u<= b -> !(a > b) */
6355 case UNLE: cond = GT; neg_p = true; break;
6356 /* LT: a < b -> b > a */
6357 case LT: cond = GT; swap_p = true; break;
6358 /* UNLT: a u< b -> !(a >= b) */
6359 case UNLT: cond = GE; neg_p = true; break;
6360 case UNEQ:
6361 emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6362 return;
6363 case LTGT:
6364 emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6365 return;
6366 case ORDERED:
6367 emit_insn (gen_vec_ordered (target, cmp_op1, cmp_op2));
6368 return;
6369 case UNORDERED:
6370 emit_insn (gen_vec_unordered (target, cmp_op1, cmp_op2));
6371 return;
6372 default: break;
6373 }
6374 }
6375 else
6376 {
6377 switch (cond)
6378 {
6379 /* NE: a != b -> !(a == b) */
6380 case NE: cond = EQ; neg_p = true; break;
6381 /* GE: a >= b -> !(b > a) */
6382 case GE: cond = GT; neg_p = true; swap_p = true; break;
6383 /* GEU: a >= b -> !(b > a) */
6384 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6385 /* LE: a <= b -> !(a > b) */
6386 case LE: cond = GT; neg_p = true; break;
6387 /* LEU: a <= b -> !(a > b) */
6388 case LEU: cond = GTU; neg_p = true; break;
6389 /* LT: a < b -> b > a */
6390 case LT: cond = GT; swap_p = true; break;
6391 /* LTU: a < b -> b > a */
6392 case LTU: cond = GTU; swap_p = true; break;
6393 default: break;
6394 }
6395 }
6396
6397 if (swap_p)
6398 {
6399 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6400 }
6401
6402 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6403 mode,
6404 cmp_op1, cmp_op2)));
6405 if (neg_p)
6406 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6407 }
6408
6409 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6410 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6411 elements in CMP1 and CMP2 fulfill the comparison.
6412 This function is only used to emit patterns for the vx builtins and
6413 therefore only handles comparison codes required by the
6414 builtins. */
6415 void
6416 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6417 rtx cmp1, rtx cmp2, bool all_p)
6418 {
6419 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6420 rtx tmp_reg = gen_reg_rtx (SImode);
6421 bool swap_p = false;
6422
6423 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6424 {
6425 switch (code)
6426 {
6427 case EQ:
6428 case NE:
6429 cc_producer_mode = CCVEQmode;
6430 break;
6431 case GE:
6432 case LT:
6433 code = swap_condition (code);
6434 swap_p = true;
6435 /* fallthrough */
6436 case GT:
6437 case LE:
6438 cc_producer_mode = CCVIHmode;
6439 break;
6440 case GEU:
6441 case LTU:
6442 code = swap_condition (code);
6443 swap_p = true;
6444 /* fallthrough */
6445 case GTU:
6446 case LEU:
6447 cc_producer_mode = CCVIHUmode;
6448 break;
6449 default:
6450 gcc_unreachable ();
6451 }
6452
6453 scratch_mode = GET_MODE (cmp1);
6454 /* These codes represent inverted CC interpretations. Inverting
6455 an ALL CC mode results in an ANY CC mode and the other way
6456 around. Invert the all_p flag here to compensate for
6457 that. */
6458 if (code == NE || code == LE || code == LEU)
6459 all_p = !all_p;
6460
6461 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6462 }
6463 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6464 {
6465 bool inv_p = false;
6466
6467 switch (code)
6468 {
6469 case EQ: cc_producer_mode = CCVEQmode; break;
6470 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6471 case GT: cc_producer_mode = CCVFHmode; break;
6472 case GE: cc_producer_mode = CCVFHEmode; break;
6473 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6474 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6475 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6476 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6477 default: gcc_unreachable ();
6478 }
6479 scratch_mode = mode_for_int_vector (GET_MODE (cmp1)).require ();
6480
6481 if (inv_p)
6482 all_p = !all_p;
6483
6484 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6485 }
6486 else
6487 gcc_unreachable ();
6488
6489 if (swap_p)
6490 {
6491 rtx tmp = cmp2;
6492 cmp2 = cmp1;
6493 cmp1 = tmp;
6494 }
6495
6496 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6497 gen_rtvec (2, gen_rtx_SET (
6498 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6499 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6500 gen_rtx_CLOBBER (VOIDmode,
6501 gen_rtx_SCRATCH (scratch_mode)))));
6502 emit_move_insn (target, const0_rtx);
6503 emit_move_insn (tmp_reg, const1_rtx);
6504
6505 emit_move_insn (target,
6506 gen_rtx_IF_THEN_ELSE (SImode,
6507 gen_rtx_fmt_ee (code, VOIDmode,
6508 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6509 const0_rtx),
6510 tmp_reg, target));
6511 }
6512
6513 /* Invert the comparison CODE applied to a CC mode. This is only safe
6514 if we know whether there result was created by a floating point
6515 compare or not. For the CCV modes this is encoded as part of the
6516 mode. */
6517 enum rtx_code
6518 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6519 {
6520 /* Reversal of FP compares takes care -- an ordered compare
6521 becomes an unordered compare and vice versa. */
6522 if (mode == CCVFALLmode || mode == CCVFANYmode)
6523 return reverse_condition_maybe_unordered (code);
6524 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6525 return reverse_condition (code);
6526 else
6527 gcc_unreachable ();
6528 }
6529
6530 /* Generate a vector comparison expression loading either elements of
6531 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6532 and CMP_OP2. */
6533
6534 void
6535 s390_expand_vcond (rtx target, rtx then, rtx els,
6536 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6537 {
6538 rtx tmp;
6539 machine_mode result_mode;
6540 rtx result_target;
6541
6542 machine_mode target_mode = GET_MODE (target);
6543 machine_mode cmp_mode = GET_MODE (cmp_op1);
6544 rtx op = (cond == LT) ? els : then;
6545
6546 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6547 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6548 for short and byte (x >> 15 and x >> 7 respectively). */
6549 if ((cond == LT || cond == GE)
6550 && target_mode == cmp_mode
6551 && cmp_op2 == CONST0_RTX (cmp_mode)
6552 && op == CONST0_RTX (target_mode)
6553 && s390_vector_mode_supported_p (target_mode)
6554 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6555 {
6556 rtx negop = (cond == LT) ? then : els;
6557
6558 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6559
6560 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6561 if (negop == CONST1_RTX (target_mode))
6562 {
6563 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6564 GEN_INT (shift), target,
6565 1, OPTAB_DIRECT);
6566 if (res != target)
6567 emit_move_insn (target, res);
6568 return;
6569 }
6570
6571 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6572 else if (all_ones_operand (negop, target_mode))
6573 {
6574 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6575 GEN_INT (shift), target,
6576 0, OPTAB_DIRECT);
6577 if (res != target)
6578 emit_move_insn (target, res);
6579 return;
6580 }
6581 }
6582
6583 /* We always use an integral type vector to hold the comparison
6584 result. */
6585 result_mode = mode_for_int_vector (cmp_mode).require ();
6586 result_target = gen_reg_rtx (result_mode);
6587
6588 /* We allow vector immediates as comparison operands that
6589 can be handled by the optimization above but not by the
6590 following code. Hence, force them into registers here. */
6591 if (!REG_P (cmp_op1))
6592 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6593
6594 if (!REG_P (cmp_op2))
6595 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6596
6597 s390_expand_vec_compare (result_target, cond,
6598 cmp_op1, cmp_op2);
6599
6600 /* If the results are supposed to be either -1 or 0 we are done
6601 since this is what our compare instructions generate anyway. */
6602 if (all_ones_operand (then, GET_MODE (then))
6603 && const0_operand (els, GET_MODE (els)))
6604 {
6605 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6606 result_target, 0));
6607 return;
6608 }
6609
6610 /* Otherwise we will do a vsel afterwards. */
6611 /* This gets triggered e.g.
6612 with gcc.c-torture/compile/pr53410-1.c */
6613 if (!REG_P (then))
6614 then = force_reg (target_mode, then);
6615
6616 if (!REG_P (els))
6617 els = force_reg (target_mode, els);
6618
6619 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6620 result_target,
6621 CONST0_RTX (result_mode));
6622
6623 /* We compared the result against zero above so we have to swap then
6624 and els here. */
6625 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6626
6627 gcc_assert (target_mode == GET_MODE (then));
6628 emit_insn (gen_rtx_SET (target, tmp));
6629 }
6630
6631 /* Emit the RTX necessary to initialize the vector TARGET with values
6632 in VALS. */
6633 void
6634 s390_expand_vec_init (rtx target, rtx vals)
6635 {
6636 machine_mode mode = GET_MODE (target);
6637 machine_mode inner_mode = GET_MODE_INNER (mode);
6638 int n_elts = GET_MODE_NUNITS (mode);
6639 bool all_same = true, all_regs = true, all_const_int = true;
6640 rtx x;
6641 int i;
6642
6643 for (i = 0; i < n_elts; ++i)
6644 {
6645 x = XVECEXP (vals, 0, i);
6646
6647 if (!CONST_INT_P (x))
6648 all_const_int = false;
6649
6650 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6651 all_same = false;
6652
6653 if (!REG_P (x))
6654 all_regs = false;
6655 }
6656
6657 /* Use vector gen mask or vector gen byte mask if possible. */
6658 if (all_same && all_const_int
6659 && (XVECEXP (vals, 0, 0) == const0_rtx
6660 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6661 NULL, NULL)
6662 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6663 {
6664 emit_insn (gen_rtx_SET (target,
6665 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6666 return;
6667 }
6668
6669 /* Use vector replicate instructions. vlrep/vrepi/vrep */
6670 if (all_same)
6671 {
6672 rtx elem = XVECEXP (vals, 0, 0);
6673
6674 /* vec_splats accepts general_operand as source. */
6675 if (!general_operand (elem, GET_MODE (elem)))
6676 elem = force_reg (inner_mode, elem);
6677
6678 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6679 return;
6680 }
6681
6682 if (all_regs
6683 && REG_P (target)
6684 && n_elts == 2
6685 && GET_MODE_SIZE (inner_mode) == 8)
6686 {
6687 /* Use vector load pair. */
6688 emit_insn (gen_rtx_SET (target,
6689 gen_rtx_VEC_CONCAT (mode,
6690 XVECEXP (vals, 0, 0),
6691 XVECEXP (vals, 0, 1))));
6692 return;
6693 }
6694
6695 /* Use vector load logical element and zero. */
6696 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6697 {
6698 bool found = true;
6699
6700 x = XVECEXP (vals, 0, 0);
6701 if (memory_operand (x, inner_mode))
6702 {
6703 for (i = 1; i < n_elts; ++i)
6704 found = found && XVECEXP (vals, 0, i) == const0_rtx;
6705
6706 if (found)
6707 {
6708 machine_mode half_mode = (inner_mode == SFmode
6709 ? V2SFmode : V2SImode);
6710 emit_insn (gen_rtx_SET (target,
6711 gen_rtx_VEC_CONCAT (mode,
6712 gen_rtx_VEC_CONCAT (half_mode,
6713 x,
6714 const0_rtx),
6715 gen_rtx_VEC_CONCAT (half_mode,
6716 const0_rtx,
6717 const0_rtx))));
6718 return;
6719 }
6720 }
6721 }
6722
6723 /* We are about to set the vector elements one by one. Zero out the
6724 full register first in order to help the data flow framework to
6725 detect it as full VR set. */
6726 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6727
6728 /* Unfortunately the vec_init expander is not allowed to fail. So
6729 we have to implement the fallback ourselves. */
6730 for (i = 0; i < n_elts; i++)
6731 {
6732 rtx elem = XVECEXP (vals, 0, i);
6733 if (!general_operand (elem, GET_MODE (elem)))
6734 elem = force_reg (inner_mode, elem);
6735
6736 emit_insn (gen_rtx_SET (target,
6737 gen_rtx_UNSPEC (mode,
6738 gen_rtvec (3, elem,
6739 GEN_INT (i), target),
6740 UNSPEC_VEC_SET)));
6741 }
6742 }
6743
6744 /* Structure to hold the initial parameters for a compare_and_swap operation
6745 in HImode and QImode. */
6746
6747 struct alignment_context
6748 {
6749 rtx memsi; /* SI aligned memory location. */
6750 rtx shift; /* Bit offset with regard to lsb. */
6751 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6752 rtx modemaski; /* ~modemask */
6753 bool aligned; /* True if memory is aligned, false else. */
6754 };
6755
6756 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6757 structure AC for transparent simplifying, if the memory alignment is known
6758 to be at least 32bit. MEM is the memory location for the actual operation
6759 and MODE its mode. */
6760
6761 static void
6762 init_alignment_context (struct alignment_context *ac, rtx mem,
6763 machine_mode mode)
6764 {
6765 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6766 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6767
6768 if (ac->aligned)
6769 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6770 else
6771 {
6772 /* Alignment is unknown. */
6773 rtx byteoffset, addr, align;
6774
6775 /* Force the address into a register. */
6776 addr = force_reg (Pmode, XEXP (mem, 0));
6777
6778 /* Align it to SImode. */
6779 align = expand_simple_binop (Pmode, AND, addr,
6780 GEN_INT (-GET_MODE_SIZE (SImode)),
6781 NULL_RTX, 1, OPTAB_DIRECT);
6782 /* Generate MEM. */
6783 ac->memsi = gen_rtx_MEM (SImode, align);
6784 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6785 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6786 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6787
6788 /* Calculate shiftcount. */
6789 byteoffset = expand_simple_binop (Pmode, AND, addr,
6790 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6791 NULL_RTX, 1, OPTAB_DIRECT);
6792 /* As we already have some offset, evaluate the remaining distance. */
6793 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6794 NULL_RTX, 1, OPTAB_DIRECT);
6795 }
6796
6797 /* Shift is the byte count, but we need the bitcount. */
6798 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6799 NULL_RTX, 1, OPTAB_DIRECT);
6800
6801 /* Calculate masks. */
6802 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6803 GEN_INT (GET_MODE_MASK (mode)),
6804 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6805 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6806 NULL_RTX, 1);
6807 }
6808
6809 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6810 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6811 perform the merge in SEQ2. */
6812
6813 static rtx
6814 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6815 machine_mode mode, rtx val, rtx ins)
6816 {
6817 rtx tmp;
6818
6819 if (ac->aligned)
6820 {
6821 start_sequence ();
6822 tmp = copy_to_mode_reg (SImode, val);
6823 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6824 const0_rtx, ins))
6825 {
6826 *seq1 = NULL;
6827 *seq2 = get_insns ();
6828 end_sequence ();
6829 return tmp;
6830 }
6831 end_sequence ();
6832 }
6833
6834 /* Failed to use insv. Generate a two part shift and mask. */
6835 start_sequence ();
6836 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6837 *seq1 = get_insns ();
6838 end_sequence ();
6839
6840 start_sequence ();
6841 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6842 *seq2 = get_insns ();
6843 end_sequence ();
6844
6845 return tmp;
6846 }
6847
6848 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6849 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6850 value to set if CMP == MEM. */
6851
6852 static void
6853 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6854 rtx cmp, rtx new_rtx, bool is_weak)
6855 {
6856 struct alignment_context ac;
6857 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6858 rtx res = gen_reg_rtx (SImode);
6859 rtx_code_label *csloop = NULL, *csend = NULL;
6860
6861 gcc_assert (MEM_P (mem));
6862
6863 init_alignment_context (&ac, mem, mode);
6864
6865 /* Load full word. Subsequent loads are performed by CS. */
6866 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6867 NULL_RTX, 1, OPTAB_DIRECT);
6868
6869 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6870 possible, we try to use insv to make this happen efficiently. If
6871 that fails we'll generate code both inside and outside the loop. */
6872 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6873 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6874
6875 if (seq0)
6876 emit_insn (seq0);
6877 if (seq1)
6878 emit_insn (seq1);
6879
6880 /* Start CS loop. */
6881 if (!is_weak)
6882 {
6883 /* Begin assuming success. */
6884 emit_move_insn (btarget, const1_rtx);
6885
6886 csloop = gen_label_rtx ();
6887 csend = gen_label_rtx ();
6888 emit_label (csloop);
6889 }
6890
6891 /* val = "<mem>00..0<mem>"
6892 * cmp = "00..0<cmp>00..0"
6893 * new = "00..0<new>00..0"
6894 */
6895
6896 emit_insn (seq2);
6897 emit_insn (seq3);
6898
6899 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
6900 if (is_weak)
6901 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6902 else
6903 {
6904 rtx tmp;
6905
6906 /* Jump to end if we're done (likely?). */
6907 s390_emit_jump (csend, cc);
6908
6909 /* Check for changes outside mode, and loop internal if so.
6910 Arrange the moves so that the compare is adjacent to the
6911 branch so that we can generate CRJ. */
6912 tmp = copy_to_reg (val);
6913 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6914 1, OPTAB_DIRECT);
6915 cc = s390_emit_compare (NE, val, tmp);
6916 s390_emit_jump (csloop, cc);
6917
6918 /* Failed. */
6919 emit_move_insn (btarget, const0_rtx);
6920 emit_label (csend);
6921 }
6922
6923 /* Return the correct part of the bitfield. */
6924 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6925 NULL_RTX, 1, OPTAB_DIRECT), 1);
6926 }
6927
6928 /* Variant of s390_expand_cs for SI, DI and TI modes. */
6929 static void
6930 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6931 rtx cmp, rtx new_rtx, bool is_weak)
6932 {
6933 rtx output = vtarget;
6934 rtx_code_label *skip_cs_label = NULL;
6935 bool do_const_opt = false;
6936
6937 if (!register_operand (output, mode))
6938 output = gen_reg_rtx (mode);
6939
6940 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
6941 with the constant first and skip the compare_and_swap because its very
6942 expensive and likely to fail anyway.
6943 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
6944 cause spurious in that case.
6945 Note 2: It may be useful to do this also for non-constant INPUT.
6946 Note 3: Currently only targets with "load on condition" are supported
6947 (z196 and newer). */
6948
6949 if (TARGET_Z196
6950 && (mode == SImode || mode == DImode))
6951 do_const_opt = (is_weak && CONST_INT_P (cmp));
6952
6953 if (do_const_opt)
6954 {
6955 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
6956
6957 skip_cs_label = gen_label_rtx ();
6958 emit_move_insn (btarget, const0_rtx);
6959 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
6960 {
6961 rtvec lt = rtvec_alloc (2);
6962
6963 /* Load-and-test + conditional jump. */
6964 RTVEC_ELT (lt, 0)
6965 = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
6966 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
6967 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
6968 }
6969 else
6970 {
6971 emit_move_insn (output, mem);
6972 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
6973 }
6974 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
6975 add_reg_br_prob_note (get_last_insn (),
6976 profile_probability::very_unlikely ());
6977 /* If the jump is not taken, OUTPUT is the expected value. */
6978 cmp = output;
6979 /* Reload newval to a register manually, *after* the compare and jump
6980 above. Otherwise Reload might place it before the jump. */
6981 }
6982 else
6983 cmp = force_reg (mode, cmp);
6984 new_rtx = force_reg (mode, new_rtx);
6985 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
6986 (do_const_opt) ? CCZmode : CCZ1mode);
6987 if (skip_cs_label != NULL)
6988 emit_label (skip_cs_label);
6989
6990 /* We deliberately accept non-register operands in the predicate
6991 to ensure the write back to the output operand happens *before*
6992 the store-flags code below. This makes it easier for combine
6993 to merge the store-flags code with a potential test-and-branch
6994 pattern following (immediately!) afterwards. */
6995 if (output != vtarget)
6996 emit_move_insn (vtarget, output);
6997
6998 if (do_const_opt)
6999 {
7000 rtx cc, cond, ite;
7001
7002 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7003 btarget has already been initialized with 0 above. */
7004 cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7005 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7006 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7007 emit_insn (gen_rtx_SET (btarget, ite));
7008 }
7009 else
7010 {
7011 rtx cc, cond;
7012
7013 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7014 cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7015 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7016 }
7017 }
7018
7019 /* Expand an atomic compare and swap operation. MEM is the memory location,
7020 CMP the old value to compare MEM with and NEW_RTX the value to set if
7021 CMP == MEM. */
7022
7023 void
7024 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7025 rtx cmp, rtx new_rtx, bool is_weak)
7026 {
7027 switch (mode)
7028 {
7029 case E_TImode:
7030 case E_DImode:
7031 case E_SImode:
7032 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7033 break;
7034 case E_HImode:
7035 case E_QImode:
7036 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7037 break;
7038 default:
7039 gcc_unreachable ();
7040 }
7041 }
7042
7043 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7044 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7045 of MEM. */
7046
7047 void
7048 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7049 {
7050 machine_mode mode = GET_MODE (mem);
7051 rtx_code_label *csloop;
7052
7053 if (TARGET_Z196
7054 && (mode == DImode || mode == SImode)
7055 && CONST_INT_P (input) && INTVAL (input) == 0)
7056 {
7057 emit_move_insn (output, const0_rtx);
7058 if (mode == DImode)
7059 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7060 else
7061 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7062 return;
7063 }
7064
7065 input = force_reg (mode, input);
7066 emit_move_insn (output, mem);
7067 csloop = gen_label_rtx ();
7068 emit_label (csloop);
7069 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7070 input, CCZ1mode));
7071 }
7072
7073 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7074 and VAL the value to play with. If AFTER is true then store the value
7075 MEM holds after the operation, if AFTER is false then store the value MEM
7076 holds before the operation. If TARGET is zero then discard that value, else
7077 store it to TARGET. */
7078
7079 void
7080 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7081 rtx target, rtx mem, rtx val, bool after)
7082 {
7083 struct alignment_context ac;
7084 rtx cmp;
7085 rtx new_rtx = gen_reg_rtx (SImode);
7086 rtx orig = gen_reg_rtx (SImode);
7087 rtx_code_label *csloop = gen_label_rtx ();
7088
7089 gcc_assert (!target || register_operand (target, VOIDmode));
7090 gcc_assert (MEM_P (mem));
7091
7092 init_alignment_context (&ac, mem, mode);
7093
7094 /* Shift val to the correct bit positions.
7095 Preserve "icm", but prevent "ex icm". */
7096 if (!(ac.aligned && code == SET && MEM_P (val)))
7097 val = s390_expand_mask_and_shift (val, mode, ac.shift);
7098
7099 /* Further preparation insns. */
7100 if (code == PLUS || code == MINUS)
7101 emit_move_insn (orig, val);
7102 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7103 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7104 NULL_RTX, 1, OPTAB_DIRECT);
7105
7106 /* Load full word. Subsequent loads are performed by CS. */
7107 cmp = force_reg (SImode, ac.memsi);
7108
7109 /* Start CS loop. */
7110 emit_label (csloop);
7111 emit_move_insn (new_rtx, cmp);
7112
7113 /* Patch new with val at correct position. */
7114 switch (code)
7115 {
7116 case PLUS:
7117 case MINUS:
7118 val = expand_simple_binop (SImode, code, new_rtx, orig,
7119 NULL_RTX, 1, OPTAB_DIRECT);
7120 val = expand_simple_binop (SImode, AND, val, ac.modemask,
7121 NULL_RTX, 1, OPTAB_DIRECT);
7122 /* FALLTHRU */
7123 case SET:
7124 if (ac.aligned && MEM_P (val))
7125 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7126 0, 0, SImode, val, false);
7127 else
7128 {
7129 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7130 NULL_RTX, 1, OPTAB_DIRECT);
7131 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7132 NULL_RTX, 1, OPTAB_DIRECT);
7133 }
7134 break;
7135 case AND:
7136 case IOR:
7137 case XOR:
7138 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7139 NULL_RTX, 1, OPTAB_DIRECT);
7140 break;
7141 case MULT: /* NAND */
7142 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7143 NULL_RTX, 1, OPTAB_DIRECT);
7144 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7145 NULL_RTX, 1, OPTAB_DIRECT);
7146 break;
7147 default:
7148 gcc_unreachable ();
7149 }
7150
7151 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7152 ac.memsi, cmp, new_rtx,
7153 CCZ1mode));
7154
7155 /* Return the correct part of the bitfield. */
7156 if (target)
7157 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7158 after ? new_rtx : cmp, ac.shift,
7159 NULL_RTX, 1, OPTAB_DIRECT), 1);
7160 }
7161
7162 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7163 We need to emit DTP-relative relocations. */
7164
7165 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7166
7167 static void
7168 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7169 {
7170 switch (size)
7171 {
7172 case 4:
7173 fputs ("\t.long\t", file);
7174 break;
7175 case 8:
7176 fputs ("\t.quad\t", file);
7177 break;
7178 default:
7179 gcc_unreachable ();
7180 }
7181 output_addr_const (file, x);
7182 fputs ("@DTPOFF", file);
7183 }
7184
7185 /* Return the proper mode for REGNO being represented in the dwarf
7186 unwind table. */
7187 machine_mode
7188 s390_dwarf_frame_reg_mode (int regno)
7189 {
7190 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7191
7192 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7193 if (GENERAL_REGNO_P (regno))
7194 save_mode = Pmode;
7195
7196 /* The rightmost 64 bits of vector registers are call-clobbered. */
7197 if (GET_MODE_SIZE (save_mode) > 8)
7198 save_mode = DImode;
7199
7200 return save_mode;
7201 }
7202
7203 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7204 /* Implement TARGET_MANGLE_TYPE. */
7205
7206 static const char *
7207 s390_mangle_type (const_tree type)
7208 {
7209 type = TYPE_MAIN_VARIANT (type);
7210
7211 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7212 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7213 return NULL;
7214
7215 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7216 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7217 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7218 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7219
7220 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7221 && TARGET_LONG_DOUBLE_128)
7222 return "g";
7223
7224 /* For all other types, use normal C++ mangling. */
7225 return NULL;
7226 }
7227 #endif
7228
7229 /* In the name of slightly smaller debug output, and to cater to
7230 general assembler lossage, recognize various UNSPEC sequences
7231 and turn them back into a direct symbol reference. */
7232
7233 static rtx
7234 s390_delegitimize_address (rtx orig_x)
7235 {
7236 rtx x, y;
7237
7238 orig_x = delegitimize_mem_from_attrs (orig_x);
7239 x = orig_x;
7240
7241 /* Extract the symbol ref from:
7242 (plus:SI (reg:SI 12 %r12)
7243 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7244 UNSPEC_GOTOFF/PLTOFF)))
7245 and
7246 (plus:SI (reg:SI 12 %r12)
7247 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7248 UNSPEC_GOTOFF/PLTOFF)
7249 (const_int 4 [0x4])))) */
7250 if (GET_CODE (x) == PLUS
7251 && REG_P (XEXP (x, 0))
7252 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7253 && GET_CODE (XEXP (x, 1)) == CONST)
7254 {
7255 HOST_WIDE_INT offset = 0;
7256
7257 /* The const operand. */
7258 y = XEXP (XEXP (x, 1), 0);
7259
7260 if (GET_CODE (y) == PLUS
7261 && GET_CODE (XEXP (y, 1)) == CONST_INT)
7262 {
7263 offset = INTVAL (XEXP (y, 1));
7264 y = XEXP (y, 0);
7265 }
7266
7267 if (GET_CODE (y) == UNSPEC
7268 && (XINT (y, 1) == UNSPEC_GOTOFF
7269 || XINT (y, 1) == UNSPEC_PLTOFF))
7270 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7271 }
7272
7273 if (GET_CODE (x) != MEM)
7274 return orig_x;
7275
7276 x = XEXP (x, 0);
7277 if (GET_CODE (x) == PLUS
7278 && GET_CODE (XEXP (x, 1)) == CONST
7279 && GET_CODE (XEXP (x, 0)) == REG
7280 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7281 {
7282 y = XEXP (XEXP (x, 1), 0);
7283 if (GET_CODE (y) == UNSPEC
7284 && XINT (y, 1) == UNSPEC_GOT)
7285 y = XVECEXP (y, 0, 0);
7286 else
7287 return orig_x;
7288 }
7289 else if (GET_CODE (x) == CONST)
7290 {
7291 /* Extract the symbol ref from:
7292 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7293 UNSPEC_PLT/GOTENT))) */
7294
7295 y = XEXP (x, 0);
7296 if (GET_CODE (y) == UNSPEC
7297 && (XINT (y, 1) == UNSPEC_GOTENT
7298 || XINT (y, 1) == UNSPEC_PLT))
7299 y = XVECEXP (y, 0, 0);
7300 else
7301 return orig_x;
7302 }
7303 else
7304 return orig_x;
7305
7306 if (GET_MODE (orig_x) != Pmode)
7307 {
7308 if (GET_MODE (orig_x) == BLKmode)
7309 return orig_x;
7310 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7311 if (y == NULL_RTX)
7312 return orig_x;
7313 }
7314 return y;
7315 }
7316
7317 /* Output operand OP to stdio stream FILE.
7318 OP is an address (register + offset) which is not used to address data;
7319 instead the rightmost bits are interpreted as the value. */
7320
7321 static void
7322 print_addrstyle_operand (FILE *file, rtx op)
7323 {
7324 HOST_WIDE_INT offset;
7325 rtx base;
7326
7327 /* Extract base register and offset. */
7328 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7329 gcc_unreachable ();
7330
7331 /* Sanity check. */
7332 if (base)
7333 {
7334 gcc_assert (GET_CODE (base) == REG);
7335 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7336 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7337 }
7338
7339 /* Offsets are constricted to twelve bits. */
7340 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7341 if (base)
7342 fprintf (file, "(%s)", reg_names[REGNO (base)]);
7343 }
7344
7345 /* Assigns the number of NOP halfwords to be emitted before and after the
7346 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7347 If hotpatching is disabled for the function, the values are set to zero.
7348 */
7349
7350 static void
7351 s390_function_num_hotpatch_hw (tree decl,
7352 int *hw_before,
7353 int *hw_after)
7354 {
7355 tree attr;
7356
7357 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7358
7359 /* Handle the arguments of the hotpatch attribute. The values
7360 specified via attribute might override the cmdline argument
7361 values. */
7362 if (attr)
7363 {
7364 tree args = TREE_VALUE (attr);
7365
7366 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7367 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7368 }
7369 else
7370 {
7371 /* Use the values specified by the cmdline arguments. */
7372 *hw_before = s390_hotpatch_hw_before_label;
7373 *hw_after = s390_hotpatch_hw_after_label;
7374 }
7375 }
7376
7377 /* Write the current .machine and .machinemode specification to the assembler
7378 file. */
7379
7380 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7381 static void
7382 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7383 {
7384 fprintf (asm_out_file, "\t.machinemode %s\n",
7385 (TARGET_ZARCH) ? "zarch" : "esa");
7386 fprintf (asm_out_file, "\t.machine \"%s",
7387 processor_table[s390_arch].binutils_name);
7388 if (S390_USE_ARCHITECTURE_MODIFIERS)
7389 {
7390 int cpu_flags;
7391
7392 cpu_flags = processor_flags_table[(int) s390_arch];
7393 if (TARGET_HTM && !(cpu_flags & PF_TX))
7394 fprintf (asm_out_file, "+htm");
7395 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7396 fprintf (asm_out_file, "+nohtm");
7397 if (TARGET_VX && !(cpu_flags & PF_VX))
7398 fprintf (asm_out_file, "+vx");
7399 else if (!TARGET_VX && (cpu_flags & PF_VX))
7400 fprintf (asm_out_file, "+novx");
7401 }
7402 fprintf (asm_out_file, "\"\n");
7403 }
7404
7405 /* Write an extra function header before the very start of the function. */
7406
7407 void
7408 s390_asm_output_function_prefix (FILE *asm_out_file,
7409 const char *fnname ATTRIBUTE_UNUSED)
7410 {
7411 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7412 return;
7413 /* Since only the function specific options are saved but not the indications
7414 which options are set, it's too much work here to figure out which options
7415 have actually changed. Thus, generate .machine and .machinemode whenever a
7416 function has the target attribute or pragma. */
7417 fprintf (asm_out_file, "\t.machinemode push\n");
7418 fprintf (asm_out_file, "\t.machine push\n");
7419 s390_asm_output_machine_for_arch (asm_out_file);
7420 }
7421
7422 /* Write an extra function footer after the very end of the function. */
7423
7424 void
7425 s390_asm_declare_function_size (FILE *asm_out_file,
7426 const char *fnname, tree decl)
7427 {
7428 if (!flag_inhibit_size_directive)
7429 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7430 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7431 return;
7432 fprintf (asm_out_file, "\t.machine pop\n");
7433 fprintf (asm_out_file, "\t.machinemode pop\n");
7434 }
7435 #endif
7436
7437 /* Write the extra assembler code needed to declare a function properly. */
7438
7439 void
7440 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7441 tree decl)
7442 {
7443 int hw_before, hw_after;
7444
7445 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7446 if (hw_before > 0)
7447 {
7448 unsigned int function_alignment;
7449 int i;
7450
7451 /* Add a trampoline code area before the function label and initialize it
7452 with two-byte nop instructions. This area can be overwritten with code
7453 that jumps to a patched version of the function. */
7454 asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7455 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7456 hw_before);
7457 for (i = 1; i < hw_before; i++)
7458 fputs ("\tnopr\t%r0\n", asm_out_file);
7459
7460 /* Note: The function label must be aligned so that (a) the bytes of the
7461 following nop do not cross a cacheline boundary, and (b) a jump address
7462 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7463 stored directly before the label without crossing a cacheline
7464 boundary. All this is necessary to make sure the trampoline code can
7465 be changed atomically.
7466 This alignment is done automatically using the FOUNCTION_BOUNDARY, but
7467 if there are NOPs before the function label, the alignment is placed
7468 before them. So it is necessary to duplicate the alignment after the
7469 NOPs. */
7470 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7471 if (! DECL_USER_ALIGN (decl))
7472 function_alignment
7473 = MAX (function_alignment,
7474 (unsigned int) align_functions.levels[0].get_value ());
7475 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7476 ASM_OUTPUT_ALIGN (asm_out_file, align_functions.levels[0].log);
7477 }
7478
7479 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7480 {
7481 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7482 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7483 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7484 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7485 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7486 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7487 s390_warn_framesize);
7488 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7489 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7490 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7491 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7492 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7493 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7494 TARGET_PACKED_STACK);
7495 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7496 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7497 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7498 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7499 s390_warn_dynamicstack_p);
7500 }
7501 ASM_OUTPUT_LABEL (asm_out_file, fname);
7502 if (hw_after > 0)
7503 asm_fprintf (asm_out_file,
7504 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7505 hw_after);
7506 }
7507
7508 /* Output machine-dependent UNSPECs occurring in address constant X
7509 in assembler syntax to stdio stream FILE. Returns true if the
7510 constant X could be recognized, false otherwise. */
7511
7512 static bool
7513 s390_output_addr_const_extra (FILE *file, rtx x)
7514 {
7515 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7516 switch (XINT (x, 1))
7517 {
7518 case UNSPEC_GOTENT:
7519 output_addr_const (file, XVECEXP (x, 0, 0));
7520 fprintf (file, "@GOTENT");
7521 return true;
7522 case UNSPEC_GOT:
7523 output_addr_const (file, XVECEXP (x, 0, 0));
7524 fprintf (file, "@GOT");
7525 return true;
7526 case UNSPEC_GOTOFF:
7527 output_addr_const (file, XVECEXP (x, 0, 0));
7528 fprintf (file, "@GOTOFF");
7529 return true;
7530 case UNSPEC_PLT:
7531 output_addr_const (file, XVECEXP (x, 0, 0));
7532 fprintf (file, "@PLT");
7533 return true;
7534 case UNSPEC_PLTOFF:
7535 output_addr_const (file, XVECEXP (x, 0, 0));
7536 fprintf (file, "@PLTOFF");
7537 return true;
7538 case UNSPEC_TLSGD:
7539 output_addr_const (file, XVECEXP (x, 0, 0));
7540 fprintf (file, "@TLSGD");
7541 return true;
7542 case UNSPEC_TLSLDM:
7543 assemble_name (file, get_some_local_dynamic_name ());
7544 fprintf (file, "@TLSLDM");
7545 return true;
7546 case UNSPEC_DTPOFF:
7547 output_addr_const (file, XVECEXP (x, 0, 0));
7548 fprintf (file, "@DTPOFF");
7549 return true;
7550 case UNSPEC_NTPOFF:
7551 output_addr_const (file, XVECEXP (x, 0, 0));
7552 fprintf (file, "@NTPOFF");
7553 return true;
7554 case UNSPEC_GOTNTPOFF:
7555 output_addr_const (file, XVECEXP (x, 0, 0));
7556 fprintf (file, "@GOTNTPOFF");
7557 return true;
7558 case UNSPEC_INDNTPOFF:
7559 output_addr_const (file, XVECEXP (x, 0, 0));
7560 fprintf (file, "@INDNTPOFF");
7561 return true;
7562 }
7563
7564 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7565 switch (XINT (x, 1))
7566 {
7567 case UNSPEC_POOL_OFFSET:
7568 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7569 output_addr_const (file, x);
7570 return true;
7571 }
7572 return false;
7573 }
7574
7575 /* Output address operand ADDR in assembler syntax to
7576 stdio stream FILE. */
7577
7578 void
7579 print_operand_address (FILE *file, rtx addr)
7580 {
7581 struct s390_address ad;
7582 memset (&ad, 0, sizeof (s390_address));
7583
7584 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7585 {
7586 if (!TARGET_Z10)
7587 {
7588 output_operand_lossage ("symbolic memory references are "
7589 "only supported on z10 or later");
7590 return;
7591 }
7592 output_addr_const (file, addr);
7593 return;
7594 }
7595
7596 if (!s390_decompose_address (addr, &ad)
7597 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7598 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7599 output_operand_lossage ("cannot decompose address");
7600
7601 if (ad.disp)
7602 output_addr_const (file, ad.disp);
7603 else
7604 fprintf (file, "0");
7605
7606 if (ad.base && ad.indx)
7607 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7608 reg_names[REGNO (ad.base)]);
7609 else if (ad.base)
7610 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7611 }
7612
7613 /* Output operand X in assembler syntax to stdio stream FILE.
7614 CODE specified the format flag. The following format flags
7615 are recognized:
7616
7617 'A': On z14 or higher: If operand is a mem print the alignment
7618 hint usable with vl/vst prefixed by a comma.
7619 'C': print opcode suffix for branch condition.
7620 'D': print opcode suffix for inverse branch condition.
7621 'E': print opcode suffix for branch on index instruction.
7622 'G': print the size of the operand in bytes.
7623 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7624 'M': print the second word of a TImode operand.
7625 'N': print the second word of a DImode operand.
7626 'O': print only the displacement of a memory reference or address.
7627 'R': print only the base register of a memory reference or address.
7628 'S': print S-type memory reference (base+displacement).
7629 'Y': print address style operand without index (e.g. shift count or setmem
7630 operand).
7631
7632 'b': print integer X as if it's an unsigned byte.
7633 'c': print integer X as if it's an signed byte.
7634 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7635 'f': "end" contiguous bitmask X in SImode.
7636 'h': print integer X as if it's a signed halfword.
7637 'i': print the first nonzero HImode part of X.
7638 'j': print the first HImode part unequal to -1 of X.
7639 'k': print the first nonzero SImode part of X.
7640 'm': print the first SImode part unequal to -1 of X.
7641 'o': print integer X as if it's an unsigned 32bit word.
7642 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7643 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7644 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7645 'x': print integer X as if it's an unsigned halfword.
7646 'v': print register number as vector register (v1 instead of f1).
7647 */
7648
7649 void
7650 print_operand (FILE *file, rtx x, int code)
7651 {
7652 HOST_WIDE_INT ival;
7653
7654 switch (code)
7655 {
7656 case 'A':
7657 #ifdef HAVE_AS_VECTOR_LOADSTORE_ALIGNMENT_HINTS
7658 if (TARGET_Z14 && MEM_P (x))
7659 {
7660 if (MEM_ALIGN (x) >= 128)
7661 fprintf (file, ",4");
7662 else if (MEM_ALIGN (x) == 64)
7663 fprintf (file, ",3");
7664 }
7665 #endif
7666 return;
7667 case 'C':
7668 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7669 return;
7670
7671 case 'D':
7672 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7673 return;
7674
7675 case 'E':
7676 if (GET_CODE (x) == LE)
7677 fprintf (file, "l");
7678 else if (GET_CODE (x) == GT)
7679 fprintf (file, "h");
7680 else
7681 output_operand_lossage ("invalid comparison operator "
7682 "for 'E' output modifier");
7683 return;
7684
7685 case 'J':
7686 if (GET_CODE (x) == SYMBOL_REF)
7687 {
7688 fprintf (file, "%s", ":tls_load:");
7689 output_addr_const (file, x);
7690 }
7691 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7692 {
7693 fprintf (file, "%s", ":tls_gdcall:");
7694 output_addr_const (file, XVECEXP (x, 0, 0));
7695 }
7696 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7697 {
7698 fprintf (file, "%s", ":tls_ldcall:");
7699 const char *name = get_some_local_dynamic_name ();
7700 gcc_assert (name);
7701 assemble_name (file, name);
7702 }
7703 else
7704 output_operand_lossage ("invalid reference for 'J' output modifier");
7705 return;
7706
7707 case 'G':
7708 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7709 return;
7710
7711 case 'O':
7712 {
7713 struct s390_address ad;
7714 int ret;
7715
7716 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7717
7718 if (!ret
7719 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7720 || ad.indx)
7721 {
7722 output_operand_lossage ("invalid address for 'O' output modifier");
7723 return;
7724 }
7725
7726 if (ad.disp)
7727 output_addr_const (file, ad.disp);
7728 else
7729 fprintf (file, "0");
7730 }
7731 return;
7732
7733 case 'R':
7734 {
7735 struct s390_address ad;
7736 int ret;
7737
7738 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7739
7740 if (!ret
7741 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7742 || ad.indx)
7743 {
7744 output_operand_lossage ("invalid address for 'R' output modifier");
7745 return;
7746 }
7747
7748 if (ad.base)
7749 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7750 else
7751 fprintf (file, "0");
7752 }
7753 return;
7754
7755 case 'S':
7756 {
7757 struct s390_address ad;
7758 int ret;
7759
7760 if (!MEM_P (x))
7761 {
7762 output_operand_lossage ("memory reference expected for "
7763 "'S' output modifier");
7764 return;
7765 }
7766 ret = s390_decompose_address (XEXP (x, 0), &ad);
7767
7768 if (!ret
7769 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7770 || ad.indx)
7771 {
7772 output_operand_lossage ("invalid address for 'S' output modifier");
7773 return;
7774 }
7775
7776 if (ad.disp)
7777 output_addr_const (file, ad.disp);
7778 else
7779 fprintf (file, "0");
7780
7781 if (ad.base)
7782 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7783 }
7784 return;
7785
7786 case 'N':
7787 if (GET_CODE (x) == REG)
7788 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7789 else if (GET_CODE (x) == MEM)
7790 x = change_address (x, VOIDmode,
7791 plus_constant (Pmode, XEXP (x, 0), 4));
7792 else
7793 output_operand_lossage ("register or memory expression expected "
7794 "for 'N' output modifier");
7795 break;
7796
7797 case 'M':
7798 if (GET_CODE (x) == REG)
7799 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7800 else if (GET_CODE (x) == MEM)
7801 x = change_address (x, VOIDmode,
7802 plus_constant (Pmode, XEXP (x, 0), 8));
7803 else
7804 output_operand_lossage ("register or memory expression expected "
7805 "for 'M' output modifier");
7806 break;
7807
7808 case 'Y':
7809 print_addrstyle_operand (file, x);
7810 return;
7811 }
7812
7813 switch (GET_CODE (x))
7814 {
7815 case REG:
7816 /* Print FP regs as fx instead of vx when they are accessed
7817 through non-vector mode. */
7818 if (code == 'v'
7819 || VECTOR_NOFP_REG_P (x)
7820 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7821 || (VECTOR_REG_P (x)
7822 && (GET_MODE_SIZE (GET_MODE (x)) /
7823 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7824 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7825 else
7826 fprintf (file, "%s", reg_names[REGNO (x)]);
7827 break;
7828
7829 case MEM:
7830 output_address (GET_MODE (x), XEXP (x, 0));
7831 break;
7832
7833 case CONST:
7834 case CODE_LABEL:
7835 case LABEL_REF:
7836 case SYMBOL_REF:
7837 output_addr_const (file, x);
7838 break;
7839
7840 case CONST_INT:
7841 ival = INTVAL (x);
7842 switch (code)
7843 {
7844 case 0:
7845 break;
7846 case 'b':
7847 ival &= 0xff;
7848 break;
7849 case 'c':
7850 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7851 break;
7852 case 'x':
7853 ival &= 0xffff;
7854 break;
7855 case 'h':
7856 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7857 break;
7858 case 'i':
7859 ival = s390_extract_part (x, HImode, 0);
7860 break;
7861 case 'j':
7862 ival = s390_extract_part (x, HImode, -1);
7863 break;
7864 case 'k':
7865 ival = s390_extract_part (x, SImode, 0);
7866 break;
7867 case 'm':
7868 ival = s390_extract_part (x, SImode, -1);
7869 break;
7870 case 'o':
7871 ival &= 0xffffffff;
7872 break;
7873 case 'e': case 'f':
7874 case 's': case 't':
7875 {
7876 int start, end;
7877 int len;
7878 bool ok;
7879
7880 len = (code == 's' || code == 'e' ? 64 : 32);
7881 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7882 gcc_assert (ok);
7883 if (code == 's' || code == 't')
7884 ival = start;
7885 else
7886 ival = end;
7887 }
7888 break;
7889 default:
7890 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7891 }
7892 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7893 break;
7894
7895 case CONST_WIDE_INT:
7896 if (code == 'b')
7897 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7898 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7899 else if (code == 'x')
7900 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7901 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7902 else if (code == 'h')
7903 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7904 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7905 else
7906 {
7907 if (code == 0)
7908 output_operand_lossage ("invalid constant - try using "
7909 "an output modifier");
7910 else
7911 output_operand_lossage ("invalid constant for output modifier '%c'",
7912 code);
7913 }
7914 break;
7915 case CONST_VECTOR:
7916 switch (code)
7917 {
7918 case 'h':
7919 gcc_assert (const_vec_duplicate_p (x));
7920 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7921 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7922 break;
7923 case 'e':
7924 case 's':
7925 {
7926 int start, end;
7927 bool ok;
7928
7929 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
7930 gcc_assert (ok);
7931 ival = (code == 's') ? start : end;
7932 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7933 }
7934 break;
7935 case 't':
7936 {
7937 unsigned mask;
7938 bool ok = s390_bytemask_vector_p (x, &mask);
7939 gcc_assert (ok);
7940 fprintf (file, "%u", mask);
7941 }
7942 break;
7943
7944 default:
7945 output_operand_lossage ("invalid constant vector for output "
7946 "modifier '%c'", code);
7947 }
7948 break;
7949
7950 default:
7951 if (code == 0)
7952 output_operand_lossage ("invalid expression - try using "
7953 "an output modifier");
7954 else
7955 output_operand_lossage ("invalid expression for output "
7956 "modifier '%c'", code);
7957 break;
7958 }
7959 }
7960
7961 /* Target hook for assembling integer objects. We need to define it
7962 here to work a round a bug in some versions of GAS, which couldn't
7963 handle values smaller than INT_MIN when printed in decimal. */
7964
7965 static bool
7966 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7967 {
7968 if (size == 8 && aligned_p
7969 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7970 {
7971 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7972 INTVAL (x));
7973 return true;
7974 }
7975 return default_assemble_integer (x, size, aligned_p);
7976 }
7977
7978 /* Returns true if register REGNO is used for forming
7979 a memory address in expression X. */
7980
7981 static bool
7982 reg_used_in_mem_p (int regno, rtx x)
7983 {
7984 enum rtx_code code = GET_CODE (x);
7985 int i, j;
7986 const char *fmt;
7987
7988 if (code == MEM)
7989 {
7990 if (refers_to_regno_p (regno, XEXP (x, 0)))
7991 return true;
7992 }
7993 else if (code == SET
7994 && GET_CODE (SET_DEST (x)) == PC)
7995 {
7996 if (refers_to_regno_p (regno, SET_SRC (x)))
7997 return true;
7998 }
7999
8000 fmt = GET_RTX_FORMAT (code);
8001 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8002 {
8003 if (fmt[i] == 'e'
8004 && reg_used_in_mem_p (regno, XEXP (x, i)))
8005 return true;
8006
8007 else if (fmt[i] == 'E')
8008 for (j = 0; j < XVECLEN (x, i); j++)
8009 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8010 return true;
8011 }
8012 return false;
8013 }
8014
8015 /* Returns true if expression DEP_RTX sets an address register
8016 used by instruction INSN to address memory. */
8017
8018 static bool
8019 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8020 {
8021 rtx target, pat;
8022
8023 if (NONJUMP_INSN_P (dep_rtx))
8024 dep_rtx = PATTERN (dep_rtx);
8025
8026 if (GET_CODE (dep_rtx) == SET)
8027 {
8028 target = SET_DEST (dep_rtx);
8029 if (GET_CODE (target) == STRICT_LOW_PART)
8030 target = XEXP (target, 0);
8031 while (GET_CODE (target) == SUBREG)
8032 target = SUBREG_REG (target);
8033
8034 if (GET_CODE (target) == REG)
8035 {
8036 int regno = REGNO (target);
8037
8038 if (s390_safe_attr_type (insn) == TYPE_LA)
8039 {
8040 pat = PATTERN (insn);
8041 if (GET_CODE (pat) == PARALLEL)
8042 {
8043 gcc_assert (XVECLEN (pat, 0) == 2);
8044 pat = XVECEXP (pat, 0, 0);
8045 }
8046 gcc_assert (GET_CODE (pat) == SET);
8047 return refers_to_regno_p (regno, SET_SRC (pat));
8048 }
8049 else if (get_attr_atype (insn) == ATYPE_AGEN)
8050 return reg_used_in_mem_p (regno, PATTERN (insn));
8051 }
8052 }
8053 return false;
8054 }
8055
8056 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
8057
8058 int
8059 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8060 {
8061 rtx dep_rtx = PATTERN (dep_insn);
8062 int i;
8063
8064 if (GET_CODE (dep_rtx) == SET
8065 && addr_generation_dependency_p (dep_rtx, insn))
8066 return 1;
8067 else if (GET_CODE (dep_rtx) == PARALLEL)
8068 {
8069 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8070 {
8071 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8072 return 1;
8073 }
8074 }
8075 return 0;
8076 }
8077
8078
8079 /* A C statement (sans semicolon) to update the integer scheduling priority
8080 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
8081 reduce the priority to execute INSN later. Do not define this macro if
8082 you do not need to adjust the scheduling priorities of insns.
8083
8084 A STD instruction should be scheduled earlier,
8085 in order to use the bypass. */
8086 static int
8087 s390_adjust_priority (rtx_insn *insn, int priority)
8088 {
8089 if (! INSN_P (insn))
8090 return priority;
8091
8092 if (s390_tune <= PROCESSOR_2064_Z900)
8093 return priority;
8094
8095 switch (s390_safe_attr_type (insn))
8096 {
8097 case TYPE_FSTOREDF:
8098 case TYPE_FSTORESF:
8099 priority = priority << 3;
8100 break;
8101 case TYPE_STORE:
8102 case TYPE_STM:
8103 priority = priority << 1;
8104 break;
8105 default:
8106 break;
8107 }
8108 return priority;
8109 }
8110
8111
8112 /* The number of instructions that can be issued per cycle. */
8113
8114 static int
8115 s390_issue_rate (void)
8116 {
8117 switch (s390_tune)
8118 {
8119 case PROCESSOR_2084_Z990:
8120 case PROCESSOR_2094_Z9_109:
8121 case PROCESSOR_2094_Z9_EC:
8122 case PROCESSOR_2817_Z196:
8123 return 3;
8124 case PROCESSOR_2097_Z10:
8125 return 2;
8126 case PROCESSOR_2064_Z900:
8127 /* Starting with EC12 we use the sched_reorder hook to take care
8128 of instruction dispatch constraints. The algorithm only
8129 picks the best instruction and assumes only a single
8130 instruction gets issued per cycle. */
8131 case PROCESSOR_2827_ZEC12:
8132 case PROCESSOR_2964_Z13:
8133 case PROCESSOR_3906_Z14:
8134 default:
8135 return 1;
8136 }
8137 }
8138
8139 static int
8140 s390_first_cycle_multipass_dfa_lookahead (void)
8141 {
8142 return 4;
8143 }
8144
8145 static void
8146 annotate_constant_pool_refs_1 (rtx *x)
8147 {
8148 int i, j;
8149 const char *fmt;
8150
8151 gcc_assert (GET_CODE (*x) != SYMBOL_REF
8152 || !CONSTANT_POOL_ADDRESS_P (*x));
8153
8154 /* Literal pool references can only occur inside a MEM ... */
8155 if (GET_CODE (*x) == MEM)
8156 {
8157 rtx memref = XEXP (*x, 0);
8158
8159 if (GET_CODE (memref) == SYMBOL_REF
8160 && CONSTANT_POOL_ADDRESS_P (memref))
8161 {
8162 rtx base = cfun->machine->base_reg;
8163 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8164 UNSPEC_LTREF);
8165
8166 *x = replace_equiv_address (*x, addr);
8167 return;
8168 }
8169
8170 if (GET_CODE (memref) == CONST
8171 && GET_CODE (XEXP (memref, 0)) == PLUS
8172 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8173 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8174 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8175 {
8176 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8177 rtx sym = XEXP (XEXP (memref, 0), 0);
8178 rtx base = cfun->machine->base_reg;
8179 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8180 UNSPEC_LTREF);
8181
8182 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8183 return;
8184 }
8185 }
8186
8187 /* ... or a load-address type pattern. */
8188 if (GET_CODE (*x) == SET)
8189 {
8190 rtx addrref = SET_SRC (*x);
8191
8192 if (GET_CODE (addrref) == SYMBOL_REF
8193 && CONSTANT_POOL_ADDRESS_P (addrref))
8194 {
8195 rtx base = cfun->machine->base_reg;
8196 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8197 UNSPEC_LTREF);
8198
8199 SET_SRC (*x) = addr;
8200 return;
8201 }
8202
8203 if (GET_CODE (addrref) == CONST
8204 && GET_CODE (XEXP (addrref, 0)) == PLUS
8205 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8206 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8207 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8208 {
8209 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8210 rtx sym = XEXP (XEXP (addrref, 0), 0);
8211 rtx base = cfun->machine->base_reg;
8212 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8213 UNSPEC_LTREF);
8214
8215 SET_SRC (*x) = plus_constant (Pmode, addr, off);
8216 return;
8217 }
8218 }
8219
8220 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8221 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8222 {
8223 if (fmt[i] == 'e')
8224 {
8225 annotate_constant_pool_refs_1 (&XEXP (*x, i));
8226 }
8227 else if (fmt[i] == 'E')
8228 {
8229 for (j = 0; j < XVECLEN (*x, i); j++)
8230 annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j));
8231 }
8232 }
8233 }
8234
8235 /* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
8236 Fix up MEMs as required.
8237 Skip insns which support relative addressing, because they do not use a base
8238 register. */
8239
8240 static void
8241 annotate_constant_pool_refs (rtx_insn *insn)
8242 {
8243 if (s390_safe_relative_long_p (insn))
8244 return;
8245 annotate_constant_pool_refs_1 (&PATTERN (insn));
8246 }
8247
8248 static void
8249 find_constant_pool_ref_1 (rtx x, rtx *ref)
8250 {
8251 int i, j;
8252 const char *fmt;
8253
8254 /* Likewise POOL_ENTRY insns. */
8255 if (GET_CODE (x) == UNSPEC_VOLATILE
8256 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8257 return;
8258
8259 gcc_assert (GET_CODE (x) != SYMBOL_REF
8260 || !CONSTANT_POOL_ADDRESS_P (x));
8261
8262 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8263 {
8264 rtx sym = XVECEXP (x, 0, 0);
8265 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8266 && CONSTANT_POOL_ADDRESS_P (sym));
8267
8268 if (*ref == NULL_RTX)
8269 *ref = sym;
8270 else
8271 gcc_assert (*ref == sym);
8272
8273 return;
8274 }
8275
8276 fmt = GET_RTX_FORMAT (GET_CODE (x));
8277 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8278 {
8279 if (fmt[i] == 'e')
8280 {
8281 find_constant_pool_ref_1 (XEXP (x, i), ref);
8282 }
8283 else if (fmt[i] == 'E')
8284 {
8285 for (j = 0; j < XVECLEN (x, i); j++)
8286 find_constant_pool_ref_1 (XVECEXP (x, i, j), ref);
8287 }
8288 }
8289 }
8290
8291 /* Find an annotated literal pool symbol referenced in INSN,
8292 and store it at REF. Will abort if INSN contains references to
8293 more than one such pool symbol; multiple references to the same
8294 symbol are allowed, however.
8295
8296 The rtx pointed to by REF must be initialized to NULL_RTX
8297 by the caller before calling this routine.
8298
8299 Skip insns which support relative addressing, because they do not use a base
8300 register. */
8301
8302 static void
8303 find_constant_pool_ref (rtx_insn *insn, rtx *ref)
8304 {
8305 if (s390_safe_relative_long_p (insn))
8306 return;
8307 find_constant_pool_ref_1 (PATTERN (insn), ref);
8308 }
8309
8310 static void
8311 replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset)
8312 {
8313 int i, j;
8314 const char *fmt;
8315
8316 gcc_assert (*x != ref);
8317
8318 if (GET_CODE (*x) == UNSPEC
8319 && XINT (*x, 1) == UNSPEC_LTREF
8320 && XVECEXP (*x, 0, 0) == ref)
8321 {
8322 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8323 return;
8324 }
8325
8326 if (GET_CODE (*x) == PLUS
8327 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8328 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8329 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8330 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8331 {
8332 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8333 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8334 return;
8335 }
8336
8337 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8338 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8339 {
8340 if (fmt[i] == 'e')
8341 {
8342 replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset);
8343 }
8344 else if (fmt[i] == 'E')
8345 {
8346 for (j = 0; j < XVECLEN (*x, i); j++)
8347 replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset);
8348 }
8349 }
8350 }
8351
8352 /* Replace every reference to the annotated literal pool
8353 symbol REF in INSN by its base plus OFFSET.
8354 Skip insns which support relative addressing, because they do not use a base
8355 register. */
8356
8357 static void
8358 replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset)
8359 {
8360 if (s390_safe_relative_long_p (insn))
8361 return;
8362 replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset);
8363 }
8364
8365 /* We keep a list of constants which we have to add to internal
8366 constant tables in the middle of large functions. */
8367
8368 #define NR_C_MODES 32
8369 machine_mode constant_modes[NR_C_MODES] =
8370 {
8371 TFmode, TImode, TDmode,
8372 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8373 V4SFmode, V2DFmode, V1TFmode,
8374 DFmode, DImode, DDmode,
8375 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8376 SFmode, SImode, SDmode,
8377 V4QImode, V2HImode, V1SImode, V1SFmode,
8378 HImode,
8379 V2QImode, V1HImode,
8380 QImode,
8381 V1QImode
8382 };
8383
8384 struct constant
8385 {
8386 struct constant *next;
8387 rtx value;
8388 rtx_code_label *label;
8389 };
8390
8391 struct constant_pool
8392 {
8393 struct constant_pool *next;
8394 rtx_insn *first_insn;
8395 rtx_insn *pool_insn;
8396 bitmap insns;
8397 rtx_insn *emit_pool_after;
8398
8399 struct constant *constants[NR_C_MODES];
8400 struct constant *execute;
8401 rtx_code_label *label;
8402 int size;
8403 };
8404
8405 /* Allocate new constant_pool structure. */
8406
8407 static struct constant_pool *
8408 s390_alloc_pool (void)
8409 {
8410 struct constant_pool *pool;
8411 int i;
8412
8413 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8414 pool->next = NULL;
8415 for (i = 0; i < NR_C_MODES; i++)
8416 pool->constants[i] = NULL;
8417
8418 pool->execute = NULL;
8419 pool->label = gen_label_rtx ();
8420 pool->first_insn = NULL;
8421 pool->pool_insn = NULL;
8422 pool->insns = BITMAP_ALLOC (NULL);
8423 pool->size = 0;
8424 pool->emit_pool_after = NULL;
8425
8426 return pool;
8427 }
8428
8429 /* Create new constant pool covering instructions starting at INSN
8430 and chain it to the end of POOL_LIST. */
8431
8432 static struct constant_pool *
8433 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8434 {
8435 struct constant_pool *pool, **prev;
8436
8437 pool = s390_alloc_pool ();
8438 pool->first_insn = insn;
8439
8440 for (prev = pool_list; *prev; prev = &(*prev)->next)
8441 ;
8442 *prev = pool;
8443
8444 return pool;
8445 }
8446
8447 /* End range of instructions covered by POOL at INSN and emit
8448 placeholder insn representing the pool. */
8449
8450 static void
8451 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8452 {
8453 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8454
8455 if (!insn)
8456 insn = get_last_insn ();
8457
8458 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8459 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8460 }
8461
8462 /* Add INSN to the list of insns covered by POOL. */
8463
8464 static void
8465 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8466 {
8467 bitmap_set_bit (pool->insns, INSN_UID (insn));
8468 }
8469
8470 /* Return pool out of POOL_LIST that covers INSN. */
8471
8472 static struct constant_pool *
8473 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8474 {
8475 struct constant_pool *pool;
8476
8477 for (pool = pool_list; pool; pool = pool->next)
8478 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8479 break;
8480
8481 return pool;
8482 }
8483
8484 /* Add constant VAL of mode MODE to the constant pool POOL. */
8485
8486 static void
8487 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8488 {
8489 struct constant *c;
8490 int i;
8491
8492 for (i = 0; i < NR_C_MODES; i++)
8493 if (constant_modes[i] == mode)
8494 break;
8495 gcc_assert (i != NR_C_MODES);
8496
8497 for (c = pool->constants[i]; c != NULL; c = c->next)
8498 if (rtx_equal_p (val, c->value))
8499 break;
8500
8501 if (c == NULL)
8502 {
8503 c = (struct constant *) xmalloc (sizeof *c);
8504 c->value = val;
8505 c->label = gen_label_rtx ();
8506 c->next = pool->constants[i];
8507 pool->constants[i] = c;
8508 pool->size += GET_MODE_SIZE (mode);
8509 }
8510 }
8511
8512 /* Return an rtx that represents the offset of X from the start of
8513 pool POOL. */
8514
8515 static rtx
8516 s390_pool_offset (struct constant_pool *pool, rtx x)
8517 {
8518 rtx label;
8519
8520 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8521 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8522 UNSPEC_POOL_OFFSET);
8523 return gen_rtx_CONST (GET_MODE (x), x);
8524 }
8525
8526 /* Find constant VAL of mode MODE in the constant pool POOL.
8527 Return an RTX describing the distance from the start of
8528 the pool to the location of the new constant. */
8529
8530 static rtx
8531 s390_find_constant (struct constant_pool *pool, rtx val,
8532 machine_mode mode)
8533 {
8534 struct constant *c;
8535 int i;
8536
8537 for (i = 0; i < NR_C_MODES; i++)
8538 if (constant_modes[i] == mode)
8539 break;
8540 gcc_assert (i != NR_C_MODES);
8541
8542 for (c = pool->constants[i]; c != NULL; c = c->next)
8543 if (rtx_equal_p (val, c->value))
8544 break;
8545
8546 gcc_assert (c);
8547
8548 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8549 }
8550
8551 /* Check whether INSN is an execute. Return the label_ref to its
8552 execute target template if so, NULL_RTX otherwise. */
8553
8554 static rtx
8555 s390_execute_label (rtx insn)
8556 {
8557 if (INSN_P (insn)
8558 && GET_CODE (PATTERN (insn)) == PARALLEL
8559 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8560 && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8561 || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8562 {
8563 if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8564 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8565 else
8566 {
8567 gcc_assert (JUMP_P (insn));
8568 /* For jump insns as execute target:
8569 - There is one operand less in the parallel (the
8570 modification register of the execute is always 0).
8571 - The execute target label is wrapped into an
8572 if_then_else in order to hide it from jump analysis. */
8573 return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8574 }
8575 }
8576
8577 return NULL_RTX;
8578 }
8579
8580 /* Find execute target for INSN in the constant pool POOL.
8581 Return an RTX describing the distance from the start of
8582 the pool to the location of the execute target. */
8583
8584 static rtx
8585 s390_find_execute (struct constant_pool *pool, rtx insn)
8586 {
8587 struct constant *c;
8588
8589 for (c = pool->execute; c != NULL; c = c->next)
8590 if (INSN_UID (insn) == INSN_UID (c->value))
8591 break;
8592
8593 gcc_assert (c);
8594
8595 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8596 }
8597
8598 /* For an execute INSN, extract the execute target template. */
8599
8600 static rtx
8601 s390_execute_target (rtx insn)
8602 {
8603 rtx pattern = PATTERN (insn);
8604 gcc_assert (s390_execute_label (insn));
8605
8606 if (XVECLEN (pattern, 0) == 2)
8607 {
8608 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8609 }
8610 else
8611 {
8612 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8613 int i;
8614
8615 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8616 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8617
8618 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8619 }
8620
8621 return pattern;
8622 }
8623
8624 /* Indicate that INSN cannot be duplicated. This is the case for
8625 execute insns that carry a unique label. */
8626
8627 static bool
8628 s390_cannot_copy_insn_p (rtx_insn *insn)
8629 {
8630 rtx label = s390_execute_label (insn);
8631 return label && label != const0_rtx;
8632 }
8633
8634 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8635 do not emit the pool base label. */
8636
8637 static void
8638 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8639 {
8640 struct constant *c;
8641 rtx_insn *insn = pool->pool_insn;
8642 int i;
8643
8644 /* Switch to rodata section. */
8645 insn = emit_insn_after (gen_pool_section_start (), insn);
8646 INSN_ADDRESSES_NEW (insn, -1);
8647
8648 /* Ensure minimum pool alignment. */
8649 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8650 INSN_ADDRESSES_NEW (insn, -1);
8651
8652 /* Emit pool base label. */
8653 if (!remote_label)
8654 {
8655 insn = emit_label_after (pool->label, insn);
8656 INSN_ADDRESSES_NEW (insn, -1);
8657 }
8658
8659 /* Dump constants in descending alignment requirement order,
8660 ensuring proper alignment for every constant. */
8661 for (i = 0; i < NR_C_MODES; i++)
8662 for (c = pool->constants[i]; c; c = c->next)
8663 {
8664 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8665 rtx value = copy_rtx (c->value);
8666 if (GET_CODE (value) == CONST
8667 && GET_CODE (XEXP (value, 0)) == UNSPEC
8668 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8669 && XVECLEN (XEXP (value, 0), 0) == 1)
8670 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8671
8672 insn = emit_label_after (c->label, insn);
8673 INSN_ADDRESSES_NEW (insn, -1);
8674
8675 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8676 gen_rtvec (1, value),
8677 UNSPECV_POOL_ENTRY);
8678 insn = emit_insn_after (value, insn);
8679 INSN_ADDRESSES_NEW (insn, -1);
8680 }
8681
8682 /* Ensure minimum alignment for instructions. */
8683 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8684 INSN_ADDRESSES_NEW (insn, -1);
8685
8686 /* Output in-pool execute template insns. */
8687 for (c = pool->execute; c; c = c->next)
8688 {
8689 insn = emit_label_after (c->label, insn);
8690 INSN_ADDRESSES_NEW (insn, -1);
8691
8692 insn = emit_insn_after (s390_execute_target (c->value), insn);
8693 INSN_ADDRESSES_NEW (insn, -1);
8694 }
8695
8696 /* Switch back to previous section. */
8697 insn = emit_insn_after (gen_pool_section_end (), insn);
8698 INSN_ADDRESSES_NEW (insn, -1);
8699
8700 insn = emit_barrier_after (insn);
8701 INSN_ADDRESSES_NEW (insn, -1);
8702
8703 /* Remove placeholder insn. */
8704 remove_insn (pool->pool_insn);
8705 }
8706
8707 /* Free all memory used by POOL. */
8708
8709 static void
8710 s390_free_pool (struct constant_pool *pool)
8711 {
8712 struct constant *c, *next;
8713 int i;
8714
8715 for (i = 0; i < NR_C_MODES; i++)
8716 for (c = pool->constants[i]; c; c = next)
8717 {
8718 next = c->next;
8719 free (c);
8720 }
8721
8722 for (c = pool->execute; c; c = next)
8723 {
8724 next = c->next;
8725 free (c);
8726 }
8727
8728 BITMAP_FREE (pool->insns);
8729 free (pool);
8730 }
8731
8732
8733 /* Collect main literal pool. Return NULL on overflow. */
8734
8735 static struct constant_pool *
8736 s390_mainpool_start (void)
8737 {
8738 struct constant_pool *pool;
8739 rtx_insn *insn;
8740
8741 pool = s390_alloc_pool ();
8742
8743 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8744 {
8745 if (NONJUMP_INSN_P (insn)
8746 && GET_CODE (PATTERN (insn)) == SET
8747 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8748 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8749 {
8750 /* There might be two main_pool instructions if base_reg
8751 is call-clobbered; one for shrink-wrapped code and one
8752 for the rest. We want to keep the first. */
8753 if (pool->pool_insn)
8754 {
8755 insn = PREV_INSN (insn);
8756 delete_insn (NEXT_INSN (insn));
8757 continue;
8758 }
8759 pool->pool_insn = insn;
8760 }
8761
8762 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8763 {
8764 rtx pool_ref = NULL_RTX;
8765 find_constant_pool_ref (insn, &pool_ref);
8766 if (pool_ref)
8767 {
8768 rtx constant = get_pool_constant (pool_ref);
8769 machine_mode mode = get_pool_mode (pool_ref);
8770 s390_add_constant (pool, constant, mode);
8771 }
8772 }
8773
8774 /* If hot/cold partitioning is enabled we have to make sure that
8775 the literal pool is emitted in the same section where the
8776 initialization of the literal pool base pointer takes place.
8777 emit_pool_after is only used in the non-overflow case on non
8778 Z cpus where we can emit the literal pool at the end of the
8779 function body within the text section. */
8780 if (NOTE_P (insn)
8781 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8782 && !pool->emit_pool_after)
8783 pool->emit_pool_after = PREV_INSN (insn);
8784 }
8785
8786 gcc_assert (pool->pool_insn || pool->size == 0);
8787
8788 if (pool->size >= 4096)
8789 {
8790 /* We're going to chunkify the pool, so remove the main
8791 pool placeholder insn. */
8792 remove_insn (pool->pool_insn);
8793
8794 s390_free_pool (pool);
8795 pool = NULL;
8796 }
8797
8798 /* If the functions ends with the section where the literal pool
8799 should be emitted set the marker to its end. */
8800 if (pool && !pool->emit_pool_after)
8801 pool->emit_pool_after = get_last_insn ();
8802
8803 return pool;
8804 }
8805
8806 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8807 Modify the current function to output the pool constants as well as
8808 the pool register setup instruction. */
8809
8810 static void
8811 s390_mainpool_finish (struct constant_pool *pool)
8812 {
8813 rtx base_reg = cfun->machine->base_reg;
8814 rtx set;
8815 rtx_insn *insn;
8816
8817 /* If the pool is empty, we're done. */
8818 if (pool->size == 0)
8819 {
8820 /* We don't actually need a base register after all. */
8821 cfun->machine->base_reg = NULL_RTX;
8822
8823 if (pool->pool_insn)
8824 remove_insn (pool->pool_insn);
8825 s390_free_pool (pool);
8826 return;
8827 }
8828
8829 /* We need correct insn addresses. */
8830 shorten_branches (get_insns ());
8831
8832 /* Use a LARL to load the pool register. The pool is
8833 located in the .rodata section, so we emit it after the function. */
8834 set = gen_main_base_64 (base_reg, pool->label);
8835 insn = emit_insn_after (set, pool->pool_insn);
8836 INSN_ADDRESSES_NEW (insn, -1);
8837 remove_insn (pool->pool_insn);
8838
8839 insn = get_last_insn ();
8840 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8841 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8842
8843 s390_dump_pool (pool, 0);
8844
8845 /* Replace all literal pool references. */
8846
8847 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8848 {
8849 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8850 {
8851 rtx addr, pool_ref = NULL_RTX;
8852 find_constant_pool_ref (insn, &pool_ref);
8853 if (pool_ref)
8854 {
8855 if (s390_execute_label (insn))
8856 addr = s390_find_execute (pool, insn);
8857 else
8858 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8859 get_pool_mode (pool_ref));
8860
8861 replace_constant_pool_ref (insn, pool_ref, addr);
8862 INSN_CODE (insn) = -1;
8863 }
8864 }
8865 }
8866
8867
8868 /* Free the pool. */
8869 s390_free_pool (pool);
8870 }
8871
8872 /* Chunkify the literal pool. */
8873
8874 #define S390_POOL_CHUNK_MIN 0xc00
8875 #define S390_POOL_CHUNK_MAX 0xe00
8876
8877 static struct constant_pool *
8878 s390_chunkify_start (void)
8879 {
8880 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8881 bitmap far_labels;
8882 rtx_insn *insn;
8883
8884 /* We need correct insn addresses. */
8885
8886 shorten_branches (get_insns ());
8887
8888 /* Scan all insns and move literals to pool chunks. */
8889
8890 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8891 {
8892 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8893 {
8894 rtx pool_ref = NULL_RTX;
8895 find_constant_pool_ref (insn, &pool_ref);
8896 if (pool_ref)
8897 {
8898 rtx constant = get_pool_constant (pool_ref);
8899 machine_mode mode = get_pool_mode (pool_ref);
8900
8901 if (!curr_pool)
8902 curr_pool = s390_start_pool (&pool_list, insn);
8903
8904 s390_add_constant (curr_pool, constant, mode);
8905 s390_add_pool_insn (curr_pool, insn);
8906 }
8907 }
8908
8909 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
8910 {
8911 if (curr_pool)
8912 s390_add_pool_insn (curr_pool, insn);
8913 }
8914
8915 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
8916 continue;
8917
8918 if (!curr_pool
8919 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
8920 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
8921 continue;
8922
8923 if (curr_pool->size < S390_POOL_CHUNK_MAX)
8924 continue;
8925
8926 s390_end_pool (curr_pool, NULL);
8927 curr_pool = NULL;
8928 }
8929
8930 if (curr_pool)
8931 s390_end_pool (curr_pool, NULL);
8932
8933 /* Find all labels that are branched into
8934 from an insn belonging to a different chunk. */
8935
8936 far_labels = BITMAP_ALLOC (NULL);
8937
8938 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8939 {
8940 rtx_jump_table_data *table;
8941
8942 /* Labels marked with LABEL_PRESERVE_P can be target
8943 of non-local jumps, so we have to mark them.
8944 The same holds for named labels.
8945
8946 Don't do that, however, if it is the label before
8947 a jump table. */
8948
8949 if (LABEL_P (insn)
8950 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
8951 {
8952 rtx_insn *vec_insn = NEXT_INSN (insn);
8953 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
8954 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
8955 }
8956 /* Check potential targets in a table jump (casesi_jump). */
8957 else if (tablejump_p (insn, NULL, &table))
8958 {
8959 rtx vec_pat = PATTERN (table);
8960 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
8961
8962 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
8963 {
8964 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
8965
8966 if (s390_find_pool (pool_list, label)
8967 != s390_find_pool (pool_list, insn))
8968 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8969 }
8970 }
8971 /* If we have a direct jump (conditional or unconditional),
8972 check all potential targets. */
8973 else if (JUMP_P (insn))
8974 {
8975 rtx pat = PATTERN (insn);
8976
8977 if (GET_CODE (pat) == PARALLEL)
8978 pat = XVECEXP (pat, 0, 0);
8979
8980 if (GET_CODE (pat) == SET)
8981 {
8982 rtx label = JUMP_LABEL (insn);
8983 if (label && !ANY_RETURN_P (label))
8984 {
8985 if (s390_find_pool (pool_list, label)
8986 != s390_find_pool (pool_list, insn))
8987 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8988 }
8989 }
8990 }
8991 }
8992
8993 /* Insert base register reload insns before every pool. */
8994
8995 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8996 {
8997 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
8998 curr_pool->label);
8999 rtx_insn *insn = curr_pool->first_insn;
9000 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9001 }
9002
9003 /* Insert base register reload insns at every far label. */
9004
9005 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9006 if (LABEL_P (insn)
9007 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9008 {
9009 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9010 if (pool)
9011 {
9012 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9013 pool->label);
9014 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9015 }
9016 }
9017
9018
9019 BITMAP_FREE (far_labels);
9020
9021
9022 /* Recompute insn addresses. */
9023
9024 init_insn_lengths ();
9025 shorten_branches (get_insns ());
9026
9027 return pool_list;
9028 }
9029
9030 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9031 After we have decided to use this list, finish implementing
9032 all changes to the current function as required. */
9033
9034 static void
9035 s390_chunkify_finish (struct constant_pool *pool_list)
9036 {
9037 struct constant_pool *curr_pool = NULL;
9038 rtx_insn *insn;
9039
9040
9041 /* Replace all literal pool references. */
9042
9043 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9044 {
9045 curr_pool = s390_find_pool (pool_list, insn);
9046 if (!curr_pool)
9047 continue;
9048
9049 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9050 {
9051 rtx addr, pool_ref = NULL_RTX;
9052 find_constant_pool_ref (insn, &pool_ref);
9053 if (pool_ref)
9054 {
9055 if (s390_execute_label (insn))
9056 addr = s390_find_execute (curr_pool, insn);
9057 else
9058 addr = s390_find_constant (curr_pool,
9059 get_pool_constant (pool_ref),
9060 get_pool_mode (pool_ref));
9061
9062 replace_constant_pool_ref (insn, pool_ref, addr);
9063 INSN_CODE (insn) = -1;
9064 }
9065 }
9066 }
9067
9068 /* Dump out all literal pools. */
9069
9070 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9071 s390_dump_pool (curr_pool, 0);
9072
9073 /* Free pool list. */
9074
9075 while (pool_list)
9076 {
9077 struct constant_pool *next = pool_list->next;
9078 s390_free_pool (pool_list);
9079 pool_list = next;
9080 }
9081 }
9082
9083 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9084
9085 void
9086 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9087 {
9088 switch (GET_MODE_CLASS (mode))
9089 {
9090 case MODE_FLOAT:
9091 case MODE_DECIMAL_FLOAT:
9092 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9093
9094 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9095 as_a <scalar_float_mode> (mode), align);
9096 break;
9097
9098 case MODE_INT:
9099 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9100 mark_symbol_refs_as_used (exp);
9101 break;
9102
9103 case MODE_VECTOR_INT:
9104 case MODE_VECTOR_FLOAT:
9105 {
9106 int i;
9107 machine_mode inner_mode;
9108 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9109
9110 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9111 for (i = 0; i < XVECLEN (exp, 0); i++)
9112 s390_output_pool_entry (XVECEXP (exp, 0, i),
9113 inner_mode,
9114 i == 0
9115 ? align
9116 : GET_MODE_BITSIZE (inner_mode));
9117 }
9118 break;
9119
9120 default:
9121 gcc_unreachable ();
9122 }
9123 }
9124
9125
9126 /* Return an RTL expression representing the value of the return address
9127 for the frame COUNT steps up from the current frame. FRAME is the
9128 frame pointer of that frame. */
9129
9130 rtx
9131 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9132 {
9133 int offset;
9134 rtx addr;
9135
9136 /* Without backchain, we fail for all but the current frame. */
9137
9138 if (!TARGET_BACKCHAIN && count > 0)
9139 return NULL_RTX;
9140
9141 /* For the current frame, we need to make sure the initial
9142 value of RETURN_REGNUM is actually saved. */
9143
9144 if (count == 0)
9145 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9146
9147 if (TARGET_PACKED_STACK)
9148 offset = -2 * UNITS_PER_LONG;
9149 else
9150 offset = RETURN_REGNUM * UNITS_PER_LONG;
9151
9152 addr = plus_constant (Pmode, frame, offset);
9153 addr = memory_address (Pmode, addr);
9154 return gen_rtx_MEM (Pmode, addr);
9155 }
9156
9157 /* Return an RTL expression representing the back chain stored in
9158 the current stack frame. */
9159
9160 rtx
9161 s390_back_chain_rtx (void)
9162 {
9163 rtx chain;
9164
9165 gcc_assert (TARGET_BACKCHAIN);
9166
9167 if (TARGET_PACKED_STACK)
9168 chain = plus_constant (Pmode, stack_pointer_rtx,
9169 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9170 else
9171 chain = stack_pointer_rtx;
9172
9173 chain = gen_rtx_MEM (Pmode, chain);
9174 return chain;
9175 }
9176
9177 /* Find first call clobbered register unused in a function.
9178 This could be used as base register in a leaf function
9179 or for holding the return address before epilogue. */
9180
9181 static int
9182 find_unused_clobbered_reg (void)
9183 {
9184 int i;
9185 for (i = 0; i < 6; i++)
9186 if (!df_regs_ever_live_p (i))
9187 return i;
9188 return 0;
9189 }
9190
9191
9192 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9193 clobbered hard regs in SETREG. */
9194
9195 static void
9196 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9197 {
9198 char *regs_ever_clobbered = (char *)data;
9199 unsigned int i, regno;
9200 machine_mode mode = GET_MODE (setreg);
9201
9202 if (GET_CODE (setreg) == SUBREG)
9203 {
9204 rtx inner = SUBREG_REG (setreg);
9205 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9206 return;
9207 regno = subreg_regno (setreg);
9208 }
9209 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9210 regno = REGNO (setreg);
9211 else
9212 return;
9213
9214 for (i = regno;
9215 i < end_hard_regno (mode, regno);
9216 i++)
9217 regs_ever_clobbered[i] = 1;
9218 }
9219
9220 /* Walks through all basic blocks of the current function looking
9221 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9222 of the passed integer array REGS_EVER_CLOBBERED are set to one for
9223 each of those regs. */
9224
9225 static void
9226 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9227 {
9228 basic_block cur_bb;
9229 rtx_insn *cur_insn;
9230 unsigned int i;
9231
9232 memset (regs_ever_clobbered, 0, 32);
9233
9234 /* For non-leaf functions we have to consider all call clobbered regs to be
9235 clobbered. */
9236 if (!crtl->is_leaf)
9237 {
9238 for (i = 0; i < 32; i++)
9239 regs_ever_clobbered[i] = call_really_used_regs[i];
9240 }
9241
9242 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9243 this work is done by liveness analysis (mark_regs_live_at_end).
9244 Special care is needed for functions containing landing pads. Landing pads
9245 may use the eh registers, but the code which sets these registers is not
9246 contained in that function. Hence s390_regs_ever_clobbered is not able to
9247 deal with this automatically. */
9248 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9249 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9250 if (crtl->calls_eh_return
9251 || (cfun->machine->has_landing_pad_p
9252 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9253 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9254
9255 /* For nonlocal gotos all call-saved registers have to be saved.
9256 This flag is also set for the unwinding code in libgcc.
9257 See expand_builtin_unwind_init. For regs_ever_live this is done by
9258 reload. */
9259 if (crtl->saves_all_registers)
9260 for (i = 0; i < 32; i++)
9261 if (!call_really_used_regs[i])
9262 regs_ever_clobbered[i] = 1;
9263
9264 FOR_EACH_BB_FN (cur_bb, cfun)
9265 {
9266 FOR_BB_INSNS (cur_bb, cur_insn)
9267 {
9268 rtx pat;
9269
9270 if (!INSN_P (cur_insn))
9271 continue;
9272
9273 pat = PATTERN (cur_insn);
9274
9275 /* Ignore GPR restore insns. */
9276 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9277 {
9278 if (GET_CODE (pat) == SET
9279 && GENERAL_REG_P (SET_DEST (pat)))
9280 {
9281 /* lgdr */
9282 if (GET_MODE (SET_SRC (pat)) == DImode
9283 && FP_REG_P (SET_SRC (pat)))
9284 continue;
9285
9286 /* l / lg */
9287 if (GET_CODE (SET_SRC (pat)) == MEM)
9288 continue;
9289 }
9290
9291 /* lm / lmg */
9292 if (GET_CODE (pat) == PARALLEL
9293 && load_multiple_operation (pat, VOIDmode))
9294 continue;
9295 }
9296
9297 note_stores (pat,
9298 s390_reg_clobbered_rtx,
9299 regs_ever_clobbered);
9300 }
9301 }
9302 }
9303
9304 /* Determine the frame area which actually has to be accessed
9305 in the function epilogue. The values are stored at the
9306 given pointers AREA_BOTTOM (address of the lowest used stack
9307 address) and AREA_TOP (address of the first item which does
9308 not belong to the stack frame). */
9309
9310 static void
9311 s390_frame_area (int *area_bottom, int *area_top)
9312 {
9313 int b, t;
9314
9315 b = INT_MAX;
9316 t = INT_MIN;
9317
9318 if (cfun_frame_layout.first_restore_gpr != -1)
9319 {
9320 b = (cfun_frame_layout.gprs_offset
9321 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9322 t = b + (cfun_frame_layout.last_restore_gpr
9323 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9324 }
9325
9326 if (TARGET_64BIT && cfun_save_high_fprs_p)
9327 {
9328 b = MIN (b, cfun_frame_layout.f8_offset);
9329 t = MAX (t, (cfun_frame_layout.f8_offset
9330 + cfun_frame_layout.high_fprs * 8));
9331 }
9332
9333 if (!TARGET_64BIT)
9334 {
9335 if (cfun_fpr_save_p (FPR4_REGNUM))
9336 {
9337 b = MIN (b, cfun_frame_layout.f4_offset);
9338 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9339 }
9340 if (cfun_fpr_save_p (FPR6_REGNUM))
9341 {
9342 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9343 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9344 }
9345 }
9346 *area_bottom = b;
9347 *area_top = t;
9348 }
9349 /* Update gpr_save_slots in the frame layout trying to make use of
9350 FPRs as GPR save slots.
9351 This is a helper routine of s390_register_info. */
9352
9353 static void
9354 s390_register_info_gprtofpr ()
9355 {
9356 int save_reg_slot = FPR0_REGNUM;
9357 int i, j;
9358
9359 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9360 return;
9361
9362 /* builtin_eh_return needs to be able to modify the return address
9363 on the stack. It could also adjust the FPR save slot instead but
9364 is it worth the trouble?! */
9365 if (crtl->calls_eh_return)
9366 return;
9367
9368 for (i = 15; i >= 6; i--)
9369 {
9370 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9371 continue;
9372
9373 /* Advance to the next FP register which can be used as a
9374 GPR save slot. */
9375 while ((!call_really_used_regs[save_reg_slot]
9376 || df_regs_ever_live_p (save_reg_slot)
9377 || cfun_fpr_save_p (save_reg_slot))
9378 && FP_REGNO_P (save_reg_slot))
9379 save_reg_slot++;
9380 if (!FP_REGNO_P (save_reg_slot))
9381 {
9382 /* We only want to use ldgr/lgdr if we can get rid of
9383 stm/lm entirely. So undo the gpr slot allocation in
9384 case we ran out of FPR save slots. */
9385 for (j = 6; j <= 15; j++)
9386 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9387 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9388 break;
9389 }
9390 cfun_gpr_save_slot (i) = save_reg_slot++;
9391 }
9392 }
9393
9394 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9395 stdarg.
9396 This is a helper routine for s390_register_info. */
9397
9398 static void
9399 s390_register_info_stdarg_fpr ()
9400 {
9401 int i;
9402 int min_fpr;
9403 int max_fpr;
9404
9405 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9406 f0-f4 for 64 bit. */
9407 if (!cfun->stdarg
9408 || !TARGET_HARD_FLOAT
9409 || !cfun->va_list_fpr_size
9410 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9411 return;
9412
9413 min_fpr = crtl->args.info.fprs;
9414 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9415 if (max_fpr >= FP_ARG_NUM_REG)
9416 max_fpr = FP_ARG_NUM_REG - 1;
9417
9418 /* FPR argument regs start at f0. */
9419 min_fpr += FPR0_REGNUM;
9420 max_fpr += FPR0_REGNUM;
9421
9422 for (i = min_fpr; i <= max_fpr; i++)
9423 cfun_set_fpr_save (i);
9424 }
9425
9426 /* Reserve the GPR save slots for GPRs which need to be saved due to
9427 stdarg.
9428 This is a helper routine for s390_register_info. */
9429
9430 static void
9431 s390_register_info_stdarg_gpr ()
9432 {
9433 int i;
9434 int min_gpr;
9435 int max_gpr;
9436
9437 if (!cfun->stdarg
9438 || !cfun->va_list_gpr_size
9439 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9440 return;
9441
9442 min_gpr = crtl->args.info.gprs;
9443 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9444 if (max_gpr >= GP_ARG_NUM_REG)
9445 max_gpr = GP_ARG_NUM_REG - 1;
9446
9447 /* GPR argument regs start at r2. */
9448 min_gpr += GPR2_REGNUM;
9449 max_gpr += GPR2_REGNUM;
9450
9451 /* If r6 was supposed to be saved into an FPR and now needs to go to
9452 the stack for vararg we have to adjust the restore range to make
9453 sure that the restore is done from stack as well. */
9454 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9455 && min_gpr <= GPR6_REGNUM
9456 && max_gpr >= GPR6_REGNUM)
9457 {
9458 if (cfun_frame_layout.first_restore_gpr == -1
9459 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9460 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9461 if (cfun_frame_layout.last_restore_gpr == -1
9462 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9463 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9464 }
9465
9466 if (cfun_frame_layout.first_save_gpr == -1
9467 || cfun_frame_layout.first_save_gpr > min_gpr)
9468 cfun_frame_layout.first_save_gpr = min_gpr;
9469
9470 if (cfun_frame_layout.last_save_gpr == -1
9471 || cfun_frame_layout.last_save_gpr < max_gpr)
9472 cfun_frame_layout.last_save_gpr = max_gpr;
9473
9474 for (i = min_gpr; i <= max_gpr; i++)
9475 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9476 }
9477
9478 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9479 prologue and epilogue. */
9480
9481 static void
9482 s390_register_info_set_ranges ()
9483 {
9484 int i, j;
9485
9486 /* Find the first and the last save slot supposed to use the stack
9487 to set the restore range.
9488 Vararg regs might be marked as save to stack but only the
9489 call-saved regs really need restoring (i.e. r6). This code
9490 assumes that the vararg regs have not yet been recorded in
9491 cfun_gpr_save_slot. */
9492 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9493 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9494 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9495 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9496 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9497 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9498 }
9499
9500 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9501 for registers which need to be saved in function prologue.
9502 This function can be used until the insns emitted for save/restore
9503 of the regs are visible in the RTL stream. */
9504
9505 static void
9506 s390_register_info ()
9507 {
9508 int i;
9509 char clobbered_regs[32];
9510
9511 gcc_assert (!epilogue_completed);
9512
9513 if (reload_completed)
9514 /* After reload we rely on our own routine to determine which
9515 registers need saving. */
9516 s390_regs_ever_clobbered (clobbered_regs);
9517 else
9518 /* During reload we use regs_ever_live as a base since reload
9519 does changes in there which we otherwise would not be aware
9520 of. */
9521 for (i = 0; i < 32; i++)
9522 clobbered_regs[i] = df_regs_ever_live_p (i);
9523
9524 for (i = 0; i < 32; i++)
9525 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9526
9527 /* Mark the call-saved FPRs which need to be saved.
9528 This needs to be done before checking the special GPRs since the
9529 stack pointer usage depends on whether high FPRs have to be saved
9530 or not. */
9531 cfun_frame_layout.fpr_bitmap = 0;
9532 cfun_frame_layout.high_fprs = 0;
9533 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9534 if (clobbered_regs[i] && !call_really_used_regs[i])
9535 {
9536 cfun_set_fpr_save (i);
9537 if (i >= FPR8_REGNUM)
9538 cfun_frame_layout.high_fprs++;
9539 }
9540
9541 /* Register 12 is used for GOT address, but also as temp in prologue
9542 for split-stack stdarg functions (unless r14 is available). */
9543 clobbered_regs[12]
9544 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9545 || (flag_split_stack && cfun->stdarg
9546 && (crtl->is_leaf || TARGET_TPF_PROFILING
9547 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9548
9549 clobbered_regs[BASE_REGNUM]
9550 |= (cfun->machine->base_reg
9551 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9552
9553 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9554 |= !!frame_pointer_needed;
9555
9556 /* On pre z900 machines this might take until machine dependent
9557 reorg to decide.
9558 save_return_addr_p will only be set on non-zarch machines so
9559 there is no risk that r14 goes into an FPR instead of a stack
9560 slot. */
9561 clobbered_regs[RETURN_REGNUM]
9562 |= (!crtl->is_leaf
9563 || TARGET_TPF_PROFILING
9564 || cfun_frame_layout.save_return_addr_p
9565 || crtl->calls_eh_return);
9566
9567 clobbered_regs[STACK_POINTER_REGNUM]
9568 |= (!crtl->is_leaf
9569 || TARGET_TPF_PROFILING
9570 || cfun_save_high_fprs_p
9571 || get_frame_size () > 0
9572 || (reload_completed && cfun_frame_layout.frame_size > 0)
9573 || cfun->calls_alloca);
9574
9575 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9576
9577 for (i = 6; i < 16; i++)
9578 if (clobbered_regs[i])
9579 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9580
9581 s390_register_info_stdarg_fpr ();
9582 s390_register_info_gprtofpr ();
9583 s390_register_info_set_ranges ();
9584 /* stdarg functions might need to save GPRs 2 to 6. This might
9585 override the GPR->FPR save decision made by
9586 s390_register_info_gprtofpr for r6 since vararg regs must go to
9587 the stack. */
9588 s390_register_info_stdarg_gpr ();
9589 }
9590
9591 /* Return true if REGNO is a global register, but not one
9592 of the special ones that need to be saved/restored in anyway. */
9593
9594 static inline bool
9595 global_not_special_regno_p (int regno)
9596 {
9597 return (global_regs[regno]
9598 /* These registers are special and need to be
9599 restored in any case. */
9600 && !(regno == STACK_POINTER_REGNUM
9601 || regno == RETURN_REGNUM
9602 || regno == BASE_REGNUM
9603 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9604 }
9605
9606 /* This function is called by s390_optimize_prologue in order to get
9607 rid of unnecessary GPR save/restore instructions. The register info
9608 for the GPRs is re-computed and the ranges are re-calculated. */
9609
9610 static void
9611 s390_optimize_register_info ()
9612 {
9613 char clobbered_regs[32];
9614 int i;
9615
9616 gcc_assert (epilogue_completed);
9617
9618 s390_regs_ever_clobbered (clobbered_regs);
9619
9620 /* Global registers do not need to be saved and restored unless it
9621 is one of our special regs. (r12, r13, r14, or r15). */
9622 for (i = 0; i < 32; i++)
9623 clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);
9624
9625 /* There is still special treatment needed for cases invisible to
9626 s390_regs_ever_clobbered. */
9627 clobbered_regs[RETURN_REGNUM]
9628 |= (TARGET_TPF_PROFILING
9629 /* When expanding builtin_return_addr in ESA mode we do not
9630 know whether r14 will later be needed as scratch reg when
9631 doing branch splitting. So the builtin always accesses the
9632 r14 save slot and we need to stick to the save/restore
9633 decision for r14 even if it turns out that it didn't get
9634 clobbered. */
9635 || cfun_frame_layout.save_return_addr_p
9636 || crtl->calls_eh_return);
9637
9638 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9639
9640 for (i = 6; i < 16; i++)
9641 if (!clobbered_regs[i])
9642 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9643
9644 s390_register_info_set_ranges ();
9645 s390_register_info_stdarg_gpr ();
9646 }
9647
9648 /* Fill cfun->machine with info about frame of current function. */
9649
9650 static void
9651 s390_frame_info (void)
9652 {
9653 HOST_WIDE_INT lowest_offset;
9654
9655 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9656 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9657
9658 /* The va_arg builtin uses a constant distance of 16 *
9659 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9660 pointer. So even if we are going to save the stack pointer in an
9661 FPR we need the stack space in order to keep the offsets
9662 correct. */
9663 if (cfun->stdarg && cfun_save_arg_fprs_p)
9664 {
9665 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9666
9667 if (cfun_frame_layout.first_save_gpr_slot == -1)
9668 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9669 }
9670
9671 cfun_frame_layout.frame_size = get_frame_size ();
9672 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9673 fatal_error (input_location,
9674 "total size of local variables exceeds architecture limit");
9675
9676 if (!TARGET_PACKED_STACK)
9677 {
9678 /* Fixed stack layout. */
9679 cfun_frame_layout.backchain_offset = 0;
9680 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9681 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9682 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9683 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9684 * UNITS_PER_LONG);
9685 }
9686 else if (TARGET_BACKCHAIN)
9687 {
9688 /* Kernel stack layout - packed stack, backchain, no float */
9689 gcc_assert (TARGET_SOFT_FLOAT);
9690 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9691 - UNITS_PER_LONG);
9692
9693 /* The distance between the backchain and the return address
9694 save slot must not change. So we always need a slot for the
9695 stack pointer which resides in between. */
9696 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9697
9698 cfun_frame_layout.gprs_offset
9699 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9700
9701 /* FPRs will not be saved. Nevertheless pick sane values to
9702 keep area calculations valid. */
9703 cfun_frame_layout.f0_offset =
9704 cfun_frame_layout.f4_offset =
9705 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9706 }
9707 else
9708 {
9709 int num_fprs;
9710
9711 /* Packed stack layout without backchain. */
9712
9713 /* With stdarg FPRs need their dedicated slots. */
9714 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9715 : (cfun_fpr_save_p (FPR4_REGNUM) +
9716 cfun_fpr_save_p (FPR6_REGNUM)));
9717 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9718
9719 num_fprs = (cfun->stdarg ? 2
9720 : (cfun_fpr_save_p (FPR0_REGNUM)
9721 + cfun_fpr_save_p (FPR2_REGNUM)));
9722 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9723
9724 cfun_frame_layout.gprs_offset
9725 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9726
9727 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9728 - cfun_frame_layout.high_fprs * 8);
9729 }
9730
9731 if (cfun_save_high_fprs_p)
9732 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9733
9734 if (!crtl->is_leaf)
9735 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9736
9737 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9738 sized area at the bottom of the stack. This is required also for
9739 leaf functions. When GCC generates a local stack reference it
9740 will always add STACK_POINTER_OFFSET to all these references. */
9741 if (crtl->is_leaf
9742 && !TARGET_TPF_PROFILING
9743 && cfun_frame_layout.frame_size == 0
9744 && !cfun->calls_alloca)
9745 return;
9746
9747 /* Calculate the number of bytes we have used in our own register
9748 save area. With the packed stack layout we can re-use the
9749 remaining bytes for normal stack elements. */
9750
9751 if (TARGET_PACKED_STACK)
9752 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9753 cfun_frame_layout.f4_offset),
9754 cfun_frame_layout.gprs_offset);
9755 else
9756 lowest_offset = 0;
9757
9758 if (TARGET_BACKCHAIN)
9759 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9760
9761 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9762
9763 /* If under 31 bit an odd number of gprs has to be saved we have to
9764 adjust the frame size to sustain 8 byte alignment of stack
9765 frames. */
9766 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9767 STACK_BOUNDARY / BITS_PER_UNIT - 1)
9768 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
9769 }
9770
9771 /* Generate frame layout. Fills in register and frame data for the current
9772 function in cfun->machine. This routine can be called multiple times;
9773 it will re-do the complete frame layout every time. */
9774
9775 static void
9776 s390_init_frame_layout (void)
9777 {
9778 HOST_WIDE_INT frame_size;
9779 int base_used;
9780
9781 /* After LRA the frame layout is supposed to be read-only and should
9782 not be re-computed. */
9783 if (reload_completed)
9784 return;
9785
9786 do
9787 {
9788 frame_size = cfun_frame_layout.frame_size;
9789
9790 /* Try to predict whether we'll need the base register. */
9791 base_used = crtl->uses_const_pool
9792 || (!DISP_IN_RANGE (frame_size)
9793 && !CONST_OK_FOR_K (frame_size));
9794
9795 /* Decide which register to use as literal pool base. In small
9796 leaf functions, try to use an unused call-clobbered register
9797 as base register to avoid save/restore overhead. */
9798 if (!base_used)
9799 cfun->machine->base_reg = NULL_RTX;
9800 else
9801 {
9802 int br = 0;
9803
9804 if (crtl->is_leaf)
9805 /* Prefer r5 (most likely to be free). */
9806 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
9807 ;
9808 cfun->machine->base_reg =
9809 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
9810 }
9811
9812 s390_register_info ();
9813 s390_frame_info ();
9814 }
9815 while (frame_size != cfun_frame_layout.frame_size);
9816 }
9817
9818 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
9819 the TX is nonescaping. A transaction is considered escaping if
9820 there is at least one path from tbegin returning CC0 to the
9821 function exit block without an tend.
9822
9823 The check so far has some limitations:
9824 - only single tbegin/tend BBs are supported
9825 - the first cond jump after tbegin must separate the CC0 path from ~CC0
9826 - when CC is copied to a GPR and the CC0 check is done with the GPR
9827 this is not supported
9828 */
9829
9830 static void
9831 s390_optimize_nonescaping_tx (void)
9832 {
9833 const unsigned int CC0 = 1 << 3;
9834 basic_block tbegin_bb = NULL;
9835 basic_block tend_bb = NULL;
9836 basic_block bb;
9837 rtx_insn *insn;
9838 bool result = true;
9839 int bb_index;
9840 rtx_insn *tbegin_insn = NULL;
9841
9842 if (!cfun->machine->tbegin_p)
9843 return;
9844
9845 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
9846 {
9847 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
9848
9849 if (!bb)
9850 continue;
9851
9852 FOR_BB_INSNS (bb, insn)
9853 {
9854 rtx ite, cc, pat, target;
9855 unsigned HOST_WIDE_INT mask;
9856
9857 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
9858 continue;
9859
9860 pat = PATTERN (insn);
9861
9862 if (GET_CODE (pat) == PARALLEL)
9863 pat = XVECEXP (pat, 0, 0);
9864
9865 if (GET_CODE (pat) != SET
9866 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
9867 continue;
9868
9869 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
9870 {
9871 rtx_insn *tmp;
9872
9873 tbegin_insn = insn;
9874
9875 /* Just return if the tbegin doesn't have clobbers. */
9876 if (GET_CODE (PATTERN (insn)) != PARALLEL)
9877 return;
9878
9879 if (tbegin_bb != NULL)
9880 return;
9881
9882 /* Find the next conditional jump. */
9883 for (tmp = NEXT_INSN (insn);
9884 tmp != NULL_RTX;
9885 tmp = NEXT_INSN (tmp))
9886 {
9887 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
9888 return;
9889 if (!JUMP_P (tmp))
9890 continue;
9891
9892 ite = SET_SRC (PATTERN (tmp));
9893 if (GET_CODE (ite) != IF_THEN_ELSE)
9894 continue;
9895
9896 cc = XEXP (XEXP (ite, 0), 0);
9897 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
9898 || GET_MODE (cc) != CCRAWmode
9899 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
9900 return;
9901
9902 if (bb->succs->length () != 2)
9903 return;
9904
9905 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
9906 if (GET_CODE (XEXP (ite, 0)) == NE)
9907 mask ^= 0xf;
9908
9909 if (mask == CC0)
9910 target = XEXP (ite, 1);
9911 else if (mask == (CC0 ^ 0xf))
9912 target = XEXP (ite, 2);
9913 else
9914 return;
9915
9916 {
9917 edge_iterator ei;
9918 edge e1, e2;
9919
9920 ei = ei_start (bb->succs);
9921 e1 = ei_safe_edge (ei);
9922 ei_next (&ei);
9923 e2 = ei_safe_edge (ei);
9924
9925 if (e2->flags & EDGE_FALLTHRU)
9926 {
9927 e2 = e1;
9928 e1 = ei_safe_edge (ei);
9929 }
9930
9931 if (!(e1->flags & EDGE_FALLTHRU))
9932 return;
9933
9934 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
9935 }
9936 if (tmp == BB_END (bb))
9937 break;
9938 }
9939 }
9940
9941 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
9942 {
9943 if (tend_bb != NULL)
9944 return;
9945 tend_bb = bb;
9946 }
9947 }
9948 }
9949
9950 /* Either we successfully remove the FPR clobbers here or we are not
9951 able to do anything for this TX. Both cases don't qualify for
9952 another look. */
9953 cfun->machine->tbegin_p = false;
9954
9955 if (tbegin_bb == NULL || tend_bb == NULL)
9956 return;
9957
9958 calculate_dominance_info (CDI_POST_DOMINATORS);
9959 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
9960 free_dominance_info (CDI_POST_DOMINATORS);
9961
9962 if (!result)
9963 return;
9964
9965 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
9966 gen_rtvec (2,
9967 XVECEXP (PATTERN (tbegin_insn), 0, 0),
9968 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
9969 INSN_CODE (tbegin_insn) = -1;
9970 df_insn_rescan (tbegin_insn);
9971
9972 return;
9973 }
9974
9975 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
9976 have the same size, this is equivalent to CLASS_MAX_NREGS. */
9977
9978 static unsigned int
9979 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
9980 {
9981 return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
9982 }
9983
9984 /* Implement TARGET_HARD_REGNO_MODE_OK.
9985
9986 Integer modes <= word size fit into any GPR.
9987 Integer modes > word size fit into successive GPRs, starting with
9988 an even-numbered register.
9989 SImode and DImode fit into FPRs as well.
9990
9991 Floating point modes <= word size fit into any FPR or GPR.
9992 Floating point modes > word size (i.e. DFmode on 32-bit) fit
9993 into any FPR, or an even-odd GPR pair.
9994 TFmode fits only into an even-odd FPR pair.
9995
9996 Complex floating point modes fit either into two FPRs, or into
9997 successive GPRs (again starting with an even number).
9998 TCmode fits only into two successive even-odd FPR pairs.
9999
10000 Condition code modes fit only into the CC register. */
10001
10002 static bool
10003 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10004 {
10005 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10006 return false;
10007
10008 switch (REGNO_REG_CLASS (regno))
10009 {
10010 case VEC_REGS:
10011 return ((GET_MODE_CLASS (mode) == MODE_INT
10012 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10013 || mode == DFmode
10014 || (TARGET_VXE && mode == SFmode)
10015 || s390_vector_mode_supported_p (mode));
10016 break;
10017 case FP_REGS:
10018 if (TARGET_VX
10019 && ((GET_MODE_CLASS (mode) == MODE_INT
10020 && s390_class_max_nregs (FP_REGS, mode) == 1)
10021 || mode == DFmode
10022 || s390_vector_mode_supported_p (mode)))
10023 return true;
10024
10025 if (REGNO_PAIR_OK (regno, mode))
10026 {
10027 if (mode == SImode || mode == DImode)
10028 return true;
10029
10030 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10031 return true;
10032 }
10033 break;
10034 case ADDR_REGS:
10035 if (FRAME_REGNO_P (regno) && mode == Pmode)
10036 return true;
10037
10038 /* fallthrough */
10039 case GENERAL_REGS:
10040 if (REGNO_PAIR_OK (regno, mode))
10041 {
10042 if (TARGET_ZARCH
10043 || (mode != TFmode && mode != TCmode && mode != TDmode))
10044 return true;
10045 }
10046 break;
10047 case CC_REGS:
10048 if (GET_MODE_CLASS (mode) == MODE_CC)
10049 return true;
10050 break;
10051 case ACCESS_REGS:
10052 if (REGNO_PAIR_OK (regno, mode))
10053 {
10054 if (mode == SImode || mode == Pmode)
10055 return true;
10056 }
10057 break;
10058 default:
10059 return false;
10060 }
10061
10062 return false;
10063 }
10064
10065 /* Implement TARGET_MODES_TIEABLE_P. */
10066
10067 static bool
10068 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10069 {
10070 return ((mode1 == SFmode || mode1 == DFmode)
10071 == (mode2 == SFmode || mode2 == DFmode));
10072 }
10073
10074 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10075
10076 bool
10077 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10078 {
10079 /* Once we've decided upon a register to use as base register, it must
10080 no longer be used for any other purpose. */
10081 if (cfun->machine->base_reg)
10082 if (REGNO (cfun->machine->base_reg) == old_reg
10083 || REGNO (cfun->machine->base_reg) == new_reg)
10084 return false;
10085
10086 /* Prevent regrename from using call-saved regs which haven't
10087 actually been saved. This is necessary since regrename assumes
10088 the backend save/restore decisions are based on
10089 df_regs_ever_live. Since we have our own routine we have to tell
10090 regrename manually about it. */
10091 if (GENERAL_REGNO_P (new_reg)
10092 && !call_really_used_regs[new_reg]
10093 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10094 return false;
10095
10096 return true;
10097 }
10098
10099 /* Return nonzero if register REGNO can be used as a scratch register
10100 in peephole2. */
10101
10102 static bool
10103 s390_hard_regno_scratch_ok (unsigned int regno)
10104 {
10105 /* See s390_hard_regno_rename_ok. */
10106 if (GENERAL_REGNO_P (regno)
10107 && !call_really_used_regs[regno]
10108 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10109 return false;
10110
10111 return true;
10112 }
10113
10114 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10115 code that runs in z/Architecture mode, but conforms to the 31-bit
10116 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10117 bytes are saved across calls, however. */
10118
10119 static bool
10120 s390_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED,
10121 unsigned int regno, machine_mode mode)
10122 {
10123 if (!TARGET_64BIT
10124 && TARGET_ZARCH
10125 && GET_MODE_SIZE (mode) > 4
10126 && ((regno >= 6 && regno <= 15) || regno == 32))
10127 return true;
10128
10129 if (TARGET_VX
10130 && GET_MODE_SIZE (mode) > 8
10131 && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10132 || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10133 return true;
10134
10135 return false;
10136 }
10137
10138 /* Maximum number of registers to represent a value of mode MODE
10139 in a register of class RCLASS. */
10140
10141 int
10142 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10143 {
10144 int reg_size;
10145 bool reg_pair_required_p = false;
10146
10147 switch (rclass)
10148 {
10149 case FP_REGS:
10150 case VEC_REGS:
10151 reg_size = TARGET_VX ? 16 : 8;
10152
10153 /* TF and TD modes would fit into a VR but we put them into a
10154 register pair since we do not have 128bit FP instructions on
10155 full VRs. */
10156 if (TARGET_VX
10157 && SCALAR_FLOAT_MODE_P (mode)
10158 && GET_MODE_SIZE (mode) >= 16)
10159 reg_pair_required_p = true;
10160
10161 /* Even if complex types would fit into a single FPR/VR we force
10162 them into a register pair to deal with the parts more easily.
10163 (FIXME: What about complex ints?) */
10164 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10165 reg_pair_required_p = true;
10166 break;
10167 case ACCESS_REGS:
10168 reg_size = 4;
10169 break;
10170 default:
10171 reg_size = UNITS_PER_WORD;
10172 break;
10173 }
10174
10175 if (reg_pair_required_p)
10176 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10177
10178 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10179 }
10180
10181 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10182
10183 static bool
10184 s390_can_change_mode_class (machine_mode from_mode,
10185 machine_mode to_mode,
10186 reg_class_t rclass)
10187 {
10188 machine_mode small_mode;
10189 machine_mode big_mode;
10190
10191 /* V1TF and TF have different representations in vector
10192 registers. */
10193 if (reg_classes_intersect_p (VEC_REGS, rclass)
10194 && ((from_mode == V1TFmode && to_mode == TFmode)
10195 || (from_mode == TFmode && to_mode == V1TFmode)))
10196 return false;
10197
10198 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10199 return true;
10200
10201 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10202 {
10203 small_mode = from_mode;
10204 big_mode = to_mode;
10205 }
10206 else
10207 {
10208 small_mode = to_mode;
10209 big_mode = from_mode;
10210 }
10211
10212 /* Values residing in VRs are little-endian style. All modes are
10213 placed left-aligned in an VR. This means that we cannot allow
10214 switching between modes with differing sizes. Also if the vector
10215 facility is available we still place TFmode values in VR register
10216 pairs, since the only instructions we have operating on TFmodes
10217 only deal with register pairs. Therefore we have to allow DFmode
10218 subregs of TFmodes to enable the TFmode splitters. */
10219 if (reg_classes_intersect_p (VEC_REGS, rclass)
10220 && (GET_MODE_SIZE (small_mode) < 8
10221 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10222 return false;
10223
10224 /* Likewise for access registers, since they have only half the
10225 word size on 64-bit. */
10226 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10227 return false;
10228
10229 return true;
10230 }
10231
10232 /* Return true if we use LRA instead of reload pass. */
10233 static bool
10234 s390_lra_p (void)
10235 {
10236 return s390_lra_flag;
10237 }
10238
10239 /* Return true if register FROM can be eliminated via register TO. */
10240
10241 static bool
10242 s390_can_eliminate (const int from, const int to)
10243 {
10244 /* We have not marked the base register as fixed.
10245 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10246 If a function requires the base register, we say here that this
10247 elimination cannot be performed. This will cause reload to free
10248 up the base register (as if it were fixed). On the other hand,
10249 if the current function does *not* require the base register, we
10250 say here the elimination succeeds, which in turn allows reload
10251 to allocate the base register for any other purpose. */
10252 if (from == BASE_REGNUM && to == BASE_REGNUM)
10253 {
10254 s390_init_frame_layout ();
10255 return cfun->machine->base_reg == NULL_RTX;
10256 }
10257
10258 /* Everything else must point into the stack frame. */
10259 gcc_assert (to == STACK_POINTER_REGNUM
10260 || to == HARD_FRAME_POINTER_REGNUM);
10261
10262 gcc_assert (from == FRAME_POINTER_REGNUM
10263 || from == ARG_POINTER_REGNUM
10264 || from == RETURN_ADDRESS_POINTER_REGNUM);
10265
10266 /* Make sure we actually saved the return address. */
10267 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10268 if (!crtl->calls_eh_return
10269 && !cfun->stdarg
10270 && !cfun_frame_layout.save_return_addr_p)
10271 return false;
10272
10273 return true;
10274 }
10275
10276 /* Return offset between register FROM and TO initially after prolog. */
10277
10278 HOST_WIDE_INT
10279 s390_initial_elimination_offset (int from, int to)
10280 {
10281 HOST_WIDE_INT offset;
10282
10283 /* ??? Why are we called for non-eliminable pairs? */
10284 if (!s390_can_eliminate (from, to))
10285 return 0;
10286
10287 switch (from)
10288 {
10289 case FRAME_POINTER_REGNUM:
10290 offset = (get_frame_size()
10291 + STACK_POINTER_OFFSET
10292 + crtl->outgoing_args_size);
10293 break;
10294
10295 case ARG_POINTER_REGNUM:
10296 s390_init_frame_layout ();
10297 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10298 break;
10299
10300 case RETURN_ADDRESS_POINTER_REGNUM:
10301 s390_init_frame_layout ();
10302
10303 if (cfun_frame_layout.first_save_gpr_slot == -1)
10304 {
10305 /* If it turns out that for stdarg nothing went into the reg
10306 save area we also do not need the return address
10307 pointer. */
10308 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10309 return 0;
10310
10311 gcc_unreachable ();
10312 }
10313
10314 /* In order to make the following work it is not necessary for
10315 r14 to have a save slot. It is sufficient if one other GPR
10316 got one. Since the GPRs are always stored without gaps we
10317 are able to calculate where the r14 save slot would
10318 reside. */
10319 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10320 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10321 UNITS_PER_LONG);
10322 break;
10323
10324 case BASE_REGNUM:
10325 offset = 0;
10326 break;
10327
10328 default:
10329 gcc_unreachable ();
10330 }
10331
10332 return offset;
10333 }
10334
10335 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10336 to register BASE. Return generated insn. */
10337
10338 static rtx
10339 save_fpr (rtx base, int offset, int regnum)
10340 {
10341 rtx addr;
10342 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10343
10344 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10345 set_mem_alias_set (addr, get_varargs_alias_set ());
10346 else
10347 set_mem_alias_set (addr, get_frame_alias_set ());
10348
10349 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10350 }
10351
10352 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10353 to register BASE. Return generated insn. */
10354
10355 static rtx
10356 restore_fpr (rtx base, int offset, int regnum)
10357 {
10358 rtx addr;
10359 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10360 set_mem_alias_set (addr, get_frame_alias_set ());
10361
10362 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10363 }
10364
10365 /* Generate insn to save registers FIRST to LAST into
10366 the register save area located at offset OFFSET
10367 relative to register BASE. */
10368
10369 static rtx
10370 save_gprs (rtx base, int offset, int first, int last)
10371 {
10372 rtx addr, insn, note;
10373 int i;
10374
10375 addr = plus_constant (Pmode, base, offset);
10376 addr = gen_rtx_MEM (Pmode, addr);
10377
10378 set_mem_alias_set (addr, get_frame_alias_set ());
10379
10380 /* Special-case single register. */
10381 if (first == last)
10382 {
10383 if (TARGET_64BIT)
10384 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10385 else
10386 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10387
10388 if (!global_not_special_regno_p (first))
10389 RTX_FRAME_RELATED_P (insn) = 1;
10390 return insn;
10391 }
10392
10393
10394 insn = gen_store_multiple (addr,
10395 gen_rtx_REG (Pmode, first),
10396 GEN_INT (last - first + 1));
10397
10398 if (first <= 6 && cfun->stdarg)
10399 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10400 {
10401 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10402
10403 if (first + i <= 6)
10404 set_mem_alias_set (mem, get_varargs_alias_set ());
10405 }
10406
10407 /* We need to set the FRAME_RELATED flag on all SETs
10408 inside the store-multiple pattern.
10409
10410 However, we must not emit DWARF records for registers 2..5
10411 if they are stored for use by variable arguments ...
10412
10413 ??? Unfortunately, it is not enough to simply not the
10414 FRAME_RELATED flags for those SETs, because the first SET
10415 of the PARALLEL is always treated as if it had the flag
10416 set, even if it does not. Therefore we emit a new pattern
10417 without those registers as REG_FRAME_RELATED_EXPR note. */
10418
10419 if (first >= 6 && !global_not_special_regno_p (first))
10420 {
10421 rtx pat = PATTERN (insn);
10422
10423 for (i = 0; i < XVECLEN (pat, 0); i++)
10424 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10425 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10426 0, i)))))
10427 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10428
10429 RTX_FRAME_RELATED_P (insn) = 1;
10430 }
10431 else if (last >= 6)
10432 {
10433 int start;
10434
10435 for (start = first >= 6 ? first : 6; start <= last; start++)
10436 if (!global_not_special_regno_p (start))
10437 break;
10438
10439 if (start > last)
10440 return insn;
10441
10442 addr = plus_constant (Pmode, base,
10443 offset + (start - first) * UNITS_PER_LONG);
10444
10445 if (start == last)
10446 {
10447 if (TARGET_64BIT)
10448 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10449 gen_rtx_REG (Pmode, start));
10450 else
10451 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10452 gen_rtx_REG (Pmode, start));
10453 note = PATTERN (note);
10454
10455 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10456 RTX_FRAME_RELATED_P (insn) = 1;
10457
10458 return insn;
10459 }
10460
10461 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10462 gen_rtx_REG (Pmode, start),
10463 GEN_INT (last - start + 1));
10464 note = PATTERN (note);
10465
10466 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10467
10468 for (i = 0; i < XVECLEN (note, 0); i++)
10469 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10470 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10471 0, i)))))
10472 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10473
10474 RTX_FRAME_RELATED_P (insn) = 1;
10475 }
10476
10477 return insn;
10478 }
10479
10480 /* Generate insn to restore registers FIRST to LAST from
10481 the register save area located at offset OFFSET
10482 relative to register BASE. */
10483
10484 static rtx
10485 restore_gprs (rtx base, int offset, int first, int last)
10486 {
10487 rtx addr, insn;
10488
10489 addr = plus_constant (Pmode, base, offset);
10490 addr = gen_rtx_MEM (Pmode, addr);
10491 set_mem_alias_set (addr, get_frame_alias_set ());
10492
10493 /* Special-case single register. */
10494 if (first == last)
10495 {
10496 if (TARGET_64BIT)
10497 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10498 else
10499 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10500
10501 RTX_FRAME_RELATED_P (insn) = 1;
10502 return insn;
10503 }
10504
10505 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10506 addr,
10507 GEN_INT (last - first + 1));
10508 RTX_FRAME_RELATED_P (insn) = 1;
10509 return insn;
10510 }
10511
10512 /* Return insn sequence to load the GOT register. */
10513
10514 rtx_insn *
10515 s390_load_got (void)
10516 {
10517 rtx_insn *insns;
10518
10519 /* We cannot use pic_offset_table_rtx here since we use this
10520 function also for non-pic if __tls_get_offset is called and in
10521 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10522 aren't usable. */
10523 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10524
10525 start_sequence ();
10526
10527 emit_move_insn (got_rtx, s390_got_symbol ());
10528
10529 insns = get_insns ();
10530 end_sequence ();
10531 return insns;
10532 }
10533
10534 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10535 and the change to the stack pointer. */
10536
10537 static void
10538 s390_emit_stack_tie (void)
10539 {
10540 rtx mem = gen_frame_mem (BLKmode,
10541 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10542
10543 emit_insn (gen_stack_tie (mem));
10544 }
10545
10546 /* Copy GPRS into FPR save slots. */
10547
10548 static void
10549 s390_save_gprs_to_fprs (void)
10550 {
10551 int i;
10552
10553 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10554 return;
10555
10556 for (i = 6; i < 16; i++)
10557 {
10558 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10559 {
10560 rtx_insn *insn =
10561 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10562 gen_rtx_REG (DImode, i));
10563 RTX_FRAME_RELATED_P (insn) = 1;
10564 /* This prevents dwarf2cfi from interpreting the set. Doing
10565 so it might emit def_cfa_register infos setting an FPR as
10566 new CFA. */
10567 add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10568 }
10569 }
10570 }
10571
10572 /* Restore GPRs from FPR save slots. */
10573
10574 static void
10575 s390_restore_gprs_from_fprs (void)
10576 {
10577 int i;
10578
10579 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10580 return;
10581
10582 for (i = 6; i < 16; i++)
10583 {
10584 rtx_insn *insn;
10585
10586 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10587 continue;
10588
10589 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10590
10591 if (i == STACK_POINTER_REGNUM)
10592 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10593 else
10594 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10595
10596 df_set_regs_ever_live (i, true);
10597 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10598 if (i == STACK_POINTER_REGNUM)
10599 add_reg_note (insn, REG_CFA_DEF_CFA,
10600 plus_constant (Pmode, stack_pointer_rtx,
10601 STACK_POINTER_OFFSET));
10602 RTX_FRAME_RELATED_P (insn) = 1;
10603 }
10604 }
10605
10606
10607 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10608 generation. */
10609
10610 namespace {
10611
10612 const pass_data pass_data_s390_early_mach =
10613 {
10614 RTL_PASS, /* type */
10615 "early_mach", /* name */
10616 OPTGROUP_NONE, /* optinfo_flags */
10617 TV_MACH_DEP, /* tv_id */
10618 0, /* properties_required */
10619 0, /* properties_provided */
10620 0, /* properties_destroyed */
10621 0, /* todo_flags_start */
10622 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10623 };
10624
10625 class pass_s390_early_mach : public rtl_opt_pass
10626 {
10627 public:
10628 pass_s390_early_mach (gcc::context *ctxt)
10629 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10630 {}
10631
10632 /* opt_pass methods: */
10633 virtual unsigned int execute (function *);
10634
10635 }; // class pass_s390_early_mach
10636
10637 unsigned int
10638 pass_s390_early_mach::execute (function *fun)
10639 {
10640 rtx_insn *insn;
10641
10642 /* Try to get rid of the FPR clobbers. */
10643 s390_optimize_nonescaping_tx ();
10644
10645 /* Re-compute register info. */
10646 s390_register_info ();
10647
10648 /* If we're using a base register, ensure that it is always valid for
10649 the first non-prologue instruction. */
10650 if (fun->machine->base_reg)
10651 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10652
10653 /* Annotate all constant pool references to let the scheduler know
10654 they implicitly use the base register. */
10655 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10656 if (INSN_P (insn))
10657 {
10658 annotate_constant_pool_refs (insn);
10659 df_insn_rescan (insn);
10660 }
10661 return 0;
10662 }
10663
10664 } // anon namespace
10665
10666 rtl_opt_pass *
10667 make_pass_s390_early_mach (gcc::context *ctxt)
10668 {
10669 return new pass_s390_early_mach (ctxt);
10670 }
10671
10672 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
10673 - push too big immediates to the literal pool and annotate the refs
10674 - emit frame related notes for stack pointer changes. */
10675
10676 static rtx
10677 s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
10678 {
10679 rtx_insn *insn;
10680 rtx orig_offset = offset;
10681
10682 gcc_assert (REG_P (target));
10683 gcc_assert (REG_P (reg));
10684 gcc_assert (CONST_INT_P (offset));
10685
10686 if (offset == const0_rtx) /* lr/lgr */
10687 {
10688 insn = emit_move_insn (target, reg);
10689 }
10690 else if (DISP_IN_RANGE (INTVAL (offset))) /* la */
10691 {
10692 insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
10693 offset));
10694 }
10695 else
10696 {
10697 if (!satisfies_constraint_K (offset) /* ahi/aghi */
10698 && (!TARGET_EXTIMM
10699 || (!satisfies_constraint_Op (offset) /* alfi/algfi */
10700 && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
10701 offset = force_const_mem (Pmode, offset);
10702
10703 if (target != reg)
10704 {
10705 insn = emit_move_insn (target, reg);
10706 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10707 }
10708
10709 insn = emit_insn (gen_add2_insn (target, offset));
10710
10711 if (!CONST_INT_P (offset))
10712 {
10713 annotate_constant_pool_refs (insn);
10714
10715 if (frame_related_p)
10716 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10717 gen_rtx_SET (target,
10718 gen_rtx_PLUS (Pmode, target,
10719 orig_offset)));
10720 }
10721 }
10722
10723 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10724
10725 /* If this is a stack adjustment and we are generating a stack clash
10726 prologue, then add a REG_STACK_CHECK note to signal that this insn
10727 should be left alone. */
10728 if (flag_stack_clash_protection && target == stack_pointer_rtx)
10729 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
10730
10731 return insn;
10732 }
10733
10734 /* Emit a compare instruction with a volatile memory access as stack
10735 probe. It does not waste store tags and does not clobber any
10736 registers apart from the condition code. */
10737 static void
10738 s390_emit_stack_probe (rtx addr)
10739 {
10740 rtx tmp = gen_rtx_MEM (Pmode, addr);
10741 MEM_VOLATILE_P (tmp) = 1;
10742 s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
10743 emit_insn (gen_blockage ());
10744 }
10745
10746 /* Use a runtime loop if we have to emit more probes than this. */
10747 #define MIN_UNROLL_PROBES 3
10748
10749 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
10750 if necessary. LAST_PROBE_OFFSET contains the offset of the closest
10751 probe relative to the stack pointer.
10752
10753 Note that SIZE is negative.
10754
10755 The return value is true if TEMP_REG has been clobbered. */
10756 static bool
10757 allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
10758 rtx temp_reg)
10759 {
10760 bool temp_reg_clobbered_p = false;
10761 HOST_WIDE_INT probe_interval
10762 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
10763 HOST_WIDE_INT guard_size
10764 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE);
10765
10766 if (flag_stack_clash_protection)
10767 {
10768 if (last_probe_offset + -INTVAL (size) < guard_size)
10769 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
10770 else
10771 {
10772 rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
10773 HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
10774 HOST_WIDE_INT num_probes = rounded_size / probe_interval;
10775 HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
10776
10777 if (num_probes < MIN_UNROLL_PROBES)
10778 {
10779 /* Emit unrolled probe statements. */
10780
10781 for (unsigned int i = 0; i < num_probes; i++)
10782 {
10783 s390_prologue_plus_offset (stack_pointer_rtx,
10784 stack_pointer_rtx,
10785 GEN_INT (-probe_interval), true);
10786 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
10787 stack_pointer_rtx,
10788 offset));
10789 }
10790 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
10791 }
10792 else
10793 {
10794 /* Emit a loop probing the pages. */
10795
10796 rtx_code_label *loop_start_label = gen_label_rtx ();
10797
10798 /* From now on temp_reg will be the CFA register. */
10799 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
10800 GEN_INT (-rounded_size), true);
10801 emit_label (loop_start_label);
10802
10803 s390_prologue_plus_offset (stack_pointer_rtx,
10804 stack_pointer_rtx,
10805 GEN_INT (-probe_interval), false);
10806 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
10807 stack_pointer_rtx,
10808 offset));
10809 emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
10810 GT, NULL_RTX,
10811 Pmode, 1, loop_start_label);
10812
10813 /* Without this make_edges ICEes. */
10814 JUMP_LABEL (get_last_insn ()) = loop_start_label;
10815 LABEL_NUSES (loop_start_label) = 1;
10816
10817 /* That's going to be a NOP since stack pointer and
10818 temp_reg are supposed to be the same here. We just
10819 emit it to set the CFA reg back to r15. */
10820 s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
10821 const0_rtx, true);
10822 temp_reg_clobbered_p = true;
10823 dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
10824 }
10825
10826 /* Handle any residual allocation request. */
10827 s390_prologue_plus_offset (stack_pointer_rtx,
10828 stack_pointer_rtx,
10829 GEN_INT (-residual), true);
10830 last_probe_offset += residual;
10831 if (last_probe_offset >= probe_interval)
10832 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
10833 stack_pointer_rtx,
10834 GEN_INT (residual
10835 - UNITS_PER_LONG)));
10836
10837 return temp_reg_clobbered_p;
10838 }
10839 }
10840
10841 /* Subtract frame size from stack pointer. */
10842 s390_prologue_plus_offset (stack_pointer_rtx,
10843 stack_pointer_rtx,
10844 size, true);
10845
10846 return temp_reg_clobbered_p;
10847 }
10848
10849 /* Expand the prologue into a bunch of separate insns. */
10850
10851 void
10852 s390_emit_prologue (void)
10853 {
10854 rtx insn, addr;
10855 rtx temp_reg;
10856 int i;
10857 int offset;
10858 int next_fpr = 0;
10859
10860 /* Choose best register to use for temp use within prologue.
10861 TPF with profiling must avoid the register 14 - the tracing function
10862 needs the original contents of r14 to be preserved. */
10863
10864 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10865 && !crtl->is_leaf
10866 && !TARGET_TPF_PROFILING)
10867 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10868 else if (flag_split_stack && cfun->stdarg)
10869 temp_reg = gen_rtx_REG (Pmode, 12);
10870 else
10871 temp_reg = gen_rtx_REG (Pmode, 1);
10872
10873 /* When probing for stack-clash mitigation, we have to track the distance
10874 between the stack pointer and closest known reference.
10875
10876 Most of the time we have to make a worst case assumption. The
10877 only exception is when TARGET_BACKCHAIN is active, in which case
10878 we know *sp (offset 0) was written. */
10879 HOST_WIDE_INT probe_interval
10880 = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
10881 HOST_WIDE_INT last_probe_offset
10882 = (TARGET_BACKCHAIN
10883 ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
10884 : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
10885
10886 s390_save_gprs_to_fprs ();
10887
10888 /* Save call saved gprs. */
10889 if (cfun_frame_layout.first_save_gpr != -1)
10890 {
10891 insn = save_gprs (stack_pointer_rtx,
10892 cfun_frame_layout.gprs_offset +
10893 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
10894 - cfun_frame_layout.first_save_gpr_slot),
10895 cfun_frame_layout.first_save_gpr,
10896 cfun_frame_layout.last_save_gpr);
10897
10898 /* This is not 100% correct. If we have more than one register saved,
10899 then LAST_PROBE_OFFSET can move even closer to sp. */
10900 last_probe_offset
10901 = (cfun_frame_layout.gprs_offset +
10902 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
10903 - cfun_frame_layout.first_save_gpr_slot));
10904
10905 emit_insn (insn);
10906 }
10907
10908 /* Dummy insn to mark literal pool slot. */
10909
10910 if (cfun->machine->base_reg)
10911 emit_insn (gen_main_pool (cfun->machine->base_reg));
10912
10913 offset = cfun_frame_layout.f0_offset;
10914
10915 /* Save f0 and f2. */
10916 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
10917 {
10918 if (cfun_fpr_save_p (i))
10919 {
10920 save_fpr (stack_pointer_rtx, offset, i);
10921 if (offset < last_probe_offset)
10922 last_probe_offset = offset;
10923 offset += 8;
10924 }
10925 else if (!TARGET_PACKED_STACK || cfun->stdarg)
10926 offset += 8;
10927 }
10928
10929 /* Save f4 and f6. */
10930 offset = cfun_frame_layout.f4_offset;
10931 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10932 {
10933 if (cfun_fpr_save_p (i))
10934 {
10935 insn = save_fpr (stack_pointer_rtx, offset, i);
10936 if (offset < last_probe_offset)
10937 last_probe_offset = offset;
10938 offset += 8;
10939
10940 /* If f4 and f6 are call clobbered they are saved due to
10941 stdargs and therefore are not frame related. */
10942 if (!call_really_used_regs[i])
10943 RTX_FRAME_RELATED_P (insn) = 1;
10944 }
10945 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
10946 offset += 8;
10947 }
10948
10949 if (TARGET_PACKED_STACK
10950 && cfun_save_high_fprs_p
10951 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
10952 {
10953 offset = (cfun_frame_layout.f8_offset
10954 + (cfun_frame_layout.high_fprs - 1) * 8);
10955
10956 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
10957 if (cfun_fpr_save_p (i))
10958 {
10959 insn = save_fpr (stack_pointer_rtx, offset, i);
10960 if (offset < last_probe_offset)
10961 last_probe_offset = offset;
10962
10963 RTX_FRAME_RELATED_P (insn) = 1;
10964 offset -= 8;
10965 }
10966 if (offset >= cfun_frame_layout.f8_offset)
10967 next_fpr = i;
10968 }
10969
10970 if (!TARGET_PACKED_STACK)
10971 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
10972
10973 if (flag_stack_usage_info)
10974 current_function_static_stack_size = cfun_frame_layout.frame_size;
10975
10976 /* Decrement stack pointer. */
10977
10978 if (cfun_frame_layout.frame_size > 0)
10979 {
10980 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10981 rtx_insn *stack_pointer_backup_loc;
10982 bool temp_reg_clobbered_p;
10983
10984 if (s390_stack_size)
10985 {
10986 HOST_WIDE_INT stack_guard;
10987
10988 if (s390_stack_guard)
10989 stack_guard = s390_stack_guard;
10990 else
10991 {
10992 /* If no value for stack guard is provided the smallest power of 2
10993 larger than the current frame size is chosen. */
10994 stack_guard = 1;
10995 while (stack_guard < cfun_frame_layout.frame_size)
10996 stack_guard <<= 1;
10997 }
10998
10999 if (cfun_frame_layout.frame_size >= s390_stack_size)
11000 {
11001 warning (0, "frame size of function %qs is %wd"
11002 " bytes exceeding user provided stack limit of "
11003 "%d bytes. "
11004 "An unconditional trap is added.",
11005 current_function_name(), cfun_frame_layout.frame_size,
11006 s390_stack_size);
11007 emit_insn (gen_trap ());
11008 emit_barrier ();
11009 }
11010 else
11011 {
11012 /* stack_guard has to be smaller than s390_stack_size.
11013 Otherwise we would emit an AND with zero which would
11014 not match the test under mask pattern. */
11015 if (stack_guard >= s390_stack_size)
11016 {
11017 warning (0, "frame size of function %qs is %wd"
11018 " bytes which is more than half the stack size. "
11019 "The dynamic check would not be reliable. "
11020 "No check emitted for this function.",
11021 current_function_name(),
11022 cfun_frame_layout.frame_size);
11023 }
11024 else
11025 {
11026 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11027 & ~(stack_guard - 1));
11028
11029 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11030 GEN_INT (stack_check_mask));
11031 if (TARGET_64BIT)
11032 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11033 t, const0_rtx),
11034 t, const0_rtx, const0_rtx));
11035 else
11036 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11037 t, const0_rtx),
11038 t, const0_rtx, const0_rtx));
11039 }
11040 }
11041 }
11042
11043 if (s390_warn_framesize > 0
11044 && cfun_frame_layout.frame_size >= s390_warn_framesize)
11045 warning (0, "frame size of %qs is %wd bytes",
11046 current_function_name (), cfun_frame_layout.frame_size);
11047
11048 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11049 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11050
11051 /* Save the location where we could backup the incoming stack
11052 pointer. */
11053 stack_pointer_backup_loc = get_last_insn ();
11054
11055 temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11056 temp_reg);
11057
11058 if (TARGET_BACKCHAIN || next_fpr)
11059 {
11060 if (temp_reg_clobbered_p)
11061 {
11062 /* allocate_stack_space had to make use of temp_reg and
11063 we need it to hold a backup of the incoming stack
11064 pointer. Calculate back that value from the current
11065 stack pointer. */
11066 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11067 GEN_INT (cfun_frame_layout.frame_size),
11068 false);
11069 }
11070 else
11071 {
11072 /* allocate_stack_space didn't actually required
11073 temp_reg. Insert the stack pointer backup insn
11074 before the stack pointer decrement code - knowing now
11075 that the value will survive. */
11076 emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11077 stack_pointer_backup_loc);
11078 }
11079 }
11080
11081 /* Set backchain. */
11082
11083 if (TARGET_BACKCHAIN)
11084 {
11085 if (cfun_frame_layout.backchain_offset)
11086 addr = gen_rtx_MEM (Pmode,
11087 plus_constant (Pmode, stack_pointer_rtx,
11088 cfun_frame_layout.backchain_offset));
11089 else
11090 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11091 set_mem_alias_set (addr, get_frame_alias_set ());
11092 insn = emit_insn (gen_move_insn (addr, temp_reg));
11093 }
11094
11095 /* If we support non-call exceptions (e.g. for Java),
11096 we need to make sure the backchain pointer is set up
11097 before any possibly trapping memory access. */
11098 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11099 {
11100 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11101 emit_clobber (addr);
11102 }
11103 }
11104 else if (flag_stack_clash_protection)
11105 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11106
11107 /* Save fprs 8 - 15 (64 bit ABI). */
11108
11109 if (cfun_save_high_fprs_p && next_fpr)
11110 {
11111 /* If the stack might be accessed through a different register
11112 we have to make sure that the stack pointer decrement is not
11113 moved below the use of the stack slots. */
11114 s390_emit_stack_tie ();
11115
11116 insn = emit_insn (gen_add2_insn (temp_reg,
11117 GEN_INT (cfun_frame_layout.f8_offset)));
11118
11119 offset = 0;
11120
11121 for (i = FPR8_REGNUM; i <= next_fpr; i++)
11122 if (cfun_fpr_save_p (i))
11123 {
11124 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11125 cfun_frame_layout.frame_size
11126 + cfun_frame_layout.f8_offset
11127 + offset);
11128
11129 insn = save_fpr (temp_reg, offset, i);
11130 offset += 8;
11131 RTX_FRAME_RELATED_P (insn) = 1;
11132 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11133 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11134 gen_rtx_REG (DFmode, i)));
11135 }
11136 }
11137
11138 /* Set frame pointer, if needed. */
11139
11140 if (frame_pointer_needed)
11141 {
11142 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11143 RTX_FRAME_RELATED_P (insn) = 1;
11144 }
11145
11146 /* Set up got pointer, if needed. */
11147
11148 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11149 {
11150 rtx_insn *insns = s390_load_got ();
11151
11152 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11153 annotate_constant_pool_refs (insn);
11154
11155 emit_insn (insns);
11156 }
11157
11158 if (TARGET_TPF_PROFILING)
11159 {
11160 /* Generate a BAS instruction to serve as a function
11161 entry intercept to facilitate the use of tracing
11162 algorithms located at the branch target. */
11163 emit_insn (gen_prologue_tpf ());
11164
11165 /* Emit a blockage here so that all code
11166 lies between the profiling mechanisms. */
11167 emit_insn (gen_blockage ());
11168 }
11169 }
11170
11171 /* Expand the epilogue into a bunch of separate insns. */
11172
11173 void
11174 s390_emit_epilogue (bool sibcall)
11175 {
11176 rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
11177 int area_bottom, area_top, offset = 0;
11178 int next_offset;
11179 int i;
11180
11181 if (TARGET_TPF_PROFILING)
11182 {
11183
11184 /* Generate a BAS instruction to serve as a function
11185 entry intercept to facilitate the use of tracing
11186 algorithms located at the branch target. */
11187
11188 /* Emit a blockage here so that all code
11189 lies between the profiling mechanisms. */
11190 emit_insn (gen_blockage ());
11191
11192 emit_insn (gen_epilogue_tpf ());
11193 }
11194
11195 /* Check whether to use frame or stack pointer for restore. */
11196
11197 frame_pointer = (frame_pointer_needed
11198 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11199
11200 s390_frame_area (&area_bottom, &area_top);
11201
11202 /* Check whether we can access the register save area.
11203 If not, increment the frame pointer as required. */
11204
11205 if (area_top <= area_bottom)
11206 {
11207 /* Nothing to restore. */
11208 }
11209 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11210 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11211 {
11212 /* Area is in range. */
11213 offset = cfun_frame_layout.frame_size;
11214 }
11215 else
11216 {
11217 rtx_insn *insn;
11218 rtx frame_off, cfa;
11219
11220 offset = area_bottom < 0 ? -area_bottom : 0;
11221 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11222
11223 cfa = gen_rtx_SET (frame_pointer,
11224 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11225 if (DISP_IN_RANGE (INTVAL (frame_off)))
11226 {
11227 rtx set;
11228
11229 set = gen_rtx_SET (frame_pointer,
11230 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11231 insn = emit_insn (set);
11232 }
11233 else
11234 {
11235 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11236 frame_off = force_const_mem (Pmode, frame_off);
11237
11238 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11239 annotate_constant_pool_refs (insn);
11240 }
11241 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11242 RTX_FRAME_RELATED_P (insn) = 1;
11243 }
11244
11245 /* Restore call saved fprs. */
11246
11247 if (TARGET_64BIT)
11248 {
11249 if (cfun_save_high_fprs_p)
11250 {
11251 next_offset = cfun_frame_layout.f8_offset;
11252 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11253 {
11254 if (cfun_fpr_save_p (i))
11255 {
11256 restore_fpr (frame_pointer,
11257 offset + next_offset, i);
11258 cfa_restores
11259 = alloc_reg_note (REG_CFA_RESTORE,
11260 gen_rtx_REG (DFmode, i), cfa_restores);
11261 next_offset += 8;
11262 }
11263 }
11264 }
11265
11266 }
11267 else
11268 {
11269 next_offset = cfun_frame_layout.f4_offset;
11270 /* f4, f6 */
11271 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11272 {
11273 if (cfun_fpr_save_p (i))
11274 {
11275 restore_fpr (frame_pointer,
11276 offset + next_offset, i);
11277 cfa_restores
11278 = alloc_reg_note (REG_CFA_RESTORE,
11279 gen_rtx_REG (DFmode, i), cfa_restores);
11280 next_offset += 8;
11281 }
11282 else if (!TARGET_PACKED_STACK)
11283 next_offset += 8;
11284 }
11285
11286 }
11287
11288 /* Restore call saved gprs. */
11289
11290 if (cfun_frame_layout.first_restore_gpr != -1)
11291 {
11292 rtx insn, addr;
11293 int i;
11294
11295 /* Check for global register and save them
11296 to stack location from where they get restored. */
11297
11298 for (i = cfun_frame_layout.first_restore_gpr;
11299 i <= cfun_frame_layout.last_restore_gpr;
11300 i++)
11301 {
11302 if (global_not_special_regno_p (i))
11303 {
11304 addr = plus_constant (Pmode, frame_pointer,
11305 offset + cfun_frame_layout.gprs_offset
11306 + (i - cfun_frame_layout.first_save_gpr_slot)
11307 * UNITS_PER_LONG);
11308 addr = gen_rtx_MEM (Pmode, addr);
11309 set_mem_alias_set (addr, get_frame_alias_set ());
11310 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11311 }
11312 else
11313 cfa_restores
11314 = alloc_reg_note (REG_CFA_RESTORE,
11315 gen_rtx_REG (Pmode, i), cfa_restores);
11316 }
11317
11318 /* Fetch return address from stack before load multiple,
11319 this will do good for scheduling.
11320
11321 Only do this if we already decided that r14 needs to be
11322 saved to a stack slot. (And not just because r14 happens to
11323 be in between two GPRs which need saving.) Otherwise it
11324 would be difficult to take that decision back in
11325 s390_optimize_prologue.
11326
11327 This optimization is only helpful on in-order machines. */
11328 if (! sibcall
11329 && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11330 && s390_tune <= PROCESSOR_2097_Z10)
11331 {
11332 int return_regnum = find_unused_clobbered_reg();
11333 if (!return_regnum
11334 || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11335 && !TARGET_CPU_Z10
11336 && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
11337 {
11338 gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
11339 return_regnum = 4;
11340 }
11341 return_reg = gen_rtx_REG (Pmode, return_regnum);
11342
11343 addr = plus_constant (Pmode, frame_pointer,
11344 offset + cfun_frame_layout.gprs_offset
11345 + (RETURN_REGNUM
11346 - cfun_frame_layout.first_save_gpr_slot)
11347 * UNITS_PER_LONG);
11348 addr = gen_rtx_MEM (Pmode, addr);
11349 set_mem_alias_set (addr, get_frame_alias_set ());
11350 emit_move_insn (return_reg, addr);
11351
11352 /* Once we did that optimization we have to make sure
11353 s390_optimize_prologue does not try to remove the store
11354 of r14 since we will not be able to find the load issued
11355 here. */
11356 cfun_frame_layout.save_return_addr_p = true;
11357 }
11358
11359 insn = restore_gprs (frame_pointer,
11360 offset + cfun_frame_layout.gprs_offset
11361 + (cfun_frame_layout.first_restore_gpr
11362 - cfun_frame_layout.first_save_gpr_slot)
11363 * UNITS_PER_LONG,
11364 cfun_frame_layout.first_restore_gpr,
11365 cfun_frame_layout.last_restore_gpr);
11366 insn = emit_insn (insn);
11367 REG_NOTES (insn) = cfa_restores;
11368 add_reg_note (insn, REG_CFA_DEF_CFA,
11369 plus_constant (Pmode, stack_pointer_rtx,
11370 STACK_POINTER_OFFSET));
11371 RTX_FRAME_RELATED_P (insn) = 1;
11372 }
11373
11374 s390_restore_gprs_from_fprs ();
11375
11376 if (! sibcall)
11377 {
11378 if (!return_reg && !s390_can_use_return_insn ())
11379 /* We planned to emit (return), be we are not allowed to. */
11380 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11381
11382 if (return_reg)
11383 /* Emit (return) and (use). */
11384 emit_jump_insn (gen_return_use (return_reg));
11385 else
11386 /* The fact that RETURN_REGNUM is used is already reflected by
11387 EPILOGUE_USES. Emit plain (return). */
11388 emit_jump_insn (gen_return ());
11389 }
11390 }
11391
11392 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11393
11394 static void
11395 s300_set_up_by_prologue (hard_reg_set_container *regs)
11396 {
11397 if (cfun->machine->base_reg
11398 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11399 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11400 }
11401
11402 /* -fsplit-stack support. */
11403
11404 /* A SYMBOL_REF for __morestack. */
11405 static GTY(()) rtx morestack_ref;
11406
11407 /* When using -fsplit-stack, the allocation routines set a field in
11408 the TCB to the bottom of the stack plus this much space, measured
11409 in bytes. */
11410
11411 #define SPLIT_STACK_AVAILABLE 1024
11412
11413 /* Emit -fsplit-stack prologue, which goes before the regular function
11414 prologue. */
11415
11416 void
11417 s390_expand_split_stack_prologue (void)
11418 {
11419 rtx r1, guard, cc = NULL;
11420 rtx_insn *insn;
11421 /* Offset from thread pointer to __private_ss. */
11422 int psso = TARGET_64BIT ? 0x38 : 0x20;
11423 /* Pointer size in bytes. */
11424 /* Frame size and argument size - the two parameters to __morestack. */
11425 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11426 /* Align argument size to 8 bytes - simplifies __morestack code. */
11427 HOST_WIDE_INT args_size = crtl->args.size >= 0
11428 ? ((crtl->args.size + 7) & ~7)
11429 : 0;
11430 /* Label to be called by __morestack. */
11431 rtx_code_label *call_done = NULL;
11432 rtx_code_label *parm_base = NULL;
11433 rtx tmp;
11434
11435 gcc_assert (flag_split_stack && reload_completed);
11436
11437 r1 = gen_rtx_REG (Pmode, 1);
11438
11439 /* If no stack frame will be allocated, don't do anything. */
11440 if (!frame_size)
11441 {
11442 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11443 {
11444 /* If va_start is used, just use r15. */
11445 emit_move_insn (r1,
11446 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11447 GEN_INT (STACK_POINTER_OFFSET)));
11448
11449 }
11450 return;
11451 }
11452
11453 if (morestack_ref == NULL_RTX)
11454 {
11455 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11456 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11457 | SYMBOL_FLAG_FUNCTION);
11458 }
11459
11460 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11461 {
11462 /* If frame_size will fit in an add instruction, do a stack space
11463 check, and only call __morestack if there's not enough space. */
11464
11465 /* Get thread pointer. r1 is the only register we can always destroy - r0
11466 could contain a static chain (and cannot be used to address memory
11467 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11468 emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
11469 /* Aim at __private_ss. */
11470 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11471
11472 /* If less that 1kiB used, skip addition and compare directly with
11473 __private_ss. */
11474 if (frame_size > SPLIT_STACK_AVAILABLE)
11475 {
11476 emit_move_insn (r1, guard);
11477 if (TARGET_64BIT)
11478 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11479 else
11480 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11481 guard = r1;
11482 }
11483
11484 /* Compare the (maybe adjusted) guard with the stack pointer. */
11485 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11486 }
11487
11488 call_done = gen_label_rtx ();
11489 parm_base = gen_label_rtx ();
11490
11491 /* Emit the parameter block. */
11492 tmp = gen_split_stack_data (parm_base, call_done,
11493 GEN_INT (frame_size),
11494 GEN_INT (args_size));
11495 insn = emit_insn (tmp);
11496 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11497 LABEL_NUSES (call_done)++;
11498 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11499 LABEL_NUSES (parm_base)++;
11500
11501 /* %r1 = litbase. */
11502 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11503 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11504 LABEL_NUSES (parm_base)++;
11505
11506 /* Now, we need to call __morestack. It has very special calling
11507 conventions: it preserves param/return/static chain registers for
11508 calling main function body, and looks for its own parameters at %r1. */
11509
11510 if (cc != NULL)
11511 {
11512 tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
11513
11514 insn = emit_jump_insn (tmp);
11515 JUMP_LABEL (insn) = call_done;
11516 LABEL_NUSES (call_done)++;
11517
11518 /* Mark the jump as very unlikely to be taken. */
11519 add_reg_br_prob_note (insn,
11520 profile_probability::very_unlikely ());
11521
11522 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11523 {
11524 /* If va_start is used, and __morestack was not called, just use
11525 r15. */
11526 emit_move_insn (r1,
11527 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11528 GEN_INT (STACK_POINTER_OFFSET)));
11529 }
11530 }
11531 else
11532 {
11533 tmp = gen_split_stack_call (morestack_ref, call_done);
11534 insn = emit_jump_insn (tmp);
11535 JUMP_LABEL (insn) = call_done;
11536 LABEL_NUSES (call_done)++;
11537 emit_barrier ();
11538 }
11539
11540 /* __morestack will call us here. */
11541
11542 emit_label (call_done);
11543 }
11544
11545 /* We may have to tell the dataflow pass that the split stack prologue
11546 is initializing a register. */
11547
11548 static void
11549 s390_live_on_entry (bitmap regs)
11550 {
11551 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11552 {
11553 gcc_assert (flag_split_stack);
11554 bitmap_set_bit (regs, 1);
11555 }
11556 }
11557
11558 /* Return true if the function can use simple_return to return outside
11559 of a shrink-wrapped region. At present shrink-wrapping is supported
11560 in all cases. */
11561
11562 bool
11563 s390_can_use_simple_return_insn (void)
11564 {
11565 return true;
11566 }
11567
11568 /* Return true if the epilogue is guaranteed to contain only a return
11569 instruction and if a direct return can therefore be used instead.
11570 One of the main advantages of using direct return instructions
11571 is that we can then use conditional returns. */
11572
11573 bool
11574 s390_can_use_return_insn (void)
11575 {
11576 int i;
11577
11578 if (!reload_completed)
11579 return false;
11580
11581 if (crtl->profile)
11582 return false;
11583
11584 if (TARGET_TPF_PROFILING)
11585 return false;
11586
11587 for (i = 0; i < 16; i++)
11588 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11589 return false;
11590
11591 /* For 31 bit this is not covered by the frame_size check below
11592 since f4, f6 are saved in the register save area without needing
11593 additional stack space. */
11594 if (!TARGET_64BIT
11595 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11596 return false;
11597
11598 if (cfun->machine->base_reg
11599 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11600 return false;
11601
11602 return cfun_frame_layout.frame_size == 0;
11603 }
11604
11605 /* The VX ABI differs for vararg functions. Therefore we need the
11606 prototype of the callee to be available when passing vector type
11607 values. */
11608 static const char *
11609 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11610 {
11611 return ((TARGET_VX_ABI
11612 && typelist == 0
11613 && VECTOR_TYPE_P (TREE_TYPE (val))
11614 && (funcdecl == NULL_TREE
11615 || (TREE_CODE (funcdecl) == FUNCTION_DECL
11616 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11617 ? N_("vector argument passed to unprototyped function")
11618 : NULL);
11619 }
11620
11621
11622 /* Return the size in bytes of a function argument of
11623 type TYPE and/or mode MODE. At least one of TYPE or
11624 MODE must be specified. */
11625
11626 static int
11627 s390_function_arg_size (machine_mode mode, const_tree type)
11628 {
11629 if (type)
11630 return int_size_in_bytes (type);
11631
11632 /* No type info available for some library calls ... */
11633 if (mode != BLKmode)
11634 return GET_MODE_SIZE (mode);
11635
11636 /* If we have neither type nor mode, abort */
11637 gcc_unreachable ();
11638 }
11639
11640 /* Return true if a function argument of type TYPE and mode MODE
11641 is to be passed in a vector register, if available. */
11642
11643 bool
11644 s390_function_arg_vector (machine_mode mode, const_tree type)
11645 {
11646 if (!TARGET_VX_ABI)
11647 return false;
11648
11649 if (s390_function_arg_size (mode, type) > 16)
11650 return false;
11651
11652 /* No type info available for some library calls ... */
11653 if (!type)
11654 return VECTOR_MODE_P (mode);
11655
11656 /* The ABI says that record types with a single member are treated
11657 just like that member would be. */
11658 while (TREE_CODE (type) == RECORD_TYPE)
11659 {
11660 tree field, single = NULL_TREE;
11661
11662 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11663 {
11664 if (TREE_CODE (field) != FIELD_DECL)
11665 continue;
11666
11667 if (single == NULL_TREE)
11668 single = TREE_TYPE (field);
11669 else
11670 return false;
11671 }
11672
11673 if (single == NULL_TREE)
11674 return false;
11675 else
11676 {
11677 /* If the field declaration adds extra byte due to
11678 e.g. padding this is not accepted as vector type. */
11679 if (int_size_in_bytes (single) <= 0
11680 || int_size_in_bytes (single) != int_size_in_bytes (type))
11681 return false;
11682 type = single;
11683 }
11684 }
11685
11686 return VECTOR_TYPE_P (type);
11687 }
11688
11689 /* Return true if a function argument of type TYPE and mode MODE
11690 is to be passed in a floating-point register, if available. */
11691
11692 static bool
11693 s390_function_arg_float (machine_mode mode, const_tree type)
11694 {
11695 if (s390_function_arg_size (mode, type) > 8)
11696 return false;
11697
11698 /* Soft-float changes the ABI: no floating-point registers are used. */
11699 if (TARGET_SOFT_FLOAT)
11700 return false;
11701
11702 /* No type info available for some library calls ... */
11703 if (!type)
11704 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11705
11706 /* The ABI says that record types with a single member are treated
11707 just like that member would be. */
11708 while (TREE_CODE (type) == RECORD_TYPE)
11709 {
11710 tree field, single = NULL_TREE;
11711
11712 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11713 {
11714 if (TREE_CODE (field) != FIELD_DECL)
11715 continue;
11716
11717 if (single == NULL_TREE)
11718 single = TREE_TYPE (field);
11719 else
11720 return false;
11721 }
11722
11723 if (single == NULL_TREE)
11724 return false;
11725 else
11726 type = single;
11727 }
11728
11729 return TREE_CODE (type) == REAL_TYPE;
11730 }
11731
11732 /* Return true if a function argument of type TYPE and mode MODE
11733 is to be passed in an integer register, or a pair of integer
11734 registers, if available. */
11735
11736 static bool
11737 s390_function_arg_integer (machine_mode mode, const_tree type)
11738 {
11739 int size = s390_function_arg_size (mode, type);
11740 if (size > 8)
11741 return false;
11742
11743 /* No type info available for some library calls ... */
11744 if (!type)
11745 return GET_MODE_CLASS (mode) == MODE_INT
11746 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
11747
11748 /* We accept small integral (and similar) types. */
11749 if (INTEGRAL_TYPE_P (type)
11750 || POINTER_TYPE_P (type)
11751 || TREE_CODE (type) == NULLPTR_TYPE
11752 || TREE_CODE (type) == OFFSET_TYPE
11753 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
11754 return true;
11755
11756 /* We also accept structs of size 1, 2, 4, 8 that are not
11757 passed in floating-point registers. */
11758 if (AGGREGATE_TYPE_P (type)
11759 && exact_log2 (size) >= 0
11760 && !s390_function_arg_float (mode, type))
11761 return true;
11762
11763 return false;
11764 }
11765
11766 /* Return 1 if a function argument of type TYPE and mode MODE
11767 is to be passed by reference. The ABI specifies that only
11768 structures of size 1, 2, 4, or 8 bytes are passed by value,
11769 all other structures (and complex numbers) are passed by
11770 reference. */
11771
11772 static bool
11773 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
11774 machine_mode mode, const_tree type,
11775 bool named ATTRIBUTE_UNUSED)
11776 {
11777 int size = s390_function_arg_size (mode, type);
11778
11779 if (s390_function_arg_vector (mode, type))
11780 return false;
11781
11782 if (size > 8)
11783 return true;
11784
11785 if (type)
11786 {
11787 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
11788 return true;
11789
11790 if (TREE_CODE (type) == COMPLEX_TYPE
11791 || TREE_CODE (type) == VECTOR_TYPE)
11792 return true;
11793 }
11794
11795 return false;
11796 }
11797
11798 /* Update the data in CUM to advance over an argument of mode MODE and
11799 data type TYPE. (TYPE is null for libcalls where that information
11800 may not be available.). The boolean NAMED specifies whether the
11801 argument is a named argument (as opposed to an unnamed argument
11802 matching an ellipsis). */
11803
11804 static void
11805 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
11806 const_tree type, bool named)
11807 {
11808 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11809
11810 if (s390_function_arg_vector (mode, type))
11811 {
11812 /* We are called for unnamed vector stdarg arguments which are
11813 passed on the stack. In this case this hook does not have to
11814 do anything since stack arguments are tracked by common
11815 code. */
11816 if (!named)
11817 return;
11818 cum->vrs += 1;
11819 }
11820 else if (s390_function_arg_float (mode, type))
11821 {
11822 cum->fprs += 1;
11823 }
11824 else if (s390_function_arg_integer (mode, type))
11825 {
11826 int size = s390_function_arg_size (mode, type);
11827 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
11828 }
11829 else
11830 gcc_unreachable ();
11831 }
11832
11833 /* Define where to put the arguments to a function.
11834 Value is zero to push the argument on the stack,
11835 or a hard register in which to store the argument.
11836
11837 MODE is the argument's machine mode.
11838 TYPE is the data type of the argument (as a tree).
11839 This is null for libcalls where that information may
11840 not be available.
11841 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11842 the preceding args and about the function being called.
11843 NAMED is nonzero if this argument is a named parameter
11844 (otherwise it is an extra parameter matching an ellipsis).
11845
11846 On S/390, we use general purpose registers 2 through 6 to
11847 pass integer, pointer, and certain structure arguments, and
11848 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11849 to pass floating point arguments. All remaining arguments
11850 are pushed to the stack. */
11851
11852 static rtx
11853 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11854 const_tree type, bool named)
11855 {
11856 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11857
11858 if (!named)
11859 s390_check_type_for_vector_abi (type, true, false);
11860
11861 if (s390_function_arg_vector (mode, type))
11862 {
11863 /* Vector arguments being part of the ellipsis are passed on the
11864 stack. */
11865 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11866 return NULL_RTX;
11867
11868 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11869 }
11870 else if (s390_function_arg_float (mode, type))
11871 {
11872 if (cum->fprs + 1 > FP_ARG_NUM_REG)
11873 return NULL_RTX;
11874 else
11875 return gen_rtx_REG (mode, cum->fprs + 16);
11876 }
11877 else if (s390_function_arg_integer (mode, type))
11878 {
11879 int size = s390_function_arg_size (mode, type);
11880 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11881
11882 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
11883 return NULL_RTX;
11884 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
11885 return gen_rtx_REG (mode, cum->gprs + 2);
11886 else if (n_gprs == 2)
11887 {
11888 rtvec p = rtvec_alloc (2);
11889
11890 RTVEC_ELT (p, 0)
11891 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
11892 const0_rtx);
11893 RTVEC_ELT (p, 1)
11894 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
11895 GEN_INT (4));
11896
11897 return gen_rtx_PARALLEL (mode, p);
11898 }
11899 }
11900
11901 /* After the real arguments, expand_call calls us once again
11902 with a void_type_node type. Whatever we return here is
11903 passed as operand 2 to the call expanders.
11904
11905 We don't need this feature ... */
11906 else if (type == void_type_node)
11907 return const0_rtx;
11908
11909 gcc_unreachable ();
11910 }
11911
11912 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Vector arguments are
11913 left-justified when placed on the stack during parameter passing. */
11914
11915 static pad_direction
11916 s390_function_arg_padding (machine_mode mode, const_tree type)
11917 {
11918 if (s390_function_arg_vector (mode, type))
11919 return PAD_UPWARD;
11920
11921 return default_function_arg_padding (mode, type);
11922 }
11923
11924 /* Return true if return values of type TYPE should be returned
11925 in a memory buffer whose address is passed by the caller as
11926 hidden first argument. */
11927
11928 static bool
11929 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
11930 {
11931 /* We accept small integral (and similar) types. */
11932 if (INTEGRAL_TYPE_P (type)
11933 || POINTER_TYPE_P (type)
11934 || TREE_CODE (type) == OFFSET_TYPE
11935 || TREE_CODE (type) == REAL_TYPE)
11936 return int_size_in_bytes (type) > 8;
11937
11938 /* vector types which fit into a VR. */
11939 if (TARGET_VX_ABI
11940 && VECTOR_TYPE_P (type)
11941 && int_size_in_bytes (type) <= 16)
11942 return false;
11943
11944 /* Aggregates and similar constructs are always returned
11945 in memory. */
11946 if (AGGREGATE_TYPE_P (type)
11947 || TREE_CODE (type) == COMPLEX_TYPE
11948 || VECTOR_TYPE_P (type))
11949 return true;
11950
11951 /* ??? We get called on all sorts of random stuff from
11952 aggregate_value_p. We can't abort, but it's not clear
11953 what's safe to return. Pretend it's a struct I guess. */
11954 return true;
11955 }
11956
11957 /* Function arguments and return values are promoted to word size. */
11958
11959 static machine_mode
11960 s390_promote_function_mode (const_tree type, machine_mode mode,
11961 int *punsignedp,
11962 const_tree fntype ATTRIBUTE_UNUSED,
11963 int for_return ATTRIBUTE_UNUSED)
11964 {
11965 if (INTEGRAL_MODE_P (mode)
11966 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
11967 {
11968 if (type != NULL_TREE && POINTER_TYPE_P (type))
11969 *punsignedp = POINTERS_EXTEND_UNSIGNED;
11970 return Pmode;
11971 }
11972
11973 return mode;
11974 }
11975
11976 /* Define where to return a (scalar) value of type RET_TYPE.
11977 If RET_TYPE is null, define where to return a (scalar)
11978 value of mode MODE from a libcall. */
11979
11980 static rtx
11981 s390_function_and_libcall_value (machine_mode mode,
11982 const_tree ret_type,
11983 const_tree fntype_or_decl,
11984 bool outgoing ATTRIBUTE_UNUSED)
11985 {
11986 /* For vector return types it is important to use the RET_TYPE
11987 argument whenever available since the middle-end might have
11988 changed the mode to a scalar mode. */
11989 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
11990 || (!ret_type && VECTOR_MODE_P (mode)));
11991
11992 /* For normal functions perform the promotion as
11993 promote_function_mode would do. */
11994 if (ret_type)
11995 {
11996 int unsignedp = TYPE_UNSIGNED (ret_type);
11997 mode = promote_function_mode (ret_type, mode, &unsignedp,
11998 fntype_or_decl, 1);
11999 }
12000
12001 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12002 || SCALAR_FLOAT_MODE_P (mode)
12003 || (TARGET_VX_ABI && vector_ret_type_p));
12004 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12005
12006 if (TARGET_VX_ABI && vector_ret_type_p)
12007 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12008 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12009 return gen_rtx_REG (mode, 16);
12010 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12011 || UNITS_PER_LONG == UNITS_PER_WORD)
12012 return gen_rtx_REG (mode, 2);
12013 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12014 {
12015 /* This case is triggered when returning a 64 bit value with
12016 -m31 -mzarch. Although the value would fit into a single
12017 register it has to be forced into a 32 bit register pair in
12018 order to match the ABI. */
12019 rtvec p = rtvec_alloc (2);
12020
12021 RTVEC_ELT (p, 0)
12022 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12023 RTVEC_ELT (p, 1)
12024 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12025
12026 return gen_rtx_PARALLEL (mode, p);
12027 }
12028
12029 gcc_unreachable ();
12030 }
12031
12032 /* Define where to return a scalar return value of type RET_TYPE. */
12033
12034 static rtx
12035 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12036 bool outgoing)
12037 {
12038 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12039 fn_decl_or_type, outgoing);
12040 }
12041
12042 /* Define where to return a scalar libcall return value of mode
12043 MODE. */
12044
12045 static rtx
12046 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12047 {
12048 return s390_function_and_libcall_value (mode, NULL_TREE,
12049 NULL_TREE, true);
12050 }
12051
12052
12053 /* Create and return the va_list datatype.
12054
12055 On S/390, va_list is an array type equivalent to
12056
12057 typedef struct __va_list_tag
12058 {
12059 long __gpr;
12060 long __fpr;
12061 void *__overflow_arg_area;
12062 void *__reg_save_area;
12063 } va_list[1];
12064
12065 where __gpr and __fpr hold the number of general purpose
12066 or floating point arguments used up to now, respectively,
12067 __overflow_arg_area points to the stack location of the
12068 next argument passed on the stack, and __reg_save_area
12069 always points to the start of the register area in the
12070 call frame of the current function. The function prologue
12071 saves all registers used for argument passing into this
12072 area if the function uses variable arguments. */
12073
12074 static tree
12075 s390_build_builtin_va_list (void)
12076 {
12077 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12078
12079 record = lang_hooks.types.make_type (RECORD_TYPE);
12080
12081 type_decl =
12082 build_decl (BUILTINS_LOCATION,
12083 TYPE_DECL, get_identifier ("__va_list_tag"), record);
12084
12085 f_gpr = build_decl (BUILTINS_LOCATION,
12086 FIELD_DECL, get_identifier ("__gpr"),
12087 long_integer_type_node);
12088 f_fpr = build_decl (BUILTINS_LOCATION,
12089 FIELD_DECL, get_identifier ("__fpr"),
12090 long_integer_type_node);
12091 f_ovf = build_decl (BUILTINS_LOCATION,
12092 FIELD_DECL, get_identifier ("__overflow_arg_area"),
12093 ptr_type_node);
12094 f_sav = build_decl (BUILTINS_LOCATION,
12095 FIELD_DECL, get_identifier ("__reg_save_area"),
12096 ptr_type_node);
12097
12098 va_list_gpr_counter_field = f_gpr;
12099 va_list_fpr_counter_field = f_fpr;
12100
12101 DECL_FIELD_CONTEXT (f_gpr) = record;
12102 DECL_FIELD_CONTEXT (f_fpr) = record;
12103 DECL_FIELD_CONTEXT (f_ovf) = record;
12104 DECL_FIELD_CONTEXT (f_sav) = record;
12105
12106 TYPE_STUB_DECL (record) = type_decl;
12107 TYPE_NAME (record) = type_decl;
12108 TYPE_FIELDS (record) = f_gpr;
12109 DECL_CHAIN (f_gpr) = f_fpr;
12110 DECL_CHAIN (f_fpr) = f_ovf;
12111 DECL_CHAIN (f_ovf) = f_sav;
12112
12113 layout_type (record);
12114
12115 /* The correct type is an array type of one element. */
12116 return build_array_type (record, build_index_type (size_zero_node));
12117 }
12118
12119 /* Implement va_start by filling the va_list structure VALIST.
12120 STDARG_P is always true, and ignored.
12121 NEXTARG points to the first anonymous stack argument.
12122
12123 The following global variables are used to initialize
12124 the va_list structure:
12125
12126 crtl->args.info:
12127 holds number of gprs and fprs used for named arguments.
12128 crtl->args.arg_offset_rtx:
12129 holds the offset of the first anonymous stack argument
12130 (relative to the virtual arg pointer). */
12131
12132 static void
12133 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12134 {
12135 HOST_WIDE_INT n_gpr, n_fpr;
12136 int off;
12137 tree f_gpr, f_fpr, f_ovf, f_sav;
12138 tree gpr, fpr, ovf, sav, t;
12139
12140 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12141 f_fpr = DECL_CHAIN (f_gpr);
12142 f_ovf = DECL_CHAIN (f_fpr);
12143 f_sav = DECL_CHAIN (f_ovf);
12144
12145 valist = build_simple_mem_ref (valist);
12146 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12147 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12148 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12149 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12150
12151 /* Count number of gp and fp argument registers used. */
12152
12153 n_gpr = crtl->args.info.gprs;
12154 n_fpr = crtl->args.info.fprs;
12155
12156 if (cfun->va_list_gpr_size)
12157 {
12158 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12159 build_int_cst (NULL_TREE, n_gpr));
12160 TREE_SIDE_EFFECTS (t) = 1;
12161 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12162 }
12163
12164 if (cfun->va_list_fpr_size)
12165 {
12166 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12167 build_int_cst (NULL_TREE, n_fpr));
12168 TREE_SIDE_EFFECTS (t) = 1;
12169 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12170 }
12171
12172 if (flag_split_stack
12173 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12174 == NULL)
12175 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12176 {
12177 rtx reg;
12178 rtx_insn *seq;
12179
12180 reg = gen_reg_rtx (Pmode);
12181 cfun->machine->split_stack_varargs_pointer = reg;
12182
12183 start_sequence ();
12184 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12185 seq = get_insns ();
12186 end_sequence ();
12187
12188 push_topmost_sequence ();
12189 emit_insn_after (seq, entry_of_function ());
12190 pop_topmost_sequence ();
12191 }
12192
12193 /* Find the overflow area.
12194 FIXME: This currently is too pessimistic when the vector ABI is
12195 enabled. In that case we *always* set up the overflow area
12196 pointer. */
12197 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12198 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12199 || TARGET_VX_ABI)
12200 {
12201 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12202 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12203 else
12204 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12205
12206 off = INTVAL (crtl->args.arg_offset_rtx);
12207 off = off < 0 ? 0 : off;
12208 if (TARGET_DEBUG_ARG)
12209 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12210 (int)n_gpr, (int)n_fpr, off);
12211
12212 t = fold_build_pointer_plus_hwi (t, off);
12213
12214 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12215 TREE_SIDE_EFFECTS (t) = 1;
12216 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12217 }
12218
12219 /* Find the register save area. */
12220 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12221 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12222 {
12223 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12224 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12225
12226 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12227 TREE_SIDE_EFFECTS (t) = 1;
12228 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12229 }
12230 }
12231
12232 /* Implement va_arg by updating the va_list structure
12233 VALIST as required to retrieve an argument of type
12234 TYPE, and returning that argument.
12235
12236 Generates code equivalent to:
12237
12238 if (integral value) {
12239 if (size <= 4 && args.gpr < 5 ||
12240 size > 4 && args.gpr < 4 )
12241 ret = args.reg_save_area[args.gpr+8]
12242 else
12243 ret = *args.overflow_arg_area++;
12244 } else if (vector value) {
12245 ret = *args.overflow_arg_area;
12246 args.overflow_arg_area += size / 8;
12247 } else if (float value) {
12248 if (args.fgpr < 2)
12249 ret = args.reg_save_area[args.fpr+64]
12250 else
12251 ret = *args.overflow_arg_area++;
12252 } else if (aggregate value) {
12253 if (args.gpr < 5)
12254 ret = *args.reg_save_area[args.gpr]
12255 else
12256 ret = **args.overflow_arg_area++;
12257 } */
12258
12259 static tree
12260 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12261 gimple_seq *post_p ATTRIBUTE_UNUSED)
12262 {
12263 tree f_gpr, f_fpr, f_ovf, f_sav;
12264 tree gpr, fpr, ovf, sav, reg, t, u;
12265 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12266 tree lab_false, lab_over = NULL_TREE;
12267 tree addr = create_tmp_var (ptr_type_node, "addr");
12268 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12269 a stack slot. */
12270
12271 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12272 f_fpr = DECL_CHAIN (f_gpr);
12273 f_ovf = DECL_CHAIN (f_fpr);
12274 f_sav = DECL_CHAIN (f_ovf);
12275
12276 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12277 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12278 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12279
12280 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12281 both appear on a lhs. */
12282 valist = unshare_expr (valist);
12283 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12284
12285 size = int_size_in_bytes (type);
12286
12287 s390_check_type_for_vector_abi (type, true, false);
12288
12289 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12290 {
12291 if (TARGET_DEBUG_ARG)
12292 {
12293 fprintf (stderr, "va_arg: aggregate type");
12294 debug_tree (type);
12295 }
12296
12297 /* Aggregates are passed by reference. */
12298 indirect_p = 1;
12299 reg = gpr;
12300 n_reg = 1;
12301
12302 /* kernel stack layout on 31 bit: It is assumed here that no padding
12303 will be added by s390_frame_info because for va_args always an even
12304 number of gprs has to be saved r15-r2 = 14 regs. */
12305 sav_ofs = 2 * UNITS_PER_LONG;
12306 sav_scale = UNITS_PER_LONG;
12307 size = UNITS_PER_LONG;
12308 max_reg = GP_ARG_NUM_REG - n_reg;
12309 left_align_p = false;
12310 }
12311 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12312 {
12313 if (TARGET_DEBUG_ARG)
12314 {
12315 fprintf (stderr, "va_arg: vector type");
12316 debug_tree (type);
12317 }
12318
12319 indirect_p = 0;
12320 reg = NULL_TREE;
12321 n_reg = 0;
12322 sav_ofs = 0;
12323 sav_scale = 8;
12324 max_reg = 0;
12325 left_align_p = true;
12326 }
12327 else if (s390_function_arg_float (TYPE_MODE (type), type))
12328 {
12329 if (TARGET_DEBUG_ARG)
12330 {
12331 fprintf (stderr, "va_arg: float type");
12332 debug_tree (type);
12333 }
12334
12335 /* FP args go in FP registers, if present. */
12336 indirect_p = 0;
12337 reg = fpr;
12338 n_reg = 1;
12339 sav_ofs = 16 * UNITS_PER_LONG;
12340 sav_scale = 8;
12341 max_reg = FP_ARG_NUM_REG - n_reg;
12342 left_align_p = false;
12343 }
12344 else
12345 {
12346 if (TARGET_DEBUG_ARG)
12347 {
12348 fprintf (stderr, "va_arg: other type");
12349 debug_tree (type);
12350 }
12351
12352 /* Otherwise into GP registers. */
12353 indirect_p = 0;
12354 reg = gpr;
12355 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12356
12357 /* kernel stack layout on 31 bit: It is assumed here that no padding
12358 will be added by s390_frame_info because for va_args always an even
12359 number of gprs has to be saved r15-r2 = 14 regs. */
12360 sav_ofs = 2 * UNITS_PER_LONG;
12361
12362 if (size < UNITS_PER_LONG)
12363 sav_ofs += UNITS_PER_LONG - size;
12364
12365 sav_scale = UNITS_PER_LONG;
12366 max_reg = GP_ARG_NUM_REG - n_reg;
12367 left_align_p = false;
12368 }
12369
12370 /* Pull the value out of the saved registers ... */
12371
12372 if (reg != NULL_TREE)
12373 {
12374 /*
12375 if (reg > ((typeof (reg))max_reg))
12376 goto lab_false;
12377
12378 addr = sav + sav_ofs + reg * save_scale;
12379
12380 goto lab_over;
12381
12382 lab_false:
12383 */
12384
12385 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12386 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12387
12388 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12389 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12390 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12391 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12392 gimplify_and_add (t, pre_p);
12393
12394 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12395 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12396 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12397 t = fold_build_pointer_plus (t, u);
12398
12399 gimplify_assign (addr, t, pre_p);
12400
12401 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12402
12403 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12404 }
12405
12406 /* ... Otherwise out of the overflow area. */
12407
12408 t = ovf;
12409 if (size < UNITS_PER_LONG && !left_align_p)
12410 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12411
12412 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12413
12414 gimplify_assign (addr, t, pre_p);
12415
12416 if (size < UNITS_PER_LONG && left_align_p)
12417 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12418 else
12419 t = fold_build_pointer_plus_hwi (t, size);
12420
12421 gimplify_assign (ovf, t, pre_p);
12422
12423 if (reg != NULL_TREE)
12424 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12425
12426
12427 /* Increment register save count. */
12428
12429 if (n_reg > 0)
12430 {
12431 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12432 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12433 gimplify_and_add (u, pre_p);
12434 }
12435
12436 if (indirect_p)
12437 {
12438 t = build_pointer_type_for_mode (build_pointer_type (type),
12439 ptr_mode, true);
12440 addr = fold_convert (t, addr);
12441 addr = build_va_arg_indirect_ref (addr);
12442 }
12443 else
12444 {
12445 t = build_pointer_type_for_mode (type, ptr_mode, true);
12446 addr = fold_convert (t, addr);
12447 }
12448
12449 return build_va_arg_indirect_ref (addr);
12450 }
12451
12452 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12453 expanders.
12454 DEST - Register location where CC will be stored.
12455 TDB - Pointer to a 256 byte area where to store the transaction.
12456 diagnostic block. NULL if TDB is not needed.
12457 RETRY - Retry count value. If non-NULL a retry loop for CC2
12458 is emitted
12459 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12460 of the tbegin instruction pattern. */
12461
12462 void
12463 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12464 {
12465 rtx retry_plus_two = gen_reg_rtx (SImode);
12466 rtx retry_reg = gen_reg_rtx (SImode);
12467 rtx_code_label *retry_label = NULL;
12468
12469 if (retry != NULL_RTX)
12470 {
12471 emit_move_insn (retry_reg, retry);
12472 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12473 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12474 retry_label = gen_label_rtx ();
12475 emit_label (retry_label);
12476 }
12477
12478 if (clobber_fprs_p)
12479 {
12480 if (TARGET_VX)
12481 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12482 tdb));
12483 else
12484 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12485 tdb));
12486 }
12487 else
12488 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12489 tdb));
12490
12491 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12492 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12493 CC_REGNUM)),
12494 UNSPEC_CC_TO_INT));
12495 if (retry != NULL_RTX)
12496 {
12497 const int CC0 = 1 << 3;
12498 const int CC1 = 1 << 2;
12499 const int CC3 = 1 << 0;
12500 rtx jump;
12501 rtx count = gen_reg_rtx (SImode);
12502 rtx_code_label *leave_label = gen_label_rtx ();
12503
12504 /* Exit for success and permanent failures. */
12505 jump = s390_emit_jump (leave_label,
12506 gen_rtx_EQ (VOIDmode,
12507 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12508 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12509 LABEL_NUSES (leave_label) = 1;
12510
12511 /* CC2 - transient failure. Perform retry with ppa. */
12512 emit_move_insn (count, retry_plus_two);
12513 emit_insn (gen_subsi3 (count, count, retry_reg));
12514 emit_insn (gen_tx_assist (count));
12515 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12516 retry_reg,
12517 retry_reg));
12518 JUMP_LABEL (jump) = retry_label;
12519 LABEL_NUSES (retry_label) = 1;
12520 emit_label (leave_label);
12521 }
12522 }
12523
12524
12525 /* Return the decl for the target specific builtin with the function
12526 code FCODE. */
12527
12528 static tree
12529 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12530 {
12531 if (fcode >= S390_BUILTIN_MAX)
12532 return error_mark_node;
12533
12534 return s390_builtin_decls[fcode];
12535 }
12536
12537 /* We call mcount before the function prologue. So a profiled leaf
12538 function should stay a leaf function. */
12539
12540 static bool
12541 s390_keep_leaf_when_profiled ()
12542 {
12543 return true;
12544 }
12545
12546 /* Output assembly code for the trampoline template to
12547 stdio stream FILE.
12548
12549 On S/390, we use gpr 1 internally in the trampoline code;
12550 gpr 0 is used to hold the static chain. */
12551
12552 static void
12553 s390_asm_trampoline_template (FILE *file)
12554 {
12555 rtx op[2];
12556 op[0] = gen_rtx_REG (Pmode, 0);
12557 op[1] = gen_rtx_REG (Pmode, 1);
12558
12559 if (TARGET_64BIT)
12560 {
12561 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12562 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12563 output_asm_insn ("br\t%1", op); /* 2 byte */
12564 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12565 }
12566 else
12567 {
12568 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12569 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12570 output_asm_insn ("br\t%1", op); /* 2 byte */
12571 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12572 }
12573 }
12574
12575 /* Emit RTL insns to initialize the variable parts of a trampoline.
12576 FNADDR is an RTX for the address of the function's pure code.
12577 CXT is an RTX for the static chain value for the function. */
12578
12579 static void
12580 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12581 {
12582 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12583 rtx mem;
12584
12585 emit_block_move (m_tramp, assemble_trampoline_template (),
12586 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12587
12588 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12589 emit_move_insn (mem, cxt);
12590 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12591 emit_move_insn (mem, fnaddr);
12592 }
12593
12594 static void
12595 output_asm_nops (const char *user, int hw)
12596 {
12597 asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
12598 while (hw > 0)
12599 {
12600 if (hw >= 3)
12601 {
12602 output_asm_insn ("brcl\t0,0", NULL);
12603 hw -= 3;
12604 }
12605 else if (hw >= 2)
12606 {
12607 output_asm_insn ("bc\t0,0", NULL);
12608 hw -= 2;
12609 }
12610 else
12611 {
12612 output_asm_insn ("bcr\t0,0", NULL);
12613 hw -= 1;
12614 }
12615 }
12616 }
12617
12618 /* Output assembler code to FILE to increment profiler label # LABELNO
12619 for profiling a function entry. */
12620
12621 void
12622 s390_function_profiler (FILE *file, int labelno)
12623 {
12624 rtx op[8];
12625
12626 char label[128];
12627 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12628
12629 fprintf (file, "# function profiler \n");
12630
12631 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12632 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12633 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12634 op[7] = GEN_INT (UNITS_PER_LONG);
12635
12636 op[2] = gen_rtx_REG (Pmode, 1);
12637 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12638 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12639
12640 op[4] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
12641 if (flag_pic)
12642 {
12643 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12644 op[4] = gen_rtx_CONST (Pmode, op[4]);
12645 }
12646
12647 if (flag_record_mcount)
12648 fprintf (file, "1:\n");
12649
12650 if (flag_fentry)
12651 {
12652 if (flag_nop_mcount)
12653 output_asm_nops ("-mnop-mcount", /* brasl */ 3);
12654 else if (cfun->static_chain_decl)
12655 warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
12656 "with %<-mfentry%> on s390");
12657 else
12658 output_asm_insn ("brasl\t0,%4", op);
12659 }
12660 else if (TARGET_64BIT)
12661 {
12662 if (flag_nop_mcount)
12663 output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* larl */ 3 +
12664 /* brasl */ 3 + /* lg */ 3);
12665 else
12666 {
12667 output_asm_insn ("stg\t%0,%1", op);
12668 if (flag_dwarf2_cfi_asm)
12669 output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12670 output_asm_insn ("larl\t%2,%3", op);
12671 output_asm_insn ("brasl\t%0,%4", op);
12672 output_asm_insn ("lg\t%0,%1", op);
12673 if (flag_dwarf2_cfi_asm)
12674 output_asm_insn (".cfi_restore\t%0", op);
12675 }
12676 }
12677 else
12678 {
12679 if (flag_nop_mcount)
12680 output_asm_nops ("-mnop-mcount", /* st */ 2 + /* larl */ 3 +
12681 /* brasl */ 3 + /* l */ 2);
12682 else
12683 {
12684 output_asm_insn ("st\t%0,%1", op);
12685 if (flag_dwarf2_cfi_asm)
12686 output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12687 output_asm_insn ("larl\t%2,%3", op);
12688 output_asm_insn ("brasl\t%0,%4", op);
12689 output_asm_insn ("l\t%0,%1", op);
12690 if (flag_dwarf2_cfi_asm)
12691 output_asm_insn (".cfi_restore\t%0", op);
12692 }
12693 }
12694
12695 if (flag_record_mcount)
12696 {
12697 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
12698 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
12699 fprintf (file, "\t.previous\n");
12700 }
12701 }
12702
12703 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
12704 into its SYMBOL_REF_FLAGS. */
12705
12706 static void
12707 s390_encode_section_info (tree decl, rtx rtl, int first)
12708 {
12709 default_encode_section_info (decl, rtl, first);
12710
12711 if (TREE_CODE (decl) == VAR_DECL)
12712 {
12713 /* Store the alignment to be able to check if we can use
12714 a larl/load-relative instruction. We only handle the cases
12715 that can go wrong (i.e. no FUNC_DECLs). */
12716 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
12717 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12718 else if (DECL_ALIGN (decl) % 32)
12719 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12720 else if (DECL_ALIGN (decl) % 64)
12721 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12722 }
12723
12724 /* Literal pool references don't have a decl so they are handled
12725 differently here. We rely on the information in the MEM_ALIGN
12726 entry to decide upon the alignment. */
12727 if (MEM_P (rtl)
12728 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
12729 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
12730 {
12731 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
12732 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12733 else if (MEM_ALIGN (rtl) % 32)
12734 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12735 else if (MEM_ALIGN (rtl) % 64)
12736 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12737 }
12738 }
12739
12740 /* Output thunk to FILE that implements a C++ virtual function call (with
12741 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
12742 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
12743 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
12744 relative to the resulting this pointer. */
12745
12746 static void
12747 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
12748 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12749 tree function)
12750 {
12751 rtx op[10];
12752 int nonlocal = 0;
12753
12754 /* Make sure unwind info is emitted for the thunk if needed. */
12755 final_start_function (emit_barrier (), file, 1);
12756
12757 /* Operand 0 is the target function. */
12758 op[0] = XEXP (DECL_RTL (function), 0);
12759 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
12760 {
12761 nonlocal = 1;
12762 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
12763 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
12764 op[0] = gen_rtx_CONST (Pmode, op[0]);
12765 }
12766
12767 /* Operand 1 is the 'this' pointer. */
12768 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12769 op[1] = gen_rtx_REG (Pmode, 3);
12770 else
12771 op[1] = gen_rtx_REG (Pmode, 2);
12772
12773 /* Operand 2 is the delta. */
12774 op[2] = GEN_INT (delta);
12775
12776 /* Operand 3 is the vcall_offset. */
12777 op[3] = GEN_INT (vcall_offset);
12778
12779 /* Operand 4 is the temporary register. */
12780 op[4] = gen_rtx_REG (Pmode, 1);
12781
12782 /* Operands 5 to 8 can be used as labels. */
12783 op[5] = NULL_RTX;
12784 op[6] = NULL_RTX;
12785 op[7] = NULL_RTX;
12786 op[8] = NULL_RTX;
12787
12788 /* Operand 9 can be used for temporary register. */
12789 op[9] = NULL_RTX;
12790
12791 /* Generate code. */
12792 if (TARGET_64BIT)
12793 {
12794 /* Setup literal pool pointer if required. */
12795 if ((!DISP_IN_RANGE (delta)
12796 && !CONST_OK_FOR_K (delta)
12797 && !CONST_OK_FOR_Os (delta))
12798 || (!DISP_IN_RANGE (vcall_offset)
12799 && !CONST_OK_FOR_K (vcall_offset)
12800 && !CONST_OK_FOR_Os (vcall_offset)))
12801 {
12802 op[5] = gen_label_rtx ();
12803 output_asm_insn ("larl\t%4,%5", op);
12804 }
12805
12806 /* Add DELTA to this pointer. */
12807 if (delta)
12808 {
12809 if (CONST_OK_FOR_J (delta))
12810 output_asm_insn ("la\t%1,%2(%1)", op);
12811 else if (DISP_IN_RANGE (delta))
12812 output_asm_insn ("lay\t%1,%2(%1)", op);
12813 else if (CONST_OK_FOR_K (delta))
12814 output_asm_insn ("aghi\t%1,%2", op);
12815 else if (CONST_OK_FOR_Os (delta))
12816 output_asm_insn ("agfi\t%1,%2", op);
12817 else
12818 {
12819 op[6] = gen_label_rtx ();
12820 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
12821 }
12822 }
12823
12824 /* Perform vcall adjustment. */
12825 if (vcall_offset)
12826 {
12827 if (DISP_IN_RANGE (vcall_offset))
12828 {
12829 output_asm_insn ("lg\t%4,0(%1)", op);
12830 output_asm_insn ("ag\t%1,%3(%4)", op);
12831 }
12832 else if (CONST_OK_FOR_K (vcall_offset))
12833 {
12834 output_asm_insn ("lghi\t%4,%3", op);
12835 output_asm_insn ("ag\t%4,0(%1)", op);
12836 output_asm_insn ("ag\t%1,0(%4)", op);
12837 }
12838 else if (CONST_OK_FOR_Os (vcall_offset))
12839 {
12840 output_asm_insn ("lgfi\t%4,%3", op);
12841 output_asm_insn ("ag\t%4,0(%1)", op);
12842 output_asm_insn ("ag\t%1,0(%4)", op);
12843 }
12844 else
12845 {
12846 op[7] = gen_label_rtx ();
12847 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
12848 output_asm_insn ("ag\t%4,0(%1)", op);
12849 output_asm_insn ("ag\t%1,0(%4)", op);
12850 }
12851 }
12852
12853 /* Jump to target. */
12854 output_asm_insn ("jg\t%0", op);
12855
12856 /* Output literal pool if required. */
12857 if (op[5])
12858 {
12859 output_asm_insn (".align\t4", op);
12860 targetm.asm_out.internal_label (file, "L",
12861 CODE_LABEL_NUMBER (op[5]));
12862 }
12863 if (op[6])
12864 {
12865 targetm.asm_out.internal_label (file, "L",
12866 CODE_LABEL_NUMBER (op[6]));
12867 output_asm_insn (".long\t%2", op);
12868 }
12869 if (op[7])
12870 {
12871 targetm.asm_out.internal_label (file, "L",
12872 CODE_LABEL_NUMBER (op[7]));
12873 output_asm_insn (".long\t%3", op);
12874 }
12875 }
12876 else
12877 {
12878 /* Setup base pointer if required. */
12879 if (!vcall_offset
12880 || (!DISP_IN_RANGE (delta)
12881 && !CONST_OK_FOR_K (delta)
12882 && !CONST_OK_FOR_Os (delta))
12883 || (!DISP_IN_RANGE (delta)
12884 && !CONST_OK_FOR_K (vcall_offset)
12885 && !CONST_OK_FOR_Os (vcall_offset)))
12886 {
12887 op[5] = gen_label_rtx ();
12888 output_asm_insn ("basr\t%4,0", op);
12889 targetm.asm_out.internal_label (file, "L",
12890 CODE_LABEL_NUMBER (op[5]));
12891 }
12892
12893 /* Add DELTA to this pointer. */
12894 if (delta)
12895 {
12896 if (CONST_OK_FOR_J (delta))
12897 output_asm_insn ("la\t%1,%2(%1)", op);
12898 else if (DISP_IN_RANGE (delta))
12899 output_asm_insn ("lay\t%1,%2(%1)", op);
12900 else if (CONST_OK_FOR_K (delta))
12901 output_asm_insn ("ahi\t%1,%2", op);
12902 else if (CONST_OK_FOR_Os (delta))
12903 output_asm_insn ("afi\t%1,%2", op);
12904 else
12905 {
12906 op[6] = gen_label_rtx ();
12907 output_asm_insn ("a\t%1,%6-%5(%4)", op);
12908 }
12909 }
12910
12911 /* Perform vcall adjustment. */
12912 if (vcall_offset)
12913 {
12914 if (CONST_OK_FOR_J (vcall_offset))
12915 {
12916 output_asm_insn ("l\t%4,0(%1)", op);
12917 output_asm_insn ("a\t%1,%3(%4)", op);
12918 }
12919 else if (DISP_IN_RANGE (vcall_offset))
12920 {
12921 output_asm_insn ("l\t%4,0(%1)", op);
12922 output_asm_insn ("ay\t%1,%3(%4)", op);
12923 }
12924 else if (CONST_OK_FOR_K (vcall_offset))
12925 {
12926 output_asm_insn ("lhi\t%4,%3", op);
12927 output_asm_insn ("a\t%4,0(%1)", op);
12928 output_asm_insn ("a\t%1,0(%4)", op);
12929 }
12930 else if (CONST_OK_FOR_Os (vcall_offset))
12931 {
12932 output_asm_insn ("iilf\t%4,%3", op);
12933 output_asm_insn ("a\t%4,0(%1)", op);
12934 output_asm_insn ("a\t%1,0(%4)", op);
12935 }
12936 else
12937 {
12938 op[7] = gen_label_rtx ();
12939 output_asm_insn ("l\t%4,%7-%5(%4)", op);
12940 output_asm_insn ("a\t%4,0(%1)", op);
12941 output_asm_insn ("a\t%1,0(%4)", op);
12942 }
12943
12944 /* We had to clobber the base pointer register.
12945 Re-setup the base pointer (with a different base). */
12946 op[5] = gen_label_rtx ();
12947 output_asm_insn ("basr\t%4,0", op);
12948 targetm.asm_out.internal_label (file, "L",
12949 CODE_LABEL_NUMBER (op[5]));
12950 }
12951
12952 /* Jump to target. */
12953 op[8] = gen_label_rtx ();
12954
12955 if (!flag_pic)
12956 output_asm_insn ("l\t%4,%8-%5(%4)", op);
12957 else if (!nonlocal)
12958 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12959 /* We cannot call through .plt, since .plt requires %r12 loaded. */
12960 else if (flag_pic == 1)
12961 {
12962 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12963 output_asm_insn ("l\t%4,%0(%4)", op);
12964 }
12965 else if (flag_pic == 2)
12966 {
12967 op[9] = gen_rtx_REG (Pmode, 0);
12968 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
12969 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12970 output_asm_insn ("ar\t%4,%9", op);
12971 output_asm_insn ("l\t%4,0(%4)", op);
12972 }
12973
12974 output_asm_insn ("br\t%4", op);
12975
12976 /* Output literal pool. */
12977 output_asm_insn (".align\t4", op);
12978
12979 if (nonlocal && flag_pic == 2)
12980 output_asm_insn (".long\t%0", op);
12981 if (nonlocal)
12982 {
12983 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
12984 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
12985 }
12986
12987 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
12988 if (!flag_pic)
12989 output_asm_insn (".long\t%0", op);
12990 else
12991 output_asm_insn (".long\t%0-%5", op);
12992
12993 if (op[6])
12994 {
12995 targetm.asm_out.internal_label (file, "L",
12996 CODE_LABEL_NUMBER (op[6]));
12997 output_asm_insn (".long\t%2", op);
12998 }
12999 if (op[7])
13000 {
13001 targetm.asm_out.internal_label (file, "L",
13002 CODE_LABEL_NUMBER (op[7]));
13003 output_asm_insn (".long\t%3", op);
13004 }
13005 }
13006 final_end_function ();
13007 }
13008
13009 /* Output either an indirect jump or a an indirect call
13010 (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13011 using a branch trampoline disabling branch target prediction. */
13012
13013 void
13014 s390_indirect_branch_via_thunk (unsigned int regno,
13015 unsigned int return_addr_regno,
13016 rtx comparison_operator,
13017 enum s390_indirect_branch_type type)
13018 {
13019 enum s390_indirect_branch_option option;
13020
13021 if (type == s390_indirect_branch_type_return)
13022 {
13023 if (s390_return_addr_from_memory ())
13024 option = s390_opt_function_return_mem;
13025 else
13026 option = s390_opt_function_return_reg;
13027 }
13028 else if (type == s390_indirect_branch_type_jump)
13029 option = s390_opt_indirect_branch_jump;
13030 else if (type == s390_indirect_branch_type_call)
13031 option = s390_opt_indirect_branch_call;
13032 else
13033 gcc_unreachable ();
13034
13035 if (TARGET_INDIRECT_BRANCH_TABLE)
13036 {
13037 char label[32];
13038
13039 ASM_GENERATE_INTERNAL_LABEL (label,
13040 indirect_branch_table_label[option],
13041 indirect_branch_table_label_no[option]++);
13042 ASM_OUTPUT_LABEL (asm_out_file, label);
13043 }
13044
13045 if (return_addr_regno != INVALID_REGNUM)
13046 {
13047 gcc_assert (comparison_operator == NULL_RTX);
13048 fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
13049 }
13050 else
13051 {
13052 fputs (" \tjg", asm_out_file);
13053 if (comparison_operator != NULL_RTX)
13054 print_operand (asm_out_file, comparison_operator, 'C');
13055
13056 fputs ("\t", asm_out_file);
13057 }
13058
13059 if (TARGET_CPU_Z10)
13060 fprintf (asm_out_file,
13061 TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
13062 regno);
13063 else
13064 fprintf (asm_out_file,
13065 TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
13066 INDIRECT_BRANCH_THUNK_REGNUM, regno);
13067
13068 if ((option == s390_opt_indirect_branch_jump
13069 && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
13070 || (option == s390_opt_indirect_branch_call
13071 && cfun->machine->indirect_branch_call == indirect_branch_thunk)
13072 || (option == s390_opt_function_return_reg
13073 && cfun->machine->function_return_reg == indirect_branch_thunk)
13074 || (option == s390_opt_function_return_mem
13075 && cfun->machine->function_return_mem == indirect_branch_thunk))
13076 {
13077 if (TARGET_CPU_Z10)
13078 indirect_branch_z10thunk_mask |= (1 << regno);
13079 else
13080 indirect_branch_prez10thunk_mask |= (1 << regno);
13081 }
13082 }
13083
13084 /* Output an inline thunk for indirect jumps. EXECUTE_TARGET can
13085 either be an address register or a label pointing to the location
13086 of the jump instruction. */
13087
13088 void
13089 s390_indirect_branch_via_inline_thunk (rtx execute_target)
13090 {
13091 if (TARGET_INDIRECT_BRANCH_TABLE)
13092 {
13093 char label[32];
13094
13095 ASM_GENERATE_INTERNAL_LABEL (label,
13096 indirect_branch_table_label[s390_opt_indirect_branch_jump],
13097 indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
13098 ASM_OUTPUT_LABEL (asm_out_file, label);
13099 }
13100
13101 if (!TARGET_ZARCH)
13102 fputs ("\t.machinemode zarch\n", asm_out_file);
13103
13104 if (REG_P (execute_target))
13105 fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
13106 else
13107 output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
13108
13109 if (!TARGET_ZARCH)
13110 fputs ("\t.machinemode esa\n", asm_out_file);
13111
13112 fputs ("0:\tj\t0b\n", asm_out_file);
13113 }
13114
13115 static bool
13116 s390_valid_pointer_mode (scalar_int_mode mode)
13117 {
13118 return (mode == SImode || (TARGET_64BIT && mode == DImode));
13119 }
13120
13121 /* Checks whether the given CALL_EXPR would use a caller
13122 saved register. This is used to decide whether sibling call
13123 optimization could be performed on the respective function
13124 call. */
13125
13126 static bool
13127 s390_call_saved_register_used (tree call_expr)
13128 {
13129 CUMULATIVE_ARGS cum_v;
13130 cumulative_args_t cum;
13131 tree parameter;
13132 machine_mode mode;
13133 tree type;
13134 rtx parm_rtx;
13135 int reg, i;
13136
13137 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13138 cum = pack_cumulative_args (&cum_v);
13139
13140 for (i = 0; i < call_expr_nargs (call_expr); i++)
13141 {
13142 parameter = CALL_EXPR_ARG (call_expr, i);
13143 gcc_assert (parameter);
13144
13145 /* For an undeclared variable passed as parameter we will get
13146 an ERROR_MARK node here. */
13147 if (TREE_CODE (parameter) == ERROR_MARK)
13148 return true;
13149
13150 type = TREE_TYPE (parameter);
13151 gcc_assert (type);
13152
13153 mode = TYPE_MODE (type);
13154 gcc_assert (mode);
13155
13156 /* We assume that in the target function all parameters are
13157 named. This only has an impact on vector argument register
13158 usage none of which is call-saved. */
13159 if (pass_by_reference (&cum_v, mode, type, true))
13160 {
13161 mode = Pmode;
13162 type = build_pointer_type (type);
13163 }
13164
13165 parm_rtx = s390_function_arg (cum, mode, type, true);
13166
13167 s390_function_arg_advance (cum, mode, type, true);
13168
13169 if (!parm_rtx)
13170 continue;
13171
13172 if (REG_P (parm_rtx))
13173 {
13174 for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
13175 if (!call_used_regs[reg + REGNO (parm_rtx)])
13176 return true;
13177 }
13178
13179 if (GET_CODE (parm_rtx) == PARALLEL)
13180 {
13181 int i;
13182
13183 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13184 {
13185 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13186
13187 gcc_assert (REG_P (r));
13188
13189 for (reg = 0; reg < REG_NREGS (r); reg++)
13190 if (!call_used_regs[reg + REGNO (r)])
13191 return true;
13192 }
13193 }
13194
13195 }
13196 return false;
13197 }
13198
13199 /* Return true if the given call expression can be
13200 turned into a sibling call.
13201 DECL holds the declaration of the function to be called whereas
13202 EXP is the call expression itself. */
13203
13204 static bool
13205 s390_function_ok_for_sibcall (tree decl, tree exp)
13206 {
13207 /* The TPF epilogue uses register 1. */
13208 if (TARGET_TPF_PROFILING)
13209 return false;
13210
13211 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13212 which would have to be restored before the sibcall. */
13213 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13214 return false;
13215
13216 /* The thunks for indirect branches require r1 if no exrl is
13217 available. r1 might not be available when doing a sibling
13218 call. */
13219 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13220 && !TARGET_CPU_Z10
13221 && !decl)
13222 return false;
13223
13224 /* Register 6 on s390 is available as an argument register but unfortunately
13225 "caller saved". This makes functions needing this register for arguments
13226 not suitable for sibcalls. */
13227 return !s390_call_saved_register_used (exp);
13228 }
13229
13230 /* Return the fixed registers used for condition codes. */
13231
13232 static bool
13233 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13234 {
13235 *p1 = CC_REGNUM;
13236 *p2 = INVALID_REGNUM;
13237
13238 return true;
13239 }
13240
13241 /* This function is used by the call expanders of the machine description.
13242 It emits the call insn itself together with the necessary operations
13243 to adjust the target address and returns the emitted insn.
13244 ADDR_LOCATION is the target address rtx
13245 TLS_CALL the location of the thread-local symbol
13246 RESULT_REG the register where the result of the call should be stored
13247 RETADDR_REG the register where the return address should be stored
13248 If this parameter is NULL_RTX the call is considered
13249 to be a sibling call. */
13250
13251 rtx_insn *
13252 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13253 rtx retaddr_reg)
13254 {
13255 bool plt_call = false;
13256 rtx_insn *insn;
13257 rtx vec[4] = { NULL_RTX };
13258 int elts = 0;
13259 rtx *call = &vec[0];
13260 rtx *clobber_ret_reg = &vec[1];
13261 rtx *use = &vec[2];
13262 rtx *clobber_thunk_reg = &vec[3];
13263 int i;
13264
13265 /* Direct function calls need special treatment. */
13266 if (GET_CODE (addr_location) == SYMBOL_REF)
13267 {
13268 /* When calling a global routine in PIC mode, we must
13269 replace the symbol itself with the PLT stub. */
13270 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13271 {
13272 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13273 {
13274 addr_location = gen_rtx_UNSPEC (Pmode,
13275 gen_rtvec (1, addr_location),
13276 UNSPEC_PLT);
13277 addr_location = gen_rtx_CONST (Pmode, addr_location);
13278 plt_call = true;
13279 }
13280 else
13281 /* For -fpic code the PLT entries might use r12 which is
13282 call-saved. Therefore we cannot do a sibcall when
13283 calling directly using a symbol ref. When reaching
13284 this point we decided (in s390_function_ok_for_sibcall)
13285 to do a sibcall for a function pointer but one of the
13286 optimizers was able to get rid of the function pointer
13287 by propagating the symbol ref into the call. This
13288 optimization is illegal for S/390 so we turn the direct
13289 call into a indirect call again. */
13290 addr_location = force_reg (Pmode, addr_location);
13291 }
13292 }
13293
13294 /* If it is already an indirect call or the code above moved the
13295 SYMBOL_REF to somewhere else make sure the address can be found in
13296 register 1. */
13297 if (retaddr_reg == NULL_RTX
13298 && GET_CODE (addr_location) != SYMBOL_REF
13299 && !plt_call)
13300 {
13301 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13302 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13303 }
13304
13305 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13306 && GET_CODE (addr_location) != SYMBOL_REF
13307 && !plt_call)
13308 {
13309 /* Indirect branch thunks require the target to be a single GPR. */
13310 addr_location = force_reg (Pmode, addr_location);
13311
13312 /* Without exrl the indirect branch thunks need an additional
13313 register for larl;ex */
13314 if (!TARGET_CPU_Z10)
13315 {
13316 *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
13317 *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
13318 }
13319 }
13320
13321 addr_location = gen_rtx_MEM (QImode, addr_location);
13322 *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13323
13324 if (result_reg != NULL_RTX)
13325 *call = gen_rtx_SET (result_reg, *call);
13326
13327 if (retaddr_reg != NULL_RTX)
13328 {
13329 *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13330
13331 if (tls_call != NULL_RTX)
13332 *use = gen_rtx_USE (VOIDmode, tls_call);
13333 }
13334
13335
13336 for (i = 0; i < 4; i++)
13337 if (vec[i] != NULL_RTX)
13338 elts++;
13339
13340 if (elts > 1)
13341 {
13342 rtvec v;
13343 int e = 0;
13344
13345 v = rtvec_alloc (elts);
13346 for (i = 0; i < 4; i++)
13347 if (vec[i] != NULL_RTX)
13348 {
13349 RTVEC_ELT (v, e) = vec[i];
13350 e++;
13351 }
13352
13353 *call = gen_rtx_PARALLEL (VOIDmode, v);
13354 }
13355
13356 insn = emit_call_insn (*call);
13357
13358 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13359 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13360 {
13361 /* s390_function_ok_for_sibcall should
13362 have denied sibcalls in this case. */
13363 gcc_assert (retaddr_reg != NULL_RTX);
13364 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13365 }
13366 return insn;
13367 }
13368
13369 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13370
13371 static void
13372 s390_conditional_register_usage (void)
13373 {
13374 int i;
13375
13376 if (flag_pic)
13377 {
13378 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13379 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13380 }
13381 fixed_regs[BASE_REGNUM] = 0;
13382 call_used_regs[BASE_REGNUM] = 0;
13383 fixed_regs[RETURN_REGNUM] = 0;
13384 call_used_regs[RETURN_REGNUM] = 0;
13385 if (TARGET_64BIT)
13386 {
13387 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13388 call_used_regs[i] = call_really_used_regs[i] = 0;
13389 }
13390 else
13391 {
13392 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13393 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13394 }
13395
13396 if (TARGET_SOFT_FLOAT)
13397 {
13398 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13399 call_used_regs[i] = fixed_regs[i] = 1;
13400 }
13401
13402 /* Disable v16 - v31 for non-vector target. */
13403 if (!TARGET_VX)
13404 {
13405 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13406 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13407 }
13408 }
13409
13410 /* Corresponding function to eh_return expander. */
13411
13412 static GTY(()) rtx s390_tpf_eh_return_symbol;
13413 void
13414 s390_emit_tpf_eh_return (rtx target)
13415 {
13416 rtx_insn *insn;
13417 rtx reg, orig_ra;
13418
13419 if (!s390_tpf_eh_return_symbol)
13420 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13421
13422 reg = gen_rtx_REG (Pmode, 2);
13423 orig_ra = gen_rtx_REG (Pmode, 3);
13424
13425 emit_move_insn (reg, target);
13426 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13427 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13428 gen_rtx_REG (Pmode, RETURN_REGNUM));
13429 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13430 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13431
13432 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13433 }
13434
13435 /* Rework the prologue/epilogue to avoid saving/restoring
13436 registers unnecessarily. */
13437
13438 static void
13439 s390_optimize_prologue (void)
13440 {
13441 rtx_insn *insn, *new_insn, *next_insn;
13442
13443 /* Do a final recompute of the frame-related data. */
13444 s390_optimize_register_info ();
13445
13446 /* If all special registers are in fact used, there's nothing we
13447 can do, so no point in walking the insn list. */
13448
13449 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13450 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
13451 return;
13452
13453 /* Search for prologue/epilogue insns and replace them. */
13454 for (insn = get_insns (); insn; insn = next_insn)
13455 {
13456 int first, last, off;
13457 rtx set, base, offset;
13458 rtx pat;
13459
13460 next_insn = NEXT_INSN (insn);
13461
13462 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13463 continue;
13464
13465 pat = PATTERN (insn);
13466
13467 /* Remove ldgr/lgdr instructions used for saving and restore
13468 GPRs if possible. */
13469 if (TARGET_Z10)
13470 {
13471 rtx tmp_pat = pat;
13472
13473 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13474 tmp_pat = XVECEXP (pat, 0, 0);
13475
13476 if (GET_CODE (tmp_pat) == SET
13477 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13478 && REG_P (SET_SRC (tmp_pat))
13479 && REG_P (SET_DEST (tmp_pat)))
13480 {
13481 int src_regno = REGNO (SET_SRC (tmp_pat));
13482 int dest_regno = REGNO (SET_DEST (tmp_pat));
13483 int gpr_regno;
13484 int fpr_regno;
13485
13486 if (!((GENERAL_REGNO_P (src_regno)
13487 && FP_REGNO_P (dest_regno))
13488 || (FP_REGNO_P (src_regno)
13489 && GENERAL_REGNO_P (dest_regno))))
13490 continue;
13491
13492 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13493 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13494
13495 /* GPR must be call-saved, FPR must be call-clobbered. */
13496 if (!call_really_used_regs[fpr_regno]
13497 || call_really_used_regs[gpr_regno])
13498 continue;
13499
13500 /* It must not happen that what we once saved in an FPR now
13501 needs a stack slot. */
13502 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13503
13504 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13505 {
13506 remove_insn (insn);
13507 continue;
13508 }
13509 }
13510 }
13511
13512 if (GET_CODE (pat) == PARALLEL
13513 && store_multiple_operation (pat, VOIDmode))
13514 {
13515 set = XVECEXP (pat, 0, 0);
13516 first = REGNO (SET_SRC (set));
13517 last = first + XVECLEN (pat, 0) - 1;
13518 offset = const0_rtx;
13519 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13520 off = INTVAL (offset);
13521
13522 if (GET_CODE (base) != REG || off < 0)
13523 continue;
13524 if (cfun_frame_layout.first_save_gpr != -1
13525 && (cfun_frame_layout.first_save_gpr < first
13526 || cfun_frame_layout.last_save_gpr > last))
13527 continue;
13528 if (REGNO (base) != STACK_POINTER_REGNUM
13529 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13530 continue;
13531 if (first > BASE_REGNUM || last < BASE_REGNUM)
13532 continue;
13533
13534 if (cfun_frame_layout.first_save_gpr != -1)
13535 {
13536 rtx s_pat = save_gprs (base,
13537 off + (cfun_frame_layout.first_save_gpr
13538 - first) * UNITS_PER_LONG,
13539 cfun_frame_layout.first_save_gpr,
13540 cfun_frame_layout.last_save_gpr);
13541 new_insn = emit_insn_before (s_pat, insn);
13542 INSN_ADDRESSES_NEW (new_insn, -1);
13543 }
13544
13545 remove_insn (insn);
13546 continue;
13547 }
13548
13549 if (cfun_frame_layout.first_save_gpr == -1
13550 && GET_CODE (pat) == SET
13551 && GENERAL_REG_P (SET_SRC (pat))
13552 && GET_CODE (SET_DEST (pat)) == MEM)
13553 {
13554 set = pat;
13555 first = REGNO (SET_SRC (set));
13556 offset = const0_rtx;
13557 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13558 off = INTVAL (offset);
13559
13560 if (GET_CODE (base) != REG || off < 0)
13561 continue;
13562 if (REGNO (base) != STACK_POINTER_REGNUM
13563 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13564 continue;
13565
13566 remove_insn (insn);
13567 continue;
13568 }
13569
13570 if (GET_CODE (pat) == PARALLEL
13571 && load_multiple_operation (pat, VOIDmode))
13572 {
13573 set = XVECEXP (pat, 0, 0);
13574 first = REGNO (SET_DEST (set));
13575 last = first + XVECLEN (pat, 0) - 1;
13576 offset = const0_rtx;
13577 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13578 off = INTVAL (offset);
13579
13580 if (GET_CODE (base) != REG || off < 0)
13581 continue;
13582
13583 if (cfun_frame_layout.first_restore_gpr != -1
13584 && (cfun_frame_layout.first_restore_gpr < first
13585 || cfun_frame_layout.last_restore_gpr > last))
13586 continue;
13587 if (REGNO (base) != STACK_POINTER_REGNUM
13588 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13589 continue;
13590 if (first > BASE_REGNUM || last < BASE_REGNUM)
13591 continue;
13592
13593 if (cfun_frame_layout.first_restore_gpr != -1)
13594 {
13595 rtx rpat = restore_gprs (base,
13596 off + (cfun_frame_layout.first_restore_gpr
13597 - first) * UNITS_PER_LONG,
13598 cfun_frame_layout.first_restore_gpr,
13599 cfun_frame_layout.last_restore_gpr);
13600
13601 /* Remove REG_CFA_RESTOREs for registers that we no
13602 longer need to save. */
13603 REG_NOTES (rpat) = REG_NOTES (insn);
13604 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13605 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13606 && ((int) REGNO (XEXP (*ptr, 0))
13607 < cfun_frame_layout.first_restore_gpr))
13608 *ptr = XEXP (*ptr, 1);
13609 else
13610 ptr = &XEXP (*ptr, 1);
13611 new_insn = emit_insn_before (rpat, insn);
13612 RTX_FRAME_RELATED_P (new_insn) = 1;
13613 INSN_ADDRESSES_NEW (new_insn, -1);
13614 }
13615
13616 remove_insn (insn);
13617 continue;
13618 }
13619
13620 if (cfun_frame_layout.first_restore_gpr == -1
13621 && GET_CODE (pat) == SET
13622 && GENERAL_REG_P (SET_DEST (pat))
13623 && GET_CODE (SET_SRC (pat)) == MEM)
13624 {
13625 set = pat;
13626 first = REGNO (SET_DEST (set));
13627 offset = const0_rtx;
13628 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13629 off = INTVAL (offset);
13630
13631 if (GET_CODE (base) != REG || off < 0)
13632 continue;
13633
13634 if (REGNO (base) != STACK_POINTER_REGNUM
13635 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13636 continue;
13637
13638 remove_insn (insn);
13639 continue;
13640 }
13641 }
13642 }
13643
13644 /* On z10 and later the dynamic branch prediction must see the
13645 backward jump within a certain windows. If not it falls back to
13646 the static prediction. This function rearranges the loop backward
13647 branch in a way which makes the static prediction always correct.
13648 The function returns true if it added an instruction. */
13649 static bool
13650 s390_fix_long_loop_prediction (rtx_insn *insn)
13651 {
13652 rtx set = single_set (insn);
13653 rtx code_label, label_ref;
13654 rtx_insn *uncond_jump;
13655 rtx_insn *cur_insn;
13656 rtx tmp;
13657 int distance;
13658
13659 /* This will exclude branch on count and branch on index patterns
13660 since these are correctly statically predicted. */
13661 if (!set
13662 || SET_DEST (set) != pc_rtx
13663 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13664 return false;
13665
13666 /* Skip conditional returns. */
13667 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13668 && XEXP (SET_SRC (set), 2) == pc_rtx)
13669 return false;
13670
13671 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13672 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13673
13674 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13675
13676 code_label = XEXP (label_ref, 0);
13677
13678 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13679 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13680 || (INSN_ADDRESSES (INSN_UID (insn))
13681 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13682 return false;
13683
13684 for (distance = 0, cur_insn = PREV_INSN (insn);
13685 distance < PREDICT_DISTANCE - 6;
13686 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13687 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13688 return false;
13689
13690 rtx_code_label *new_label = gen_label_rtx ();
13691 uncond_jump = emit_jump_insn_after (
13692 gen_rtx_SET (pc_rtx,
13693 gen_rtx_LABEL_REF (VOIDmode, code_label)),
13694 insn);
13695 emit_label_after (new_label, uncond_jump);
13696
13697 tmp = XEXP (SET_SRC (set), 1);
13698 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13699 XEXP (SET_SRC (set), 2) = tmp;
13700 INSN_CODE (insn) = -1;
13701
13702 XEXP (label_ref, 0) = new_label;
13703 JUMP_LABEL (insn) = new_label;
13704 JUMP_LABEL (uncond_jump) = code_label;
13705
13706 return true;
13707 }
13708
13709 /* Returns 1 if INSN reads the value of REG for purposes not related
13710 to addressing of memory, and 0 otherwise. */
13711 static int
13712 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
13713 {
13714 return reg_referenced_p (reg, PATTERN (insn))
13715 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
13716 }
13717
13718 /* Starting from INSN find_cond_jump looks downwards in the insn
13719 stream for a single jump insn which is the last user of the
13720 condition code set in INSN. */
13721 static rtx_insn *
13722 find_cond_jump (rtx_insn *insn)
13723 {
13724 for (; insn; insn = NEXT_INSN (insn))
13725 {
13726 rtx ite, cc;
13727
13728 if (LABEL_P (insn))
13729 break;
13730
13731 if (!JUMP_P (insn))
13732 {
13733 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
13734 break;
13735 continue;
13736 }
13737
13738 /* This will be triggered by a return. */
13739 if (GET_CODE (PATTERN (insn)) != SET)
13740 break;
13741
13742 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
13743 ite = SET_SRC (PATTERN (insn));
13744
13745 if (GET_CODE (ite) != IF_THEN_ELSE)
13746 break;
13747
13748 cc = XEXP (XEXP (ite, 0), 0);
13749 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
13750 break;
13751
13752 if (find_reg_note (insn, REG_DEAD, cc))
13753 return insn;
13754 break;
13755 }
13756
13757 return NULL;
13758 }
13759
13760 /* Swap the condition in COND and the operands in OP0 and OP1 so that
13761 the semantics does not change. If NULL_RTX is passed as COND the
13762 function tries to find the conditional jump starting with INSN. */
13763 static void
13764 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
13765 {
13766 rtx tmp = *op0;
13767
13768 if (cond == NULL_RTX)
13769 {
13770 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
13771 rtx set = jump ? single_set (jump) : NULL_RTX;
13772
13773 if (set == NULL_RTX)
13774 return;
13775
13776 cond = XEXP (SET_SRC (set), 0);
13777 }
13778
13779 *op0 = *op1;
13780 *op1 = tmp;
13781 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
13782 }
13783
13784 /* On z10, instructions of the compare-and-branch family have the
13785 property to access the register occurring as second operand with
13786 its bits complemented. If such a compare is grouped with a second
13787 instruction that accesses the same register non-complemented, and
13788 if that register's value is delivered via a bypass, then the
13789 pipeline recycles, thereby causing significant performance decline.
13790 This function locates such situations and exchanges the two
13791 operands of the compare. The function return true whenever it
13792 added an insn. */
13793 static bool
13794 s390_z10_optimize_cmp (rtx_insn *insn)
13795 {
13796 rtx_insn *prev_insn, *next_insn;
13797 bool insn_added_p = false;
13798 rtx cond, *op0, *op1;
13799
13800 if (GET_CODE (PATTERN (insn)) == PARALLEL)
13801 {
13802 /* Handle compare and branch and branch on count
13803 instructions. */
13804 rtx pattern = single_set (insn);
13805
13806 if (!pattern
13807 || SET_DEST (pattern) != pc_rtx
13808 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
13809 return false;
13810
13811 cond = XEXP (SET_SRC (pattern), 0);
13812 op0 = &XEXP (cond, 0);
13813 op1 = &XEXP (cond, 1);
13814 }
13815 else if (GET_CODE (PATTERN (insn)) == SET)
13816 {
13817 rtx src, dest;
13818
13819 /* Handle normal compare instructions. */
13820 src = SET_SRC (PATTERN (insn));
13821 dest = SET_DEST (PATTERN (insn));
13822
13823 if (!REG_P (dest)
13824 || !CC_REGNO_P (REGNO (dest))
13825 || GET_CODE (src) != COMPARE)
13826 return false;
13827
13828 /* s390_swap_cmp will try to find the conditional
13829 jump when passing NULL_RTX as condition. */
13830 cond = NULL_RTX;
13831 op0 = &XEXP (src, 0);
13832 op1 = &XEXP (src, 1);
13833 }
13834 else
13835 return false;
13836
13837 if (!REG_P (*op0) || !REG_P (*op1))
13838 return false;
13839
13840 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
13841 return false;
13842
13843 /* Swap the COMPARE arguments and its mask if there is a
13844 conflicting access in the previous insn. */
13845 prev_insn = prev_active_insn (insn);
13846 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13847 && reg_referenced_p (*op1, PATTERN (prev_insn)))
13848 s390_swap_cmp (cond, op0, op1, insn);
13849
13850 /* Check if there is a conflict with the next insn. If there
13851 was no conflict with the previous insn, then swap the
13852 COMPARE arguments and its mask. If we already swapped
13853 the operands, or if swapping them would cause a conflict
13854 with the previous insn, issue a NOP after the COMPARE in
13855 order to separate the two instuctions. */
13856 next_insn = next_active_insn (insn);
13857 if (next_insn != NULL_RTX && INSN_P (next_insn)
13858 && s390_non_addr_reg_read_p (*op1, next_insn))
13859 {
13860 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13861 && s390_non_addr_reg_read_p (*op0, prev_insn))
13862 {
13863 if (REGNO (*op1) == 0)
13864 emit_insn_after (gen_nop_lr1 (), insn);
13865 else
13866 emit_insn_after (gen_nop_lr0 (), insn);
13867 insn_added_p = true;
13868 }
13869 else
13870 s390_swap_cmp (cond, op0, op1, insn);
13871 }
13872 return insn_added_p;
13873 }
13874
13875 /* Number of INSNs to be scanned backward in the last BB of the loop
13876 and forward in the first BB of the loop. This usually should be a
13877 bit more than the number of INSNs which could go into one
13878 group. */
13879 #define S390_OSC_SCAN_INSN_NUM 5
13880
13881 /* Scan LOOP for static OSC collisions and return true if a osc_break
13882 should be issued for this loop. */
13883 static bool
13884 s390_adjust_loop_scan_osc (struct loop* loop)
13885
13886 {
13887 HARD_REG_SET modregs, newregs;
13888 rtx_insn *insn, *store_insn = NULL;
13889 rtx set;
13890 struct s390_address addr_store, addr_load;
13891 subrtx_iterator::array_type array;
13892 int insn_count;
13893
13894 CLEAR_HARD_REG_SET (modregs);
13895
13896 insn_count = 0;
13897 FOR_BB_INSNS_REVERSE (loop->latch, insn)
13898 {
13899 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13900 continue;
13901
13902 insn_count++;
13903 if (insn_count > S390_OSC_SCAN_INSN_NUM)
13904 return false;
13905
13906 find_all_hard_reg_sets (insn, &newregs, true);
13907 IOR_HARD_REG_SET (modregs, newregs);
13908
13909 set = single_set (insn);
13910 if (!set)
13911 continue;
13912
13913 if (MEM_P (SET_DEST (set))
13914 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
13915 {
13916 store_insn = insn;
13917 break;
13918 }
13919 }
13920
13921 if (store_insn == NULL_RTX)
13922 return false;
13923
13924 insn_count = 0;
13925 FOR_BB_INSNS (loop->header, insn)
13926 {
13927 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13928 continue;
13929
13930 if (insn == store_insn)
13931 return false;
13932
13933 insn_count++;
13934 if (insn_count > S390_OSC_SCAN_INSN_NUM)
13935 return false;
13936
13937 find_all_hard_reg_sets (insn, &newregs, true);
13938 IOR_HARD_REG_SET (modregs, newregs);
13939
13940 set = single_set (insn);
13941 if (!set)
13942 continue;
13943
13944 /* An intermediate store disrupts static OSC checking
13945 anyway. */
13946 if (MEM_P (SET_DEST (set))
13947 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
13948 return false;
13949
13950 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
13951 if (MEM_P (*iter)
13952 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
13953 && rtx_equal_p (addr_load.base, addr_store.base)
13954 && rtx_equal_p (addr_load.indx, addr_store.indx)
13955 && rtx_equal_p (addr_load.disp, addr_store.disp))
13956 {
13957 if ((addr_load.base != NULL_RTX
13958 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
13959 || (addr_load.indx != NULL_RTX
13960 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
13961 return true;
13962 }
13963 }
13964 return false;
13965 }
13966
13967 /* Look for adjustments which can be done on simple innermost
13968 loops. */
13969 static void
13970 s390_adjust_loops ()
13971 {
13972 struct loop *loop = NULL;
13973
13974 df_analyze ();
13975 compute_bb_for_insn ();
13976
13977 /* Find the loops. */
13978 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
13979
13980 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
13981 {
13982 if (dump_file)
13983 {
13984 flow_loop_dump (loop, dump_file, NULL, 0);
13985 fprintf (dump_file, ";; OSC loop scan Loop: ");
13986 }
13987 if (loop->latch == NULL
13988 || pc_set (BB_END (loop->latch)) == NULL_RTX
13989 || !s390_adjust_loop_scan_osc (loop))
13990 {
13991 if (dump_file)
13992 {
13993 if (loop->latch == NULL)
13994 fprintf (dump_file, " muliple backward jumps\n");
13995 else
13996 {
13997 fprintf (dump_file, " header insn: %d latch insn: %d ",
13998 INSN_UID (BB_HEAD (loop->header)),
13999 INSN_UID (BB_END (loop->latch)));
14000 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14001 fprintf (dump_file, " loop does not end with jump\n");
14002 else
14003 fprintf (dump_file, " not instrumented\n");
14004 }
14005 }
14006 }
14007 else
14008 {
14009 rtx_insn *new_insn;
14010
14011 if (dump_file)
14012 fprintf (dump_file, " adding OSC break insn: ");
14013 new_insn = emit_insn_before (gen_osc_break (),
14014 BB_END (loop->latch));
14015 INSN_ADDRESSES_NEW (new_insn, -1);
14016 }
14017 }
14018
14019 loop_optimizer_finalize ();
14020
14021 df_finish_pass (false);
14022 }
14023
14024 /* Perform machine-dependent processing. */
14025
14026 static void
14027 s390_reorg (void)
14028 {
14029 struct constant_pool *pool;
14030 rtx_insn *insn;
14031 int hw_before, hw_after;
14032
14033 if (s390_tune == PROCESSOR_2964_Z13)
14034 s390_adjust_loops ();
14035
14036 /* Make sure all splits have been performed; splits after
14037 machine_dependent_reorg might confuse insn length counts. */
14038 split_all_insns_noflow ();
14039
14040 /* Install the main literal pool and the associated base
14041 register load insns. The literal pool might be > 4096 bytes in
14042 size, so that some of its elements cannot be directly accessed.
14043
14044 To fix this, we split the single literal pool into multiple
14045 pool chunks, reloading the pool base register at various
14046 points throughout the function to ensure it always points to
14047 the pool chunk the following code expects. */
14048
14049 /* Collect the literal pool. */
14050 pool = s390_mainpool_start ();
14051 if (pool)
14052 {
14053 /* Finish up literal pool related changes. */
14054 s390_mainpool_finish (pool);
14055 }
14056 else
14057 {
14058 /* If literal pool overflowed, chunkify it. */
14059 pool = s390_chunkify_start ();
14060 s390_chunkify_finish (pool);
14061 }
14062
14063 /* Generate out-of-pool execute target insns. */
14064 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14065 {
14066 rtx label;
14067 rtx_insn *target;
14068
14069 label = s390_execute_label (insn);
14070 if (!label)
14071 continue;
14072
14073 gcc_assert (label != const0_rtx);
14074
14075 target = emit_label (XEXP (label, 0));
14076 INSN_ADDRESSES_NEW (target, -1);
14077
14078 if (JUMP_P (insn))
14079 {
14080 target = emit_jump_insn (s390_execute_target (insn));
14081 /* This is important in order to keep a table jump
14082 pointing at the jump table label. Only this makes it
14083 being recognized as table jump. */
14084 JUMP_LABEL (target) = JUMP_LABEL (insn);
14085 }
14086 else
14087 target = emit_insn (s390_execute_target (insn));
14088 INSN_ADDRESSES_NEW (target, -1);
14089 }
14090
14091 /* Try to optimize prologue and epilogue further. */
14092 s390_optimize_prologue ();
14093
14094 /* Walk over the insns and do some >=z10 specific changes. */
14095 if (s390_tune >= PROCESSOR_2097_Z10)
14096 {
14097 rtx_insn *insn;
14098 bool insn_added_p = false;
14099
14100 /* The insn lengths and addresses have to be up to date for the
14101 following manipulations. */
14102 shorten_branches (get_insns ());
14103
14104 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14105 {
14106 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14107 continue;
14108
14109 if (JUMP_P (insn))
14110 insn_added_p |= s390_fix_long_loop_prediction (insn);
14111
14112 if ((GET_CODE (PATTERN (insn)) == PARALLEL
14113 || GET_CODE (PATTERN (insn)) == SET)
14114 && s390_tune == PROCESSOR_2097_Z10)
14115 insn_added_p |= s390_z10_optimize_cmp (insn);
14116 }
14117
14118 /* Adjust branches if we added new instructions. */
14119 if (insn_added_p)
14120 shorten_branches (get_insns ());
14121 }
14122
14123 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14124 if (hw_after > 0)
14125 {
14126 rtx_insn *insn;
14127
14128 /* Insert NOPs for hotpatching. */
14129 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14130 /* Emit NOPs
14131 1. inside the area covered by debug information to allow setting
14132 breakpoints at the NOPs,
14133 2. before any insn which results in an asm instruction,
14134 3. before in-function labels to avoid jumping to the NOPs, for
14135 example as part of a loop,
14136 4. before any barrier in case the function is completely empty
14137 (__builtin_unreachable ()) and has neither internal labels nor
14138 active insns.
14139 */
14140 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14141 break;
14142 /* Output a series of NOPs before the first active insn. */
14143 while (insn && hw_after > 0)
14144 {
14145 if (hw_after >= 3)
14146 {
14147 emit_insn_before (gen_nop_6_byte (), insn);
14148 hw_after -= 3;
14149 }
14150 else if (hw_after >= 2)
14151 {
14152 emit_insn_before (gen_nop_4_byte (), insn);
14153 hw_after -= 2;
14154 }
14155 else
14156 {
14157 emit_insn_before (gen_nop_2_byte (), insn);
14158 hw_after -= 1;
14159 }
14160 }
14161 }
14162 }
14163
14164 /* Return true if INSN is a fp load insn writing register REGNO. */
14165 static inline bool
14166 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14167 {
14168 rtx set;
14169 enum attr_type flag = s390_safe_attr_type (insn);
14170
14171 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14172 return false;
14173
14174 set = single_set (insn);
14175
14176 if (set == NULL_RTX)
14177 return false;
14178
14179 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14180 return false;
14181
14182 if (REGNO (SET_DEST (set)) != regno)
14183 return false;
14184
14185 return true;
14186 }
14187
14188 /* This value describes the distance to be avoided between an
14189 arithmetic fp instruction and an fp load writing the same register.
14190 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
14191 fine but the exact value has to be avoided. Otherwise the FP
14192 pipeline will throw an exception causing a major penalty. */
14193 #define Z10_EARLYLOAD_DISTANCE 7
14194
14195 /* Rearrange the ready list in order to avoid the situation described
14196 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14197 moved to the very end of the ready list. */
14198 static void
14199 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14200 {
14201 unsigned int regno;
14202 int nready = *nready_p;
14203 rtx_insn *tmp;
14204 int i;
14205 rtx_insn *insn;
14206 rtx set;
14207 enum attr_type flag;
14208 int distance;
14209
14210 /* Skip DISTANCE - 1 active insns. */
14211 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14212 distance > 0 && insn != NULL_RTX;
14213 distance--, insn = prev_active_insn (insn))
14214 if (CALL_P (insn) || JUMP_P (insn))
14215 return;
14216
14217 if (insn == NULL_RTX)
14218 return;
14219
14220 set = single_set (insn);
14221
14222 if (set == NULL_RTX || !REG_P (SET_DEST (set))
14223 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14224 return;
14225
14226 flag = s390_safe_attr_type (insn);
14227
14228 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14229 return;
14230
14231 regno = REGNO (SET_DEST (set));
14232 i = nready - 1;
14233
14234 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14235 i--;
14236
14237 if (!i)
14238 return;
14239
14240 tmp = ready[i];
14241 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14242 ready[0] = tmp;
14243 }
14244
14245 /* Returns TRUE if BB is entered via a fallthru edge and all other
14246 incoming edges are less than likely. */
14247 static bool
14248 s390_bb_fallthru_entry_likely (basic_block bb)
14249 {
14250 edge e, fallthru_edge;
14251 edge_iterator ei;
14252
14253 if (!bb)
14254 return false;
14255
14256 fallthru_edge = find_fallthru_edge (bb->preds);
14257 if (!fallthru_edge)
14258 return false;
14259
14260 FOR_EACH_EDGE (e, ei, bb->preds)
14261 if (e != fallthru_edge
14262 && e->probability >= profile_probability::likely ())
14263 return false;
14264
14265 return true;
14266 }
14267
14268 struct s390_sched_state
14269 {
14270 /* Number of insns in the group. */
14271 int group_state;
14272 /* Execution side of the group. */
14273 int side;
14274 /* Group can only hold two insns. */
14275 bool group_of_two;
14276 } s390_sched_state;
14277
14278 static struct s390_sched_state sched_state = {0, 1, false};
14279
14280 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14281 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14282 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14283 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14284 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
14285
14286 static unsigned int
14287 s390_get_sched_attrmask (rtx_insn *insn)
14288 {
14289 unsigned int mask = 0;
14290
14291 switch (s390_tune)
14292 {
14293 case PROCESSOR_2827_ZEC12:
14294 if (get_attr_zEC12_cracked (insn))
14295 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14296 if (get_attr_zEC12_expanded (insn))
14297 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14298 if (get_attr_zEC12_endgroup (insn))
14299 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14300 if (get_attr_zEC12_groupalone (insn))
14301 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14302 break;
14303 case PROCESSOR_2964_Z13:
14304 if (get_attr_z13_cracked (insn))
14305 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14306 if (get_attr_z13_expanded (insn))
14307 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14308 if (get_attr_z13_endgroup (insn))
14309 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14310 if (get_attr_z13_groupalone (insn))
14311 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14312 if (get_attr_z13_groupoftwo (insn))
14313 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14314 break;
14315 case PROCESSOR_3906_Z14:
14316 if (get_attr_z14_cracked (insn))
14317 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14318 if (get_attr_z14_expanded (insn))
14319 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14320 if (get_attr_z14_endgroup (insn))
14321 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14322 if (get_attr_z14_groupalone (insn))
14323 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14324 if (get_attr_z14_groupoftwo (insn))
14325 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14326 break;
14327 default:
14328 gcc_unreachable ();
14329 }
14330 return mask;
14331 }
14332
14333 static unsigned int
14334 s390_get_unit_mask (rtx_insn *insn, int *units)
14335 {
14336 unsigned int mask = 0;
14337
14338 switch (s390_tune)
14339 {
14340 case PROCESSOR_2964_Z13:
14341 *units = 4;
14342 if (get_attr_z13_unit_lsu (insn))
14343 mask |= 1 << 0;
14344 if (get_attr_z13_unit_fxa (insn))
14345 mask |= 1 << 1;
14346 if (get_attr_z13_unit_fxb (insn))
14347 mask |= 1 << 2;
14348 if (get_attr_z13_unit_vfu (insn))
14349 mask |= 1 << 3;
14350 break;
14351 case PROCESSOR_3906_Z14:
14352 *units = 4;
14353 if (get_attr_z14_unit_lsu (insn))
14354 mask |= 1 << 0;
14355 if (get_attr_z14_unit_fxa (insn))
14356 mask |= 1 << 1;
14357 if (get_attr_z14_unit_fxb (insn))
14358 mask |= 1 << 2;
14359 if (get_attr_z14_unit_vfu (insn))
14360 mask |= 1 << 3;
14361 break;
14362 default:
14363 gcc_unreachable ();
14364 }
14365 return mask;
14366 }
14367
14368 static bool
14369 s390_is_fpd (rtx_insn *insn)
14370 {
14371 if (insn == NULL_RTX)
14372 return false;
14373
14374 return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn);
14375 }
14376
14377 static bool
14378 s390_is_fxd (rtx_insn *insn)
14379 {
14380 if (insn == NULL_RTX)
14381 return false;
14382
14383 return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn);
14384 }
14385
14386 /* Returns TRUE if INSN is a long-running instruction. */
14387 static bool
14388 s390_is_longrunning (rtx_insn *insn)
14389 {
14390 if (insn == NULL_RTX)
14391 return false;
14392
14393 return s390_is_fxd (insn) || s390_is_fpd (insn);
14394 }
14395
14396
14397 /* Return the scheduling score for INSN. The higher the score the
14398 better. The score is calculated from the OOO scheduling attributes
14399 of INSN and the scheduling state sched_state. */
14400 static int
14401 s390_sched_score (rtx_insn *insn)
14402 {
14403 unsigned int mask = s390_get_sched_attrmask (insn);
14404 int score = 0;
14405
14406 switch (sched_state.group_state)
14407 {
14408 case 0:
14409 /* Try to put insns into the first slot which would otherwise
14410 break a group. */
14411 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14412 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14413 score += 5;
14414 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14415 score += 10;
14416 break;
14417 case 1:
14418 /* Prefer not cracked insns while trying to put together a
14419 group. */
14420 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14421 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14422 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14423 score += 10;
14424 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14425 score += 5;
14426 /* If we are in a group of two already, try to schedule another
14427 group-of-two insn to avoid shortening another group. */
14428 if (sched_state.group_of_two
14429 && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14430 score += 15;
14431 break;
14432 case 2:
14433 /* Prefer not cracked insns while trying to put together a
14434 group. */
14435 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14436 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14437 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14438 score += 10;
14439 /* Prefer endgroup insns in the last slot. */
14440 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14441 score += 10;
14442 /* Try to avoid group-of-two insns in the last slot as they will
14443 shorten this group as well as the next one. */
14444 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14445 score = MAX (0, score - 15);
14446 break;
14447 }
14448
14449 if (s390_tune >= PROCESSOR_2964_Z13)
14450 {
14451 int units, i;
14452 unsigned unit_mask, m = 1;
14453
14454 unit_mask = s390_get_unit_mask (insn, &units);
14455 gcc_assert (units <= MAX_SCHED_UNITS);
14456
14457 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14458 ago the last insn of this unit type got scheduled. This is
14459 supposed to help providing a proper instruction mix to the
14460 CPU. */
14461 for (i = 0; i < units; i++, m <<= 1)
14462 if (m & unit_mask)
14463 score += (last_scheduled_unit_distance[i][sched_state.side]
14464 * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);
14465
14466 int other_side = 1 - sched_state.side;
14467
14468 /* Try to delay long-running insns when side is busy. */
14469 if (s390_is_longrunning (insn))
14470 {
14471 if (s390_is_fxd (insn))
14472 {
14473 if (fxd_longrunning[sched_state.side]
14474 && fxd_longrunning[other_side]
14475 <= fxd_longrunning[sched_state.side])
14476 score = MAX (0, score - 10);
14477
14478 else if (fxd_longrunning[other_side]
14479 >= fxd_longrunning[sched_state.side])
14480 score += 10;
14481 }
14482
14483 if (s390_is_fpd (insn))
14484 {
14485 if (fpd_longrunning[sched_state.side]
14486 && fpd_longrunning[other_side]
14487 <= fpd_longrunning[sched_state.side])
14488 score = MAX (0, score - 10);
14489
14490 else if (fpd_longrunning[other_side]
14491 >= fpd_longrunning[sched_state.side])
14492 score += 10;
14493 }
14494 }
14495 }
14496
14497 return score;
14498 }
14499
14500 /* This function is called via hook TARGET_SCHED_REORDER before
14501 issuing one insn from list READY which contains *NREADYP entries.
14502 For target z10 it reorders load instructions to avoid early load
14503 conflicts in the floating point pipeline */
14504 static int
14505 s390_sched_reorder (FILE *file, int verbose,
14506 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14507 {
14508 if (s390_tune == PROCESSOR_2097_Z10
14509 && reload_completed
14510 && *nreadyp > 1)
14511 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14512
14513 if (s390_tune >= PROCESSOR_2827_ZEC12
14514 && reload_completed
14515 && *nreadyp > 1)
14516 {
14517 int i;
14518 int last_index = *nreadyp - 1;
14519 int max_index = -1;
14520 int max_score = -1;
14521 rtx_insn *tmp;
14522
14523 /* Just move the insn with the highest score to the top (the
14524 end) of the list. A full sort is not needed since a conflict
14525 in the hazard recognition cannot happen. So the top insn in
14526 the ready list will always be taken. */
14527 for (i = last_index; i >= 0; i--)
14528 {
14529 int score;
14530
14531 if (recog_memoized (ready[i]) < 0)
14532 continue;
14533
14534 score = s390_sched_score (ready[i]);
14535 if (score > max_score)
14536 {
14537 max_score = score;
14538 max_index = i;
14539 }
14540 }
14541
14542 if (max_index != -1)
14543 {
14544 if (max_index != last_index)
14545 {
14546 tmp = ready[max_index];
14547 ready[max_index] = ready[last_index];
14548 ready[last_index] = tmp;
14549
14550 if (verbose > 5)
14551 fprintf (file,
14552 ";;\t\tBACKEND: move insn %d to the top of list\n",
14553 INSN_UID (ready[last_index]));
14554 }
14555 else if (verbose > 5)
14556 fprintf (file,
14557 ";;\t\tBACKEND: best insn %d already on top\n",
14558 INSN_UID (ready[last_index]));
14559 }
14560
14561 if (verbose > 5)
14562 {
14563 fprintf (file, "ready list ooo attributes - sched state: %d\n",
14564 sched_state.group_state);
14565
14566 for (i = last_index; i >= 0; i--)
14567 {
14568 unsigned int sched_mask;
14569 rtx_insn *insn = ready[i];
14570
14571 if (recog_memoized (insn) < 0)
14572 continue;
14573
14574 sched_mask = s390_get_sched_attrmask (insn);
14575 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14576 INSN_UID (insn),
14577 s390_sched_score (insn));
14578 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14579 ((M) & sched_mask) ? #ATTR : "");
14580 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14581 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14582 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14583 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14584 #undef PRINT_SCHED_ATTR
14585 if (s390_tune >= PROCESSOR_2964_Z13)
14586 {
14587 unsigned int unit_mask, m = 1;
14588 int units, j;
14589
14590 unit_mask = s390_get_unit_mask (insn, &units);
14591 fprintf (file, "(units:");
14592 for (j = 0; j < units; j++, m <<= 1)
14593 if (m & unit_mask)
14594 fprintf (file, " u%d", j);
14595 fprintf (file, ")");
14596 }
14597 fprintf (file, "\n");
14598 }
14599 }
14600 }
14601
14602 return s390_issue_rate ();
14603 }
14604
14605
14606 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14607 the scheduler has issued INSN. It stores the last issued insn into
14608 last_scheduled_insn in order to make it available for
14609 s390_sched_reorder. */
14610 static int
14611 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14612 {
14613 last_scheduled_insn = insn;
14614
14615 bool ends_group = false;
14616
14617 if (s390_tune >= PROCESSOR_2827_ZEC12
14618 && reload_completed
14619 && recog_memoized (insn) >= 0)
14620 {
14621 unsigned int mask = s390_get_sched_attrmask (insn);
14622
14623 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14624 sched_state.group_of_two = true;
14625
14626 /* If this is a group-of-two insn, we actually ended the last group
14627 and this insn is the first one of the new group. */
14628 if (sched_state.group_state == 2 && sched_state.group_of_two)
14629 {
14630 sched_state.side = sched_state.side ? 0 : 1;
14631 sched_state.group_state = 0;
14632 }
14633
14634 /* Longrunning and side bookkeeping. */
14635 for (int i = 0; i < 2; i++)
14636 {
14637 fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
14638 fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
14639 }
14640
14641 unsigned latency = insn_default_latency (insn);
14642 if (s390_is_longrunning (insn))
14643 {
14644 if (s390_is_fxd (insn))
14645 fxd_longrunning[sched_state.side] = latency;
14646 else
14647 fpd_longrunning[sched_state.side] = latency;
14648 }
14649
14650 if (s390_tune >= PROCESSOR_2964_Z13)
14651 {
14652 int units, i;
14653 unsigned unit_mask, m = 1;
14654
14655 unit_mask = s390_get_unit_mask (insn, &units);
14656 gcc_assert (units <= MAX_SCHED_UNITS);
14657
14658 for (i = 0; i < units; i++, m <<= 1)
14659 if (m & unit_mask)
14660 last_scheduled_unit_distance[i][sched_state.side] = 0;
14661 else if (last_scheduled_unit_distance[i][sched_state.side]
14662 < MAX_SCHED_MIX_DISTANCE)
14663 last_scheduled_unit_distance[i][sched_state.side]++;
14664 }
14665
14666 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14667 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
14668 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
14669 || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14670 {
14671 sched_state.group_state = 0;
14672 ends_group = true;
14673 }
14674 else
14675 {
14676 switch (sched_state.group_state)
14677 {
14678 case 0:
14679 sched_state.group_state++;
14680 break;
14681 case 1:
14682 sched_state.group_state++;
14683 if (sched_state.group_of_two)
14684 {
14685 sched_state.group_state = 0;
14686 ends_group = true;
14687 }
14688 break;
14689 case 2:
14690 sched_state.group_state++;
14691 ends_group = true;
14692 break;
14693 }
14694 }
14695
14696 if (verbose > 5)
14697 {
14698 unsigned int sched_mask;
14699
14700 sched_mask = s390_get_sched_attrmask (insn);
14701
14702 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
14703 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
14704 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14705 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14706 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14707 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14708 #undef PRINT_SCHED_ATTR
14709
14710 if (s390_tune >= PROCESSOR_2964_Z13)
14711 {
14712 unsigned int unit_mask, m = 1;
14713 int units, j;
14714
14715 unit_mask = s390_get_unit_mask (insn, &units);
14716 fprintf (file, "(units:");
14717 for (j = 0; j < units; j++, m <<= 1)
14718 if (m & unit_mask)
14719 fprintf (file, " %d", j);
14720 fprintf (file, ")");
14721 }
14722 fprintf (file, " sched state: %d\n", sched_state.group_state);
14723
14724 if (s390_tune >= PROCESSOR_2964_Z13)
14725 {
14726 int units, j;
14727
14728 s390_get_unit_mask (insn, &units);
14729
14730 fprintf (file, ";;\t\tBACKEND: units on this side unused for: ");
14731 for (j = 0; j < units; j++)
14732 fprintf (file, "%d:%d ", j,
14733 last_scheduled_unit_distance[j][sched_state.side]);
14734 fprintf (file, "\n");
14735 }
14736 }
14737
14738 /* If this insn ended a group, the next will be on the other side. */
14739 if (ends_group)
14740 {
14741 sched_state.group_state = 0;
14742 sched_state.side = sched_state.side ? 0 : 1;
14743 sched_state.group_of_two = false;
14744 }
14745 }
14746
14747 if (GET_CODE (PATTERN (insn)) != USE
14748 && GET_CODE (PATTERN (insn)) != CLOBBER)
14749 return more - 1;
14750 else
14751 return more;
14752 }
14753
14754 static void
14755 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
14756 int verbose ATTRIBUTE_UNUSED,
14757 int max_ready ATTRIBUTE_UNUSED)
14758 {
14759 /* If the next basic block is most likely entered via a fallthru edge
14760 we keep the last sched state. Otherwise we start a new group.
14761 The scheduler traverses basic blocks in "instruction stream" ordering
14762 so if we see a fallthru edge here, sched_state will be of its
14763 source block.
14764
14765 current_sched_info->prev_head is the insn before the first insn of the
14766 block of insns to be scheduled.
14767 */
14768 rtx_insn *insn = current_sched_info->prev_head
14769 ? NEXT_INSN (current_sched_info->prev_head) : NULL;
14770 basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
14771 if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
14772 {
14773 last_scheduled_insn = NULL;
14774 memset (last_scheduled_unit_distance, 0,
14775 MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
14776 sched_state.group_state = 0;
14777 sched_state.group_of_two = false;
14778 }
14779 }
14780
14781 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
14782 a new number struct loop *loop should be unrolled if tuned for cpus with
14783 a built-in stride prefetcher.
14784 The loop is analyzed for memory accesses by calling check_dpu for
14785 each rtx of the loop. Depending on the loop_depth and the amount of
14786 memory accesses a new number <=nunroll is returned to improve the
14787 behavior of the hardware prefetch unit. */
14788 static unsigned
14789 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
14790 {
14791 basic_block *bbs;
14792 rtx_insn *insn;
14793 unsigned i;
14794 unsigned mem_count = 0;
14795
14796 if (s390_tune < PROCESSOR_2097_Z10)
14797 return nunroll;
14798
14799 /* Count the number of memory references within the loop body. */
14800 bbs = get_loop_body (loop);
14801 subrtx_iterator::array_type array;
14802 for (i = 0; i < loop->num_nodes; i++)
14803 FOR_BB_INSNS (bbs[i], insn)
14804 if (INSN_P (insn) && INSN_CODE (insn) != -1)
14805 {
14806 rtx set;
14807
14808 /* The runtime of small loops with memory block operations
14809 will be determined by the memory operation. Doing
14810 unrolling doesn't help here. Measurements to confirm
14811 this where only done on recent CPU levels. So better do
14812 not change anything for older CPUs. */
14813 if (s390_tune >= PROCESSOR_2964_Z13
14814 && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
14815 && ((set = single_set (insn)) != NULL_RTX)
14816 && ((GET_MODE (SET_DEST (set)) == BLKmode
14817 && (GET_MODE (SET_SRC (set)) == BLKmode
14818 || SET_SRC (set) == const0_rtx))
14819 || (GET_CODE (SET_SRC (set)) == COMPARE
14820 && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
14821 && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
14822 return 1;
14823
14824 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14825 if (MEM_P (*iter))
14826 mem_count += 1;
14827 }
14828 free (bbs);
14829
14830 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
14831 if (mem_count == 0)
14832 return nunroll;
14833
14834 switch (loop_depth(loop))
14835 {
14836 case 1:
14837 return MIN (nunroll, 28 / mem_count);
14838 case 2:
14839 return MIN (nunroll, 22 / mem_count);
14840 default:
14841 return MIN (nunroll, 16 / mem_count);
14842 }
14843 }
14844
14845 /* Restore the current options. This is a hook function and also called
14846 internally. */
14847
14848 static void
14849 s390_function_specific_restore (struct gcc_options *opts,
14850 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
14851 {
14852 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
14853 }
14854
14855 static void
14856 s390_default_align (struct gcc_options *opts)
14857 {
14858 /* Set the default function alignment to 16 in order to get rid of
14859 some unwanted performance effects. */
14860 if (opts->x_flag_align_functions && !opts->x_str_align_functions
14861 && opts->x_s390_tune >= PROCESSOR_2964_Z13)
14862 opts->x_str_align_functions = "16";
14863 }
14864
14865 static void
14866 s390_override_options_after_change (void)
14867 {
14868 s390_default_align (&global_options);
14869 }
14870
14871 static void
14872 s390_option_override_internal (struct gcc_options *opts,
14873 const struct gcc_options *opts_set)
14874 {
14875 /* Architecture mode defaults according to ABI. */
14876 if (!(opts_set->x_target_flags & MASK_ZARCH))
14877 {
14878 if (TARGET_64BIT)
14879 opts->x_target_flags |= MASK_ZARCH;
14880 else
14881 opts->x_target_flags &= ~MASK_ZARCH;
14882 }
14883
14884 /* Set the march default in case it hasn't been specified on cmdline. */
14885 if (!opts_set->x_s390_arch)
14886 opts->x_s390_arch = PROCESSOR_2064_Z900;
14887
14888 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
14889
14890 /* Determine processor to tune for. */
14891 if (!opts_set->x_s390_tune)
14892 opts->x_s390_tune = opts->x_s390_arch;
14893
14894 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
14895
14896 /* Sanity checks. */
14897 if (opts->x_s390_arch == PROCESSOR_NATIVE
14898 || opts->x_s390_tune == PROCESSOR_NATIVE)
14899 gcc_unreachable ();
14900 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
14901 error ("64-bit ABI not supported in ESA/390 mode");
14902
14903 if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
14904 || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
14905 || opts->x_s390_function_return == indirect_branch_thunk_inline
14906 || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
14907 || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
14908 error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
14909
14910 if (opts->x_s390_indirect_branch != indirect_branch_keep)
14911 {
14912 if (!opts_set->x_s390_indirect_branch_call)
14913 opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
14914
14915 if (!opts_set->x_s390_indirect_branch_jump)
14916 opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
14917 }
14918
14919 if (opts->x_s390_function_return != indirect_branch_keep)
14920 {
14921 if (!opts_set->x_s390_function_return_reg)
14922 opts->x_s390_function_return_reg = opts->x_s390_function_return;
14923
14924 if (!opts_set->x_s390_function_return_mem)
14925 opts->x_s390_function_return_mem = opts->x_s390_function_return;
14926 }
14927
14928 /* Enable hardware transactions if available and not explicitly
14929 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
14930 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
14931 {
14932 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
14933 opts->x_target_flags |= MASK_OPT_HTM;
14934 else
14935 opts->x_target_flags &= ~MASK_OPT_HTM;
14936 }
14937
14938 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
14939 {
14940 if (TARGET_OPT_VX_P (opts->x_target_flags))
14941 {
14942 if (!TARGET_CPU_VX_P (opts))
14943 error ("hardware vector support not available on %s",
14944 processor_table[(int)opts->x_s390_arch].name);
14945 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14946 error ("hardware vector support not available with "
14947 "%<-msoft-float%>");
14948 }
14949 }
14950 else
14951 {
14952 if (TARGET_CPU_VX_P (opts))
14953 /* Enable vector support if available and not explicitly disabled
14954 by user. E.g. with -m31 -march=z13 -mzarch */
14955 opts->x_target_flags |= MASK_OPT_VX;
14956 else
14957 opts->x_target_flags &= ~MASK_OPT_VX;
14958 }
14959
14960 /* Use hardware DFP if available and not explicitly disabled by
14961 user. E.g. with -m31 -march=z10 -mzarch */
14962 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
14963 {
14964 if (TARGET_DFP_P (opts))
14965 opts->x_target_flags |= MASK_HARD_DFP;
14966 else
14967 opts->x_target_flags &= ~MASK_HARD_DFP;
14968 }
14969
14970 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
14971 {
14972 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
14973 {
14974 if (!TARGET_CPU_DFP_P (opts))
14975 error ("hardware decimal floating point instructions"
14976 " not available on %s",
14977 processor_table[(int)opts->x_s390_arch].name);
14978 if (!TARGET_ZARCH_P (opts->x_target_flags))
14979 error ("hardware decimal floating point instructions"
14980 " not available in ESA/390 mode");
14981 }
14982 else
14983 opts->x_target_flags &= ~MASK_HARD_DFP;
14984 }
14985
14986 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
14987 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14988 {
14989 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
14990 && TARGET_HARD_DFP_P (opts->x_target_flags))
14991 error ("%<-mhard-dfp%> can%'t be used in conjunction with "
14992 "%<-msoft-float%>");
14993
14994 opts->x_target_flags &= ~MASK_HARD_DFP;
14995 }
14996
14997 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
14998 && TARGET_PACKED_STACK_P (opts->x_target_flags)
14999 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15000 error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
15001 "supported in combination");
15002
15003 if (opts->x_s390_stack_size)
15004 {
15005 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15006 error ("stack size must be greater than the stack guard value");
15007 else if (opts->x_s390_stack_size > 1 << 16)
15008 error ("stack size must not be greater than 64k");
15009 }
15010 else if (opts->x_s390_stack_guard)
15011 error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
15012
15013 /* Our implementation of the stack probe requires the probe interval
15014 to be used as displacement in an address operand. The maximum
15015 probe interval currently is 64k. This would exceed short
15016 displacements. Trim that value down to 4k if that happens. This
15017 might result in too many probes being generated only on the
15018 oldest supported machine level z900. */
15019 if (!DISP_IN_RANGE ((1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL))))
15020 set_param_value ("stack-clash-protection-probe-interval", 12,
15021 opts->x_param_values,
15022 opts_set->x_param_values);
15023
15024 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15025 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15026 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15027 #endif
15028
15029 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15030 {
15031 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
15032 opts->x_param_values,
15033 opts_set->x_param_values);
15034 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
15035 opts->x_param_values,
15036 opts_set->x_param_values);
15037 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
15038 opts->x_param_values,
15039 opts_set->x_param_values);
15040 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
15041 opts->x_param_values,
15042 opts_set->x_param_values);
15043 }
15044
15045 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
15046 opts->x_param_values,
15047 opts_set->x_param_values);
15048 /* values for loop prefetching */
15049 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
15050 opts->x_param_values,
15051 opts_set->x_param_values);
15052 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
15053 opts->x_param_values,
15054 opts_set->x_param_values);
15055 /* s390 has more than 2 levels and the size is much larger. Since
15056 we are always running virtualized assume that we only get a small
15057 part of the caches above l1. */
15058 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
15059 opts->x_param_values,
15060 opts_set->x_param_values);
15061 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
15062 opts->x_param_values,
15063 opts_set->x_param_values);
15064 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
15065 opts->x_param_values,
15066 opts_set->x_param_values);
15067
15068 /* Use the alternative scheduling-pressure algorithm by default. */
15069 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
15070 opts->x_param_values,
15071 opts_set->x_param_values);
15072
15073 maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND, 2,
15074 opts->x_param_values,
15075 opts_set->x_param_values);
15076
15077 /* Use aggressive inlining parameters. */
15078 if (opts->x_s390_tune >= PROCESSOR_2964_Z13)
15079 {
15080 maybe_set_param_value (PARAM_INLINE_MIN_SPEEDUP, 2,
15081 opts->x_param_values,
15082 opts_set->x_param_values);
15083
15084 maybe_set_param_value (PARAM_MAX_INLINE_INSNS_AUTO, 80,
15085 opts->x_param_values,
15086 opts_set->x_param_values);
15087 }
15088
15089 /* Set the default alignment. */
15090 s390_default_align (opts);
15091
15092 /* Call target specific restore function to do post-init work. At the moment,
15093 this just sets opts->x_s390_cost_pointer. */
15094 s390_function_specific_restore (opts, NULL);
15095
15096 /* Check whether -mfentry is supported. It cannot be used in 31-bit mode,
15097 because 31-bit PLT stubs assume that %r12 contains GOT address, which is
15098 not the case when the code runs before the prolog. */
15099 if (opts->x_flag_fentry && !TARGET_64BIT)
15100 error ("%<-mfentry%> is supported only for 64-bit CPUs");
15101 }
15102
15103 static void
15104 s390_option_override (void)
15105 {
15106 unsigned int i;
15107 cl_deferred_option *opt;
15108 vec<cl_deferred_option> *v =
15109 (vec<cl_deferred_option> *) s390_deferred_options;
15110
15111 if (v)
15112 FOR_EACH_VEC_ELT (*v, i, opt)
15113 {
15114 switch (opt->opt_index)
15115 {
15116 case OPT_mhotpatch_:
15117 {
15118 int val1;
15119 int val2;
15120 char *s = strtok (ASTRDUP (opt->arg), ",");
15121 char *t = strtok (NULL, "\0");
15122
15123 if (t != NULL)
15124 {
15125 val1 = integral_argument (s);
15126 val2 = integral_argument (t);
15127 }
15128 else
15129 {
15130 val1 = -1;
15131 val2 = -1;
15132 }
15133 if (val1 == -1 || val2 == -1)
15134 {
15135 /* argument is not a plain number */
15136 error ("arguments to %qs should be non-negative integers",
15137 "-mhotpatch=n,m");
15138 break;
15139 }
15140 else if (val1 > s390_hotpatch_hw_max
15141 || val2 > s390_hotpatch_hw_max)
15142 {
15143 error ("argument to %qs is too large (max. %d)",
15144 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15145 break;
15146 }
15147 s390_hotpatch_hw_before_label = val1;
15148 s390_hotpatch_hw_after_label = val2;
15149 break;
15150 }
15151 default:
15152 gcc_unreachable ();
15153 }
15154 }
15155
15156 /* Set up function hooks. */
15157 init_machine_status = s390_init_machine_status;
15158
15159 s390_option_override_internal (&global_options, &global_options_set);
15160
15161 /* Save the initial options in case the user does function specific
15162 options. */
15163 target_option_default_node = build_target_option_node (&global_options);
15164 target_option_current_node = target_option_default_node;
15165
15166 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15167 requires the arch flags to be evaluated already. Since prefetching
15168 is beneficial on s390, we enable it if available. */
15169 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15170 flag_prefetch_loop_arrays = 1;
15171
15172 if (!s390_pic_data_is_text_relative && !flag_pic)
15173 error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
15174 "%<-fpic%>/%<-fPIC%>");
15175
15176 if (TARGET_TPF)
15177 {
15178 /* Don't emit DWARF3/4 unless specifically selected. The TPF
15179 debuggers do not yet support DWARF 3/4. */
15180 if (!global_options_set.x_dwarf_strict)
15181 dwarf_strict = 1;
15182 if (!global_options_set.x_dwarf_version)
15183 dwarf_version = 2;
15184 }
15185 }
15186
15187 #if S390_USE_TARGET_ATTRIBUTE
15188 /* Inner function to process the attribute((target(...))), take an argument and
15189 set the current options from the argument. If we have a list, recursively go
15190 over the list. */
15191
15192 static bool
15193 s390_valid_target_attribute_inner_p (tree args,
15194 struct gcc_options *opts,
15195 struct gcc_options *new_opts_set,
15196 bool force_pragma)
15197 {
15198 char *next_optstr;
15199 bool ret = true;
15200
15201 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
15202 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
15203 static const struct
15204 {
15205 const char *string;
15206 size_t len;
15207 int opt;
15208 int has_arg;
15209 int only_as_pragma;
15210 } attrs[] = {
15211 /* enum options */
15212 S390_ATTRIB ("arch=", OPT_march_, 1),
15213 S390_ATTRIB ("tune=", OPT_mtune_, 1),
15214 /* uinteger options */
15215 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15216 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15217 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15218 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15219 /* flag options */
15220 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15221 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15222 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15223 S390_ATTRIB ("htm", OPT_mhtm, 0),
15224 S390_ATTRIB ("vx", OPT_mvx, 0),
15225 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15226 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15227 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15228 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15229 S390_PRAGMA ("zvector", OPT_mzvector, 0),
15230 /* boolean options */
15231 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15232 };
15233 #undef S390_ATTRIB
15234 #undef S390_PRAGMA
15235
15236 /* If this is a list, recurse to get the options. */
15237 if (TREE_CODE (args) == TREE_LIST)
15238 {
15239 bool ret = true;
15240 int num_pragma_values;
15241 int i;
15242
15243 /* Note: attribs.c:decl_attributes prepends the values from
15244 current_target_pragma to the list of target attributes. To determine
15245 whether we're looking at a value of the attribute or the pragma we
15246 assume that the first [list_length (current_target_pragma)] values in
15247 the list are the values from the pragma. */
15248 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15249 ? list_length (current_target_pragma) : 0;
15250 for (i = 0; args; args = TREE_CHAIN (args), i++)
15251 {
15252 bool is_pragma;
15253
15254 is_pragma = (force_pragma || i < num_pragma_values);
15255 if (TREE_VALUE (args)
15256 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15257 opts, new_opts_set,
15258 is_pragma))
15259 {
15260 ret = false;
15261 }
15262 }
15263 return ret;
15264 }
15265
15266 else if (TREE_CODE (args) != STRING_CST)
15267 {
15268 error ("attribute %<target%> argument not a string");
15269 return false;
15270 }
15271
15272 /* Handle multiple arguments separated by commas. */
15273 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15274
15275 while (next_optstr && *next_optstr != '\0')
15276 {
15277 char *p = next_optstr;
15278 char *orig_p = p;
15279 char *comma = strchr (next_optstr, ',');
15280 size_t len, opt_len;
15281 int opt;
15282 bool opt_set_p;
15283 char ch;
15284 unsigned i;
15285 int mask = 0;
15286 enum cl_var_type var_type;
15287 bool found;
15288
15289 if (comma)
15290 {
15291 *comma = '\0';
15292 len = comma - next_optstr;
15293 next_optstr = comma + 1;
15294 }
15295 else
15296 {
15297 len = strlen (p);
15298 next_optstr = NULL;
15299 }
15300
15301 /* Recognize no-xxx. */
15302 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15303 {
15304 opt_set_p = false;
15305 p += 3;
15306 len -= 3;
15307 }
15308 else
15309 opt_set_p = true;
15310
15311 /* Find the option. */
15312 ch = *p;
15313 found = false;
15314 for (i = 0; i < ARRAY_SIZE (attrs); i++)
15315 {
15316 opt_len = attrs[i].len;
15317 if (ch == attrs[i].string[0]
15318 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15319 && memcmp (p, attrs[i].string, opt_len) == 0)
15320 {
15321 opt = attrs[i].opt;
15322 if (!opt_set_p && cl_options[opt].cl_reject_negative)
15323 continue;
15324 mask = cl_options[opt].var_value;
15325 var_type = cl_options[opt].var_type;
15326 found = true;
15327 break;
15328 }
15329 }
15330
15331 /* Process the option. */
15332 if (!found)
15333 {
15334 error ("attribute(target(\"%s\")) is unknown", orig_p);
15335 return false;
15336 }
15337 else if (attrs[i].only_as_pragma && !force_pragma)
15338 {
15339 /* Value is not allowed for the target attribute. */
15340 error ("value %qs is not supported by attribute %<target%>",
15341 attrs[i].string);
15342 return false;
15343 }
15344
15345 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15346 {
15347 if (var_type == CLVC_BIT_CLEAR)
15348 opt_set_p = !opt_set_p;
15349
15350 if (opt_set_p)
15351 opts->x_target_flags |= mask;
15352 else
15353 opts->x_target_flags &= ~mask;
15354 new_opts_set->x_target_flags |= mask;
15355 }
15356
15357 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15358 {
15359 int value;
15360
15361 if (cl_options[opt].cl_uinteger)
15362 {
15363 /* Unsigned integer argument. Code based on the function
15364 decode_cmdline_option () in opts-common.c. */
15365 value = integral_argument (p + opt_len);
15366 }
15367 else
15368 value = (opt_set_p) ? 1 : 0;
15369
15370 if (value != -1)
15371 {
15372 struct cl_decoded_option decoded;
15373
15374 /* Value range check; only implemented for numeric and boolean
15375 options at the moment. */
15376 generate_option (opt, NULL, value, CL_TARGET, &decoded);
15377 s390_handle_option (opts, new_opts_set, &decoded, input_location);
15378 set_option (opts, new_opts_set, opt, value,
15379 p + opt_len, DK_UNSPECIFIED, input_location,
15380 global_dc);
15381 }
15382 else
15383 {
15384 error ("attribute(target(\"%s\")) is unknown", orig_p);
15385 ret = false;
15386 }
15387 }
15388
15389 else if (cl_options[opt].var_type == CLVC_ENUM)
15390 {
15391 bool arg_ok;
15392 int value;
15393
15394 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15395 if (arg_ok)
15396 set_option (opts, new_opts_set, opt, value,
15397 p + opt_len, DK_UNSPECIFIED, input_location,
15398 global_dc);
15399 else
15400 {
15401 error ("attribute(target(\"%s\")) is unknown", orig_p);
15402 ret = false;
15403 }
15404 }
15405
15406 else
15407 gcc_unreachable ();
15408 }
15409 return ret;
15410 }
15411
15412 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15413
15414 tree
15415 s390_valid_target_attribute_tree (tree args,
15416 struct gcc_options *opts,
15417 const struct gcc_options *opts_set,
15418 bool force_pragma)
15419 {
15420 tree t = NULL_TREE;
15421 struct gcc_options new_opts_set;
15422
15423 memset (&new_opts_set, 0, sizeof (new_opts_set));
15424
15425 /* Process each of the options on the chain. */
15426 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15427 force_pragma))
15428 return error_mark_node;
15429
15430 /* If some option was set (even if it has not changed), rerun
15431 s390_option_override_internal, and then save the options away. */
15432 if (new_opts_set.x_target_flags
15433 || new_opts_set.x_s390_arch
15434 || new_opts_set.x_s390_tune
15435 || new_opts_set.x_s390_stack_guard
15436 || new_opts_set.x_s390_stack_size
15437 || new_opts_set.x_s390_branch_cost
15438 || new_opts_set.x_s390_warn_framesize
15439 || new_opts_set.x_s390_warn_dynamicstack_p)
15440 {
15441 const unsigned char *src = (const unsigned char *)opts_set;
15442 unsigned char *dest = (unsigned char *)&new_opts_set;
15443 unsigned int i;
15444
15445 /* Merge the original option flags into the new ones. */
15446 for (i = 0; i < sizeof(*opts_set); i++)
15447 dest[i] |= src[i];
15448
15449 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
15450 s390_option_override_internal (opts, &new_opts_set);
15451 /* Save the current options unless we are validating options for
15452 #pragma. */
15453 t = build_target_option_node (opts);
15454 }
15455 return t;
15456 }
15457
15458 /* Hook to validate attribute((target("string"))). */
15459
15460 static bool
15461 s390_valid_target_attribute_p (tree fndecl,
15462 tree ARG_UNUSED (name),
15463 tree args,
15464 int ARG_UNUSED (flags))
15465 {
15466 struct gcc_options func_options;
15467 tree new_target, new_optimize;
15468 bool ret = true;
15469
15470 /* attribute((target("default"))) does nothing, beyond
15471 affecting multi-versioning. */
15472 if (TREE_VALUE (args)
15473 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15474 && TREE_CHAIN (args) == NULL_TREE
15475 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15476 return true;
15477
15478 tree old_optimize = build_optimization_node (&global_options);
15479
15480 /* Get the optimization options of the current function. */
15481 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15482
15483 if (!func_optimize)
15484 func_optimize = old_optimize;
15485
15486 /* Init func_options. */
15487 memset (&func_options, 0, sizeof (func_options));
15488 init_options_struct (&func_options, NULL);
15489 lang_hooks.init_options_struct (&func_options);
15490
15491 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15492
15493 /* Initialize func_options to the default before its target options can
15494 be set. */
15495 cl_target_option_restore (&func_options,
15496 TREE_TARGET_OPTION (target_option_default_node));
15497
15498 new_target = s390_valid_target_attribute_tree (args, &func_options,
15499 &global_options_set,
15500 (args ==
15501 current_target_pragma));
15502 new_optimize = build_optimization_node (&func_options);
15503 if (new_target == error_mark_node)
15504 ret = false;
15505 else if (fndecl && new_target)
15506 {
15507 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15508 if (old_optimize != new_optimize)
15509 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15510 }
15511 return ret;
15512 }
15513
15514 /* Hook to determine if one function can safely inline another. */
15515
15516 static bool
15517 s390_can_inline_p (tree caller, tree callee)
15518 {
15519 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
15520 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
15521
15522 if (!callee_tree)
15523 callee_tree = target_option_default_node;
15524 if (!caller_tree)
15525 caller_tree = target_option_default_node;
15526 if (callee_tree == caller_tree)
15527 return true;
15528
15529 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
15530 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
15531 bool ret = true;
15532
15533 if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
15534 != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
15535 ret = false;
15536
15537 /* Don't inline functions to be compiled for a more recent arch into a
15538 function for an older arch. */
15539 else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
15540 ret = false;
15541
15542 /* Inlining a hard float function into a soft float function is only
15543 allowed if the hard float function doesn't actually make use of
15544 floating point.
15545
15546 We are called from FEs for multi-versioning call optimization, so
15547 beware of ipa_fn_summaries not available. */
15548 else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
15549 && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
15550 || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
15551 && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
15552 && (! ipa_fn_summaries
15553 || ipa_fn_summaries->get
15554 (cgraph_node::get (callee))->fp_expressions))
15555 ret = false;
15556
15557 return ret;
15558 }
15559 #endif
15560
15561 /* Set VAL to correct enum value according to the indirect-branch or
15562 function-return attribute in ATTR. */
15563
15564 static inline void
15565 s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
15566 {
15567 const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
15568 if (strcmp (str, "keep") == 0)
15569 *val = indirect_branch_keep;
15570 else if (strcmp (str, "thunk") == 0)
15571 *val = indirect_branch_thunk;
15572 else if (strcmp (str, "thunk-inline") == 0)
15573 *val = indirect_branch_thunk_inline;
15574 else if (strcmp (str, "thunk-extern") == 0)
15575 *val = indirect_branch_thunk_extern;
15576 }
15577
15578 /* Memorize the setting for -mindirect-branch* and -mfunction-return*
15579 from either the cmdline or the function attributes in
15580 cfun->machine. */
15581
15582 static void
15583 s390_indirect_branch_settings (tree fndecl)
15584 {
15585 tree attr;
15586
15587 if (!fndecl)
15588 return;
15589
15590 /* Initialize with the cmdline options and let the attributes
15591 override it. */
15592 cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
15593 cfun->machine->indirect_branch_call = s390_indirect_branch_call;
15594
15595 cfun->machine->function_return_reg = s390_function_return_reg;
15596 cfun->machine->function_return_mem = s390_function_return_mem;
15597
15598 if ((attr = lookup_attribute ("indirect_branch",
15599 DECL_ATTRIBUTES (fndecl))))
15600 {
15601 s390_indirect_branch_attrvalue (attr,
15602 &cfun->machine->indirect_branch_jump);
15603 s390_indirect_branch_attrvalue (attr,
15604 &cfun->machine->indirect_branch_call);
15605 }
15606
15607 if ((attr = lookup_attribute ("indirect_branch_jump",
15608 DECL_ATTRIBUTES (fndecl))))
15609 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
15610
15611 if ((attr = lookup_attribute ("indirect_branch_call",
15612 DECL_ATTRIBUTES (fndecl))))
15613 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
15614
15615 if ((attr = lookup_attribute ("function_return",
15616 DECL_ATTRIBUTES (fndecl))))
15617 {
15618 s390_indirect_branch_attrvalue (attr,
15619 &cfun->machine->function_return_reg);
15620 s390_indirect_branch_attrvalue (attr,
15621 &cfun->machine->function_return_mem);
15622 }
15623
15624 if ((attr = lookup_attribute ("function_return_reg",
15625 DECL_ATTRIBUTES (fndecl))))
15626 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
15627
15628 if ((attr = lookup_attribute ("function_return_mem",
15629 DECL_ATTRIBUTES (fndecl))))
15630 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
15631 }
15632
15633 #if S390_USE_TARGET_ATTRIBUTE
15634 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
15635 cache. */
15636
15637 void
15638 s390_activate_target_options (tree new_tree)
15639 {
15640 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15641 if (TREE_TARGET_GLOBALS (new_tree))
15642 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15643 else if (new_tree == target_option_default_node)
15644 restore_target_globals (&default_target_globals);
15645 else
15646 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15647 s390_previous_fndecl = NULL_TREE;
15648 }
15649 #endif
15650
15651 /* Establish appropriate back-end context for processing the function
15652 FNDECL. The argument might be NULL to indicate processing at top
15653 level, outside of any function scope. */
15654 static void
15655 s390_set_current_function (tree fndecl)
15656 {
15657 #if S390_USE_TARGET_ATTRIBUTE
15658 /* Only change the context if the function changes. This hook is called
15659 several times in the course of compiling a function, and we don't want to
15660 slow things down too much or call target_reinit when it isn't safe. */
15661 if (fndecl == s390_previous_fndecl)
15662 {
15663 s390_indirect_branch_settings (fndecl);
15664 return;
15665 }
15666
15667 tree old_tree;
15668 if (s390_previous_fndecl == NULL_TREE)
15669 old_tree = target_option_current_node;
15670 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15671 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15672 else
15673 old_tree = target_option_default_node;
15674
15675 if (fndecl == NULL_TREE)
15676 {
15677 if (old_tree != target_option_current_node)
15678 s390_activate_target_options (target_option_current_node);
15679 return;
15680 }
15681
15682 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
15683 if (new_tree == NULL_TREE)
15684 new_tree = target_option_default_node;
15685
15686 if (old_tree != new_tree)
15687 s390_activate_target_options (new_tree);
15688 s390_previous_fndecl = fndecl;
15689 #endif
15690 s390_indirect_branch_settings (fndecl);
15691 }
15692
15693 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
15694
15695 static bool
15696 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
15697 unsigned int align ATTRIBUTE_UNUSED,
15698 enum by_pieces_operation op ATTRIBUTE_UNUSED,
15699 bool speed_p ATTRIBUTE_UNUSED)
15700 {
15701 return (size == 1 || size == 2
15702 || size == 4 || (TARGET_ZARCH && size == 8));
15703 }
15704
15705 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
15706
15707 static void
15708 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
15709 {
15710 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
15711 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
15712 tree call_efpc = build_call_expr (efpc, 0);
15713 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
15714
15715 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
15716 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
15717 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
15718 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
15719 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
15720 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
15721
15722 /* Generates the equivalent of feholdexcept (&fenv_var)
15723
15724 fenv_var = __builtin_s390_efpc ();
15725 __builtin_s390_sfpc (fenv_var & mask) */
15726 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
15727 tree new_fpc =
15728 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
15729 build_int_cst (unsigned_type_node,
15730 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
15731 FPC_EXCEPTION_MASK)));
15732 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
15733 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
15734
15735 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
15736
15737 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
15738 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
15739 build_int_cst (unsigned_type_node,
15740 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
15741 *clear = build_call_expr (sfpc, 1, new_fpc);
15742
15743 /* Generates the equivalent of feupdateenv (fenv_var)
15744
15745 old_fpc = __builtin_s390_efpc ();
15746 __builtin_s390_sfpc (fenv_var);
15747 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
15748
15749 old_fpc = create_tmp_var_raw (unsigned_type_node);
15750 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
15751 old_fpc, call_efpc);
15752
15753 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
15754
15755 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
15756 build_int_cst (unsigned_type_node,
15757 FPC_FLAGS_MASK));
15758 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
15759 build_int_cst (unsigned_type_node,
15760 FPC_FLAGS_SHIFT));
15761 tree atomic_feraiseexcept
15762 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
15763 raise_old_except = build_call_expr (atomic_feraiseexcept,
15764 1, raise_old_except);
15765
15766 *update = build2 (COMPOUND_EXPR, void_type_node,
15767 build2 (COMPOUND_EXPR, void_type_node,
15768 store_old_fpc, set_new_fpc),
15769 raise_old_except);
15770
15771 #undef FPC_EXCEPTION_MASK
15772 #undef FPC_FLAGS_MASK
15773 #undef FPC_DXC_MASK
15774 #undef FPC_EXCEPTION_MASK_SHIFT
15775 #undef FPC_FLAGS_SHIFT
15776 #undef FPC_DXC_SHIFT
15777 }
15778
15779 /* Return the vector mode to be used for inner mode MODE when doing
15780 vectorization. */
15781 static machine_mode
15782 s390_preferred_simd_mode (scalar_mode mode)
15783 {
15784 if (TARGET_VXE)
15785 switch (mode)
15786 {
15787 case E_SFmode:
15788 return V4SFmode;
15789 default:;
15790 }
15791
15792 if (TARGET_VX)
15793 switch (mode)
15794 {
15795 case E_DFmode:
15796 return V2DFmode;
15797 case E_DImode:
15798 return V2DImode;
15799 case E_SImode:
15800 return V4SImode;
15801 case E_HImode:
15802 return V8HImode;
15803 case E_QImode:
15804 return V16QImode;
15805 default:;
15806 }
15807 return word_mode;
15808 }
15809
15810 /* Our hardware does not require vectors to be strictly aligned. */
15811 static bool
15812 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
15813 const_tree type ATTRIBUTE_UNUSED,
15814 int misalignment ATTRIBUTE_UNUSED,
15815 bool is_packed ATTRIBUTE_UNUSED)
15816 {
15817 if (TARGET_VX)
15818 return true;
15819
15820 return default_builtin_support_vector_misalignment (mode, type, misalignment,
15821 is_packed);
15822 }
15823
15824 /* The vector ABI requires vector types to be aligned on an 8 byte
15825 boundary (our stack alignment). However, we allow this to be
15826 overriden by the user, while this definitely breaks the ABI. */
15827 static HOST_WIDE_INT
15828 s390_vector_alignment (const_tree type)
15829 {
15830 if (!TARGET_VX_ABI)
15831 return default_vector_alignment (type);
15832
15833 if (TYPE_USER_ALIGN (type))
15834 return TYPE_ALIGN (type);
15835
15836 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
15837 }
15838
15839 /* Implement TARGET_CONSTANT_ALIGNMENT. Alignment on even addresses for
15840 LARL instruction. */
15841
15842 static HOST_WIDE_INT
15843 s390_constant_alignment (const_tree, HOST_WIDE_INT align)
15844 {
15845 return MAX (align, 16);
15846 }
15847
15848 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15849 /* Implement TARGET_ASM_FILE_START. */
15850 static void
15851 s390_asm_file_start (void)
15852 {
15853 default_file_start ();
15854 s390_asm_output_machine_for_arch (asm_out_file);
15855 }
15856 #endif
15857
15858 /* Implement TARGET_ASM_FILE_END. */
15859 static void
15860 s390_asm_file_end (void)
15861 {
15862 #ifdef HAVE_AS_GNU_ATTRIBUTE
15863 varpool_node *vnode;
15864 cgraph_node *cnode;
15865
15866 FOR_EACH_VARIABLE (vnode)
15867 if (TREE_PUBLIC (vnode->decl))
15868 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
15869
15870 FOR_EACH_FUNCTION (cnode)
15871 if (TREE_PUBLIC (cnode->decl))
15872 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
15873
15874
15875 if (s390_vector_abi != 0)
15876 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
15877 s390_vector_abi);
15878 #endif
15879 file_end_indicate_exec_stack ();
15880
15881 if (flag_split_stack)
15882 file_end_indicate_split_stack ();
15883 }
15884
15885 /* Return true if TYPE is a vector bool type. */
15886 static inline bool
15887 s390_vector_bool_type_p (const_tree type)
15888 {
15889 return TYPE_VECTOR_OPAQUE (type);
15890 }
15891
15892 /* Return the diagnostic message string if the binary operation OP is
15893 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15894 static const char*
15895 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
15896 {
15897 bool bool1_p, bool2_p;
15898 bool plusminus_p;
15899 bool muldiv_p;
15900 bool compare_p;
15901 machine_mode mode1, mode2;
15902
15903 if (!TARGET_ZVECTOR)
15904 return NULL;
15905
15906 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
15907 return NULL;
15908
15909 bool1_p = s390_vector_bool_type_p (type1);
15910 bool2_p = s390_vector_bool_type_p (type2);
15911
15912 /* Mixing signed and unsigned types is forbidden for all
15913 operators. */
15914 if (!bool1_p && !bool2_p
15915 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
15916 return N_("types differ in signedness");
15917
15918 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
15919 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
15920 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
15921 || op == ROUND_DIV_EXPR);
15922 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
15923 || op == EQ_EXPR || op == NE_EXPR);
15924
15925 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
15926 return N_("binary operator does not support two vector bool operands");
15927
15928 if (bool1_p != bool2_p && (muldiv_p || compare_p))
15929 return N_("binary operator does not support vector bool operand");
15930
15931 mode1 = TYPE_MODE (type1);
15932 mode2 = TYPE_MODE (type2);
15933
15934 if (bool1_p != bool2_p && plusminus_p
15935 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
15936 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
15937 return N_("binary operator does not support mixing vector "
15938 "bool with floating point vector operands");
15939
15940 return NULL;
15941 }
15942
15943 /* Implement TARGET_C_EXCESS_PRECISION.
15944
15945 FIXME: For historical reasons, float_t and double_t are typedef'ed to
15946 double on s390, causing operations on float_t to operate in a higher
15947 precision than is necessary. However, it is not the case that SFmode
15948 operations have implicit excess precision, and we generate more optimal
15949 code if we let the compiler know no implicit extra precision is added.
15950
15951 That means when we are compiling with -fexcess-precision=fast, the value
15952 we set for FLT_EVAL_METHOD will be out of line with the actual precision of
15953 float_t (though they would be correct for -fexcess-precision=standard).
15954
15955 A complete fix would modify glibc to remove the unnecessary typedef
15956 of float_t to double. */
15957
15958 static enum flt_eval_method
15959 s390_excess_precision (enum excess_precision_type type)
15960 {
15961 switch (type)
15962 {
15963 case EXCESS_PRECISION_TYPE_IMPLICIT:
15964 case EXCESS_PRECISION_TYPE_FAST:
15965 /* The fastest type to promote to will always be the native type,
15966 whether that occurs with implicit excess precision or
15967 otherwise. */
15968 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
15969 case EXCESS_PRECISION_TYPE_STANDARD:
15970 /* Otherwise, when we are in a standards compliant mode, to
15971 ensure consistency with the implementation in glibc, report that
15972 float is evaluated to the range and precision of double. */
15973 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
15974 default:
15975 gcc_unreachable ();
15976 }
15977 return FLT_EVAL_METHOD_UNPREDICTABLE;
15978 }
15979
15980 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
15981
15982 static unsigned HOST_WIDE_INT
15983 s390_asan_shadow_offset (void)
15984 {
15985 return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
15986 }
15987
15988 #ifdef HAVE_GAS_HIDDEN
15989 # define USE_HIDDEN_LINKONCE 1
15990 #else
15991 # define USE_HIDDEN_LINKONCE 0
15992 #endif
15993
15994 /* Output an indirect branch trampoline for target register REGNO. */
15995
15996 static void
15997 s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
15998 {
15999 tree decl;
16000 char thunk_label[32];
16001 int i;
16002
16003 if (z10_p)
16004 sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
16005 else
16006 sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
16007 INDIRECT_BRANCH_THUNK_REGNUM, regno);
16008
16009 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
16010 get_identifier (thunk_label),
16011 build_function_type_list (void_type_node, NULL_TREE));
16012 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
16013 NULL_TREE, void_type_node);
16014 TREE_PUBLIC (decl) = 1;
16015 TREE_STATIC (decl) = 1;
16016 DECL_IGNORED_P (decl) = 1;
16017
16018 if (USE_HIDDEN_LINKONCE)
16019 {
16020 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
16021
16022 targetm.asm_out.unique_section (decl, 0);
16023 switch_to_section (get_named_section (decl, NULL, 0));
16024
16025 targetm.asm_out.globalize_label (asm_out_file, thunk_label);
16026 fputs ("\t.hidden\t", asm_out_file);
16027 assemble_name (asm_out_file, thunk_label);
16028 putc ('\n', asm_out_file);
16029 ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
16030 }
16031 else
16032 {
16033 switch_to_section (text_section);
16034 ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
16035 }
16036
16037 DECL_INITIAL (decl) = make_node (BLOCK);
16038 current_function_decl = decl;
16039 allocate_struct_function (decl, false);
16040 init_function_start (decl);
16041 cfun->is_thunk = true;
16042 first_function_block_is_cold = false;
16043 final_start_function (emit_barrier (), asm_out_file, 1);
16044
16045 /* This makes CFI at least usable for indirect jumps.
16046
16047 Stopping in the thunk: backtrace will point to the thunk target
16048 is if it was interrupted by a signal. For a call this means that
16049 the call chain will be: caller->callee->thunk */
16050 if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
16051 {
16052 fputs ("\t.cfi_signal_frame\n", asm_out_file);
16053 fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
16054 for (i = 0; i < FPR15_REGNUM; i++)
16055 fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
16056 }
16057
16058 if (z10_p)
16059 {
16060 /* exrl 0,1f */
16061
16062 /* We generate a thunk for z10 compiled code although z10 is
16063 currently not enabled. Tell the assembler to accept the
16064 instruction. */
16065 if (!TARGET_CPU_Z10)
16066 {
16067 fputs ("\t.machine push\n", asm_out_file);
16068 fputs ("\t.machine z10\n", asm_out_file);
16069 }
16070 /* We use exrl even if -mzarch hasn't been specified on the
16071 command line so we have to tell the assembler to accept
16072 it. */
16073 if (!TARGET_ZARCH)
16074 fputs ("\t.machinemode zarch\n", asm_out_file);
16075
16076 fputs ("\texrl\t0,1f\n", asm_out_file);
16077
16078 if (!TARGET_ZARCH)
16079 fputs ("\t.machinemode esa\n", asm_out_file);
16080
16081 if (!TARGET_CPU_Z10)
16082 fputs ("\t.machine pop\n", asm_out_file);
16083 }
16084 else
16085 {
16086 /* larl %r1,1f */
16087 fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
16088 INDIRECT_BRANCH_THUNK_REGNUM);
16089
16090 /* ex 0,0(%r1) */
16091 fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
16092 INDIRECT_BRANCH_THUNK_REGNUM);
16093 }
16094
16095 /* 0: j 0b */
16096 fputs ("0:\tj\t0b\n", asm_out_file);
16097
16098 /* 1: br <regno> */
16099 fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);
16100
16101 final_end_function ();
16102 init_insn_lengths ();
16103 free_after_compilation (cfun);
16104 set_cfun (NULL);
16105 current_function_decl = NULL;
16106 }
16107
16108 /* Implement the asm.code_end target hook. */
16109
16110 static void
16111 s390_code_end (void)
16112 {
16113 int i;
16114
16115 for (i = 1; i < 16; i++)
16116 {
16117 if (indirect_branch_z10thunk_mask & (1 << i))
16118 s390_output_indirect_thunk_function (i, true);
16119
16120 if (indirect_branch_prez10thunk_mask & (1 << i))
16121 s390_output_indirect_thunk_function (i, false);
16122 }
16123
16124 if (TARGET_INDIRECT_BRANCH_TABLE)
16125 {
16126 int o;
16127 int i;
16128
16129 for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
16130 {
16131 if (indirect_branch_table_label_no[o] == 0)
16132 continue;
16133
16134 switch_to_section (get_section (indirect_branch_table_name[o],
16135 0,
16136 NULL_TREE));
16137 for (i = 0; i < indirect_branch_table_label_no[o]; i++)
16138 {
16139 char label_start[32];
16140
16141 ASM_GENERATE_INTERNAL_LABEL (label_start,
16142 indirect_branch_table_label[o], i);
16143
16144 fputs ("\t.long\t", asm_out_file);
16145 assemble_name_raw (asm_out_file, label_start);
16146 fputs ("-.\n", asm_out_file);
16147 }
16148 switch_to_section (current_function_section ());
16149 }
16150 }
16151 }
16152
16153 /* Implement the TARGET_CASE_VALUES_THRESHOLD target hook. */
16154
16155 unsigned int
16156 s390_case_values_threshold (void)
16157 {
16158 /* Disabling branch prediction for indirect jumps makes jump tables
16159 much more expensive. */
16160 if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
16161 return 20;
16162
16163 return default_case_values_threshold ();
16164 }
16165
16166 /* Initialize GCC target structure. */
16167
16168 #undef TARGET_ASM_ALIGNED_HI_OP
16169 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
16170 #undef TARGET_ASM_ALIGNED_DI_OP
16171 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
16172 #undef TARGET_ASM_INTEGER
16173 #define TARGET_ASM_INTEGER s390_assemble_integer
16174
16175 #undef TARGET_ASM_OPEN_PAREN
16176 #define TARGET_ASM_OPEN_PAREN ""
16177
16178 #undef TARGET_ASM_CLOSE_PAREN
16179 #define TARGET_ASM_CLOSE_PAREN ""
16180
16181 #undef TARGET_OPTION_OVERRIDE
16182 #define TARGET_OPTION_OVERRIDE s390_option_override
16183
16184 #ifdef TARGET_THREAD_SSP_OFFSET
16185 #undef TARGET_STACK_PROTECT_GUARD
16186 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
16187 #endif
16188
16189 #undef TARGET_ENCODE_SECTION_INFO
16190 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
16191
16192 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16193 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16194
16195 #ifdef HAVE_AS_TLS
16196 #undef TARGET_HAVE_TLS
16197 #define TARGET_HAVE_TLS true
16198 #endif
16199 #undef TARGET_CANNOT_FORCE_CONST_MEM
16200 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
16201
16202 #undef TARGET_DELEGITIMIZE_ADDRESS
16203 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
16204
16205 #undef TARGET_LEGITIMIZE_ADDRESS
16206 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
16207
16208 #undef TARGET_RETURN_IN_MEMORY
16209 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
16210
16211 #undef TARGET_INIT_BUILTINS
16212 #define TARGET_INIT_BUILTINS s390_init_builtins
16213 #undef TARGET_EXPAND_BUILTIN
16214 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
16215 #undef TARGET_BUILTIN_DECL
16216 #define TARGET_BUILTIN_DECL s390_builtin_decl
16217
16218 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
16219 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
16220
16221 #undef TARGET_ASM_OUTPUT_MI_THUNK
16222 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
16223 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
16224 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
16225
16226 #undef TARGET_C_EXCESS_PRECISION
16227 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
16228
16229 #undef TARGET_SCHED_ADJUST_PRIORITY
16230 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
16231 #undef TARGET_SCHED_ISSUE_RATE
16232 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
16233 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
16234 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
16235
16236 #undef TARGET_SCHED_VARIABLE_ISSUE
16237 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
16238 #undef TARGET_SCHED_REORDER
16239 #define TARGET_SCHED_REORDER s390_sched_reorder
16240 #undef TARGET_SCHED_INIT
16241 #define TARGET_SCHED_INIT s390_sched_init
16242
16243 #undef TARGET_CANNOT_COPY_INSN_P
16244 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
16245 #undef TARGET_RTX_COSTS
16246 #define TARGET_RTX_COSTS s390_rtx_costs
16247 #undef TARGET_ADDRESS_COST
16248 #define TARGET_ADDRESS_COST s390_address_cost
16249 #undef TARGET_REGISTER_MOVE_COST
16250 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
16251 #undef TARGET_MEMORY_MOVE_COST
16252 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
16253 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
16254 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
16255 s390_builtin_vectorization_cost
16256
16257 #undef TARGET_MACHINE_DEPENDENT_REORG
16258 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
16259
16260 #undef TARGET_VALID_POINTER_MODE
16261 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
16262
16263 #undef TARGET_BUILD_BUILTIN_VA_LIST
16264 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
16265 #undef TARGET_EXPAND_BUILTIN_VA_START
16266 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
16267 #undef TARGET_ASAN_SHADOW_OFFSET
16268 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
16269 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
16270 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
16271
16272 #undef TARGET_PROMOTE_FUNCTION_MODE
16273 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
16274 #undef TARGET_PASS_BY_REFERENCE
16275 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
16276
16277 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
16278 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change
16279
16280 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
16281 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
16282 #undef TARGET_FUNCTION_ARG
16283 #define TARGET_FUNCTION_ARG s390_function_arg
16284 #undef TARGET_FUNCTION_ARG_ADVANCE
16285 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
16286 #undef TARGET_FUNCTION_ARG_PADDING
16287 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
16288 #undef TARGET_FUNCTION_VALUE
16289 #define TARGET_FUNCTION_VALUE s390_function_value
16290 #undef TARGET_LIBCALL_VALUE
16291 #define TARGET_LIBCALL_VALUE s390_libcall_value
16292 #undef TARGET_STRICT_ARGUMENT_NAMING
16293 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
16294
16295 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
16296 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
16297
16298 #undef TARGET_FIXED_CONDITION_CODE_REGS
16299 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
16300
16301 #undef TARGET_CC_MODES_COMPATIBLE
16302 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
16303
16304 #undef TARGET_INVALID_WITHIN_DOLOOP
16305 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
16306
16307 #ifdef HAVE_AS_TLS
16308 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
16309 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
16310 #endif
16311
16312 #undef TARGET_DWARF_FRAME_REG_MODE
16313 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
16314
16315 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
16316 #undef TARGET_MANGLE_TYPE
16317 #define TARGET_MANGLE_TYPE s390_mangle_type
16318 #endif
16319
16320 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16321 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16322
16323 #undef TARGET_VECTOR_MODE_SUPPORTED_P
16324 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
16325
16326 #undef TARGET_PREFERRED_RELOAD_CLASS
16327 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
16328
16329 #undef TARGET_SECONDARY_RELOAD
16330 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
16331 #undef TARGET_SECONDARY_MEMORY_NEEDED
16332 #define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
16333 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
16334 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
16335
16336 #undef TARGET_LIBGCC_CMP_RETURN_MODE
16337 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
16338
16339 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
16340 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
16341
16342 #undef TARGET_LEGITIMATE_ADDRESS_P
16343 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
16344
16345 #undef TARGET_LEGITIMATE_CONSTANT_P
16346 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
16347
16348 #undef TARGET_LRA_P
16349 #define TARGET_LRA_P s390_lra_p
16350
16351 #undef TARGET_CAN_ELIMINATE
16352 #define TARGET_CAN_ELIMINATE s390_can_eliminate
16353
16354 #undef TARGET_CONDITIONAL_REGISTER_USAGE
16355 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
16356
16357 #undef TARGET_LOOP_UNROLL_ADJUST
16358 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
16359
16360 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
16361 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
16362 #undef TARGET_TRAMPOLINE_INIT
16363 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
16364
16365 /* PR 79421 */
16366 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
16367 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
16368
16369 #undef TARGET_UNWIND_WORD_MODE
16370 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
16371
16372 #undef TARGET_CANONICALIZE_COMPARISON
16373 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
16374
16375 #undef TARGET_HARD_REGNO_SCRATCH_OK
16376 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
16377
16378 #undef TARGET_HARD_REGNO_NREGS
16379 #define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
16380 #undef TARGET_HARD_REGNO_MODE_OK
16381 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
16382 #undef TARGET_MODES_TIEABLE_P
16383 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
16384
16385 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
16386 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
16387 s390_hard_regno_call_part_clobbered
16388
16389 #undef TARGET_ATTRIBUTE_TABLE
16390 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
16391
16392 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
16393 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
16394
16395 #undef TARGET_SET_UP_BY_PROLOGUE
16396 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
16397
16398 #undef TARGET_EXTRA_LIVE_ON_ENTRY
16399 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
16400
16401 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
16402 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
16403 s390_use_by_pieces_infrastructure_p
16404
16405 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
16406 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
16407
16408 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
16409 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
16410
16411 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
16412 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
16413
16414 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
16415 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
16416
16417 #undef TARGET_VECTOR_ALIGNMENT
16418 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
16419
16420 #undef TARGET_INVALID_BINARY_OP
16421 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
16422
16423 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16424 #undef TARGET_ASM_FILE_START
16425 #define TARGET_ASM_FILE_START s390_asm_file_start
16426 #endif
16427
16428 #undef TARGET_ASM_FILE_END
16429 #define TARGET_ASM_FILE_END s390_asm_file_end
16430
16431 #undef TARGET_SET_CURRENT_FUNCTION
16432 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
16433
16434 #if S390_USE_TARGET_ATTRIBUTE
16435 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
16436 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
16437
16438 #undef TARGET_CAN_INLINE_P
16439 #define TARGET_CAN_INLINE_P s390_can_inline_p
16440 #endif
16441
16442 #undef TARGET_OPTION_RESTORE
16443 #define TARGET_OPTION_RESTORE s390_function_specific_restore
16444
16445 #undef TARGET_CAN_CHANGE_MODE_CLASS
16446 #define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
16447
16448 #undef TARGET_CONSTANT_ALIGNMENT
16449 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
16450
16451 #undef TARGET_ASM_CODE_END
16452 #define TARGET_ASM_CODE_END s390_code_end
16453
16454 #undef TARGET_CASE_VALUES_THRESHOLD
16455 #define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
16456
16457 /* Use only short displacement, since long displacement is not available for
16458 the floating point instructions. */
16459 #undef TARGET_MAX_ANCHOR_OFFSET
16460 #define TARGET_MAX_ANCHOR_OFFSET 0xfff
16461
16462 struct gcc_target targetm = TARGET_INITIALIZER;
16463
16464 #include "gt-s390.h"